Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion data/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,4 +484,23 @@ def property_filter(df, parameter:str, prop:str, metertimestamp:str,
# not totally sure, mean seems to be a proper summary of e.g. a thermostat setpoint
df = df[prop].resample(summ_intv).mean()

return df
return df


@staticmethod
def working_days_extraction (df_prep: pd.DataFrame, period : list) -> list:

# Working days dataframe
df_working_days = df_prep.copy()
for start_date, end_date in period:
df_working_days = df_working_days.loc[~((df_working_days.index.get_level_values(1) >= start_date) & (df_working_days.index.get_level_values(1) <= end_date))]

# Non-working days dataframe
dfs=[]
for start_date, end_date in period:
for home_id, home_data in df_prep.groupby('id'):
included_df = home_data.loc[(home_data.index.get_level_values(1) >= start_date) & (home_data.index.get_level_values(1) <= end_date)]
dfs.append((home_id, included_df))
df_non_working_days = pd.concat([df for _, df in dfs])

return df_working_days, df_non_working_days
103 changes: 103 additions & 0 deletions examples/B4B_analysis_real_ds.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from datetime import datetime, timedelta\n",
"\n",
"# usually, two decimals suffice for displaying DataFrames (NB internally, precision may be higher)\n",
Expand All @@ -68,6 +69,7 @@
"from plotter import Plot\n",
"from styler import formatted_error_dataframe\n",
"\n",
"\n",
"%load_ext autoreload\n",
"%matplotlib inline\n",
"%matplotlib widget\n",
Expand Down Expand Up @@ -1133,6 +1135,107 @@
" df_improvements_when_co2_margin_50__ppm = df_compare.describe().filter(regex='^mae_|^rmae_|^rmse')\n",
" df_improvements_when_co2_margin_50__ppm.loc[df_improvements_when_co2_margin_50__ppm.index.get_level_values(0).isin(['mean', 'std', 'min', 'max'])]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "84ee9610-f07a-4db3-849d-3da911016119",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"holiday_periods = [\n",
" (pd.to_datetime('2022-10-15 00:00:00+02:00'), pd.to_datetime('2022-10-16 23:45:00+02:00')),\n",
" (pd.to_datetime('2022-10-22 00:00:00+02:00'), pd.to_datetime('2022-10-30 23:45:00+02:00')) \n",
"]\n",
"# Choose feature to plot\n",
"plot_features = ['bms_co2__ppm', 'wind__m_s_1'] "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "60b5c8c8-fe08-4db6-902d-425524011783",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"Plot.working_days_scatter_plot(df_prep, plot_features, holiday_periods)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "14289d3e-b6ed-4811-9c1f-1f1c2b56b5b4",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"df_working_days, df_non_working_days = Preprocessor.working_days_extraction(df_prep, holiday_periods)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1794bfe3-b361-43a1-8ed4-487ed2b2778c",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Working days scatter plot\n",
"features = ['CO2-meter-SCD4x_co2__ppm', 'bms_co2__ppm', 'bms_occupancy__bool', 'CO2-meter-SCD4x_occupancy__p', 'xovis_occupancy__p', 'bms_valve_frac__0']\n",
"Plot.features_scatter_plot(df=df_working_days, features=features)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "99f6f19b-2021-4541-b392-88e1cd467dcb",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Non-Working days scatter plot\n",
"features = ['CO2-meter-SCD4x_co2__ppm', 'bms_co2__ppm', 'bms_occupancy__bool', 'CO2-meter-SCD4x_occupancy__p', 'xovis_occupancy__p', 'bms_valve_frac__0']\n",
"Plot.features_scatter_plot(df=df_non_working_days, features=features)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6bbacf3e-06fc-4a0e-8d4c-ea7dc7a1d246",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "9ff22f7f-2c47-4337-8931-ffe5d60a728f",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "966da740-a7fc-472d-a73a-9f303340a0d8",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "c6f5081f-9625-43be-ba12-2c804c5a7d70",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
37 changes: 37 additions & 0 deletions view/plotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,4 +392,41 @@ def features_scatter_plot(df: pd.DataFrame, features: list):
g = sns.PairGrid(df.loc[num[i]][features])
g.map(sns.scatterplot)
g.fig.suptitle(num[i])


@staticmethod
def working_days_scatter_plot(df_prep: pd.DataFrame, features: list, period: list):
    """Scatter-plot each feature over time, split into working vs non-working days.

    Creates one figure per split (working / non-working), with one subplot
    per (id, feature) combination.

    Args:
        df_prep: DataFrame with a MultiIndex: an 'id' level plus a timestamp
            level at position 1.
            # assumes level 1 is the timestamp level — TODO confirm with caller
        features: column names to plot, one subplot each per id.
        period: list of (start_date, end_date) tuples marking non-working
            periods; both bounds are inclusive.
    """
    # Non-working days: per period, collect each id's rows inside the period.
    non_working_parts = []
    for start_date, end_date in period:
        for _, home_data in df_prep.groupby('id'):
            timestamps = home_data.index.get_level_values(1)
            non_working_parts.append(
                home_data.loc[(timestamps >= start_date) & (timestamps <= end_date)]
            )
    df_non_working_days = pd.concat(non_working_parts)

    # Working days: everything that falls outside every period.
    df_working_days = df_prep.copy()
    for start_date, end_date in period:
        timestamps = df_working_days.index.get_level_values(1)
        df_working_days = df_working_days.loc[
            ~((timestamps >= start_date) & (timestamps <= end_date))
        ]

    df_list = [df_working_days, df_non_working_days]
    df_names = ['Working days', 'Non working days']

    for x, df in enumerate(df_list):
        unique_ids = df.index.get_level_values("id").unique()
        num_rows = len(unique_ids) * len(features)
        # squeeze=False keeps `axes` an array even when num_rows == 1;
        # without it, plt.subplots returns a bare Axes and axes[counter] fails.
        fig, axes = plt.subplots(num_rows, 1, figsize=(10, 5 * num_rows),
                                 sharex=True, squeeze=False)
        axes = axes.ravel()
        counter = 0
        for id_ in unique_ids:
            subset_df = df.loc[id_]
            for feature in features:
                ax = axes[counter]
                ax.scatter(subset_df.index, subset_df[feature], label=feature)
                ax.set_title('{} | ID: {}'.format(df_names[x], id_))
                ax.set_xlabel("Date and time")
                ax.set_ylabel(feature)
                ax.legend()
                counter += 1