Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion data/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,4 +484,23 @@ def property_filter(df, parameter:str, prop:str, metertimestamp:str,
# not totally sure, mean seems to be a proper summary of e.g. a thermostat setpoint
df = df[prop].resample(summ_intv).mean()

return df
return df


@staticmethod
def working_days_extraction (df_prep: pd.DataFrame, period : list) -> list:

# Working days dataframe
df_working_days = df_prep.copy()
for start_date, end_date in period:
df_working_days = df_working_days.loc[~((df_working_days.index.get_level_values(1) >= start_date) & (df_working_days.index.get_level_values(1) <= end_date))]

# Non-working days dataframe
dfs=[]
for start_date, end_date in period:
for home_id, home_data in df_prep.groupby('id'):
included_df = home_data.loc[(home_data.index.get_level_values(1) >= start_date) & (home_data.index.get_level_values(1) <= end_date)]
dfs.append((home_id, included_df))
df_non_working_days = pd.concat([df for _, df in dfs])

return df_working_days, df_non_working_days
103 changes: 103 additions & 0 deletions examples/B4B_analysis_real_ds.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from datetime import datetime, timedelta\n",
"\n",
"# usually, two decimals suffice for displaying DataFrames (NB internally, precision may be higher)\n",
Expand All @@ -68,6 +69,7 @@
"from plotter import Plot\n",
"from styler import formatted_error_dataframe\n",
"\n",
"\n",
"%load_ext autoreload\n",
"%matplotlib inline\n",
"%matplotlib widget\n",
Expand Down Expand Up @@ -1133,6 +1135,107 @@
" df_improvements_when_co2_margin_50__ppm = df_compare.describe().filter(regex='^mae_|^rmae_|^rmse')\n",
" df_improvements_when_co2_margin_50__ppm.loc[df_improvements_when_co2_margin_50__ppm.index.get_level_values(0).isin(['mean', 'std', 'min', 'max'])]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "84ee9610-f07a-4db3-849d-3da911016119",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"holiday_periods = [\n",
" (pd.to_datetime('2022-10-15 00:00:00+02:00'), pd.to_datetime('2022-10-16 23:45:00+02:00')),\n",
" (pd.to_datetime('2022-10-22 00:00:00+02:00'), pd.to_datetime('2022-10-30 23:45:00+02:00')) \n",
"]\n",
"# Choose feature to plot\n",
"plot_features = ['bms_co2__ppm', 'wind__m_s_1'] "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "60b5c8c8-fe08-4db6-902d-425524011783",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"Plot.working_days_scatter_plot(df_prep, plot_features, holiday_periods)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "14289d3e-b6ed-4811-9c1f-1f1c2b56b5b4",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"df_working_days, df_non_working_days = Preprocessor.working_days_extraction(df_prep, holiday_periods)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1794bfe3-b361-43a1-8ed4-487ed2b2778c",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Working days scatter plot\n",
"features = ['CO2-meter-SCD4x_co2__ppm', 'bms_co2__ppm', 'bms_occupancy__bool', 'CO2-meter-SCD4x_occupancy__p', 'xovis_occupancy__p', 'bms_valve_frac__0']\n",
"Plot.features_scatter_plot(df=df_working_days, features=features)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "99f6f19b-2021-4541-b392-88e1cd467dcb",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Non-Working days scatter plot\n",
"features = ['CO2-meter-SCD4x_co2__ppm', 'bms_co2__ppm', 'bms_occupancy__bool', 'CO2-meter-SCD4x_occupancy__p', 'xovis_occupancy__p', 'bms_valve_frac__0']\n",
"Plot.features_scatter_plot(df=df_non_working_days, features=features)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6bbacf3e-06fc-4a0e-8d4c-ea7dc7a1d246",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "9ff22f7f-2c47-4337-8931-ffe5d60a728f",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "966da740-a7fc-472d-a73a-9f303340a0d8",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "c6f5081f-9625-43be-ba12-2c804c5a7d70",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
37 changes: 37 additions & 0 deletions view/plotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,4 +392,41 @@ def features_scatter_plot(df: pd.DataFrame, features: list):
g = sns.PairGrid(df.loc[num[i]][features])
g.map(sns.scatterplot)
g.fig.suptitle(num[i])


@staticmethod
def working_days_scatter_plot(df_prep: pd.DataFrame, features: list, period: list):
    """Scatter-plot each feature over time, split into working vs non-working days.

    Creates one figure per split (working / non-working), with one subplot
    per (id, feature) combination.

    Args:
        df_prep: DataFrame with a MultiIndex: an 'id' level plus a timestamp
            level at position 1.
            # assumes level 1 is the timestamp level — TODO confirm with caller
        features: column names to plot, one subplot each per id.
        period: list of (start_date, end_date) tuples marking non-working
            periods; both bounds are inclusive.
    """
    # Non-working days: per period, collect each id's rows inside the period.
    non_working_parts = []
    for start_date, end_date in period:
        for _, home_data in df_prep.groupby('id'):
            timestamps = home_data.index.get_level_values(1)
            non_working_parts.append(
                home_data.loc[(timestamps >= start_date) & (timestamps <= end_date)]
            )
    df_non_working_days = pd.concat(non_working_parts)

    # Working days: everything that falls outside every period.
    df_working_days = df_prep.copy()
    for start_date, end_date in period:
        timestamps = df_working_days.index.get_level_values(1)
        df_working_days = df_working_days.loc[
            ~((timestamps >= start_date) & (timestamps <= end_date))
        ]

    df_list = [df_working_days, df_non_working_days]
    df_names = ['Working days', 'Non working days']

    for x, df in enumerate(df_list):
        unique_ids = df.index.get_level_values("id").unique()
        num_rows = len(unique_ids) * len(features)
        # squeeze=False keeps `axes` an array even when num_rows == 1;
        # without it, plt.subplots returns a bare Axes and axes[counter] fails.
        fig, axes = plt.subplots(num_rows, 1, figsize=(10, 5 * num_rows),
                                 sharex=True, squeeze=False)
        axes = axes.ravel()
        counter = 0
        for id_ in unique_ids:
            subset_df = df.loc[id_]
            for feature in features:
                ax = axes[counter]
                ax.scatter(subset_df.index, subset_df[feature], label=feature)
                ax.set_title('{} | ID: {}'.format(df_names[x], id_))
                ax.set_xlabel("Date and time")
                ax.set_ylabel(feature)
                ax.legend()
                counter += 1