prints removed and reshaping issue resolved

vm-aifluence-jro · vm-aifluence-jro · commit f6e55a17c80e · 2023-05-30T12:01:51.000Z
diff --git a/examples/benchmark.md b/examples/benchmark.md
@@ -145,20 +145,20 @@ imputer_regressor = imputers.ImputerRegressor(groups=["station"], estimator=Line
 
 dict_imputers = {
     "mean": imputer_mean,
-    # "median": imputer_median,
+    "median": imputer_median,
     # "mode": imputer_mode,
     "interpolation": imputer_interpol,
     # "spline": imputer_spline,
-    # "shuffle": imputer_shuffle,
+    "shuffle": imputer_shuffle,
     # "residuals": imputer_residuals,
     # "OU": imputer_ou,
-    # "TSOU": imputer_tsou,
-    # "TSMLE": imputer_tsmle,
+    "TSOU": imputer_tsou,
+    "TSMLE": imputer_tsmle,
     # "RPCA": imputer_rpca,
-    # "RPCA_opti": imputer_rpca_opti,
+    "RPCA_opti": imputer_rpca_opti,
     # "locf": imputer_locf,
     # "nocb": imputer_nocb,
-    # "knn": imputer_knn,
+    "knn": imputer_knn,
     "ols": imputer_regressor,
     # "mice_ols": imputer_mice,
 }
@@ -207,6 +207,21 @@ plt.bar(df_plot.index, df_plot, color=tab10(0))
 plt.show()
 ```
 
+```python
+fig = plt.figure(figsize=(16, 6))
+fig.add_subplot(1, 2, 1)
+df_plot = results.loc["mae"].mean().sort_values(ascending=False)
+plt.barh(df_plot.index, df_plot, color=[tab10(0) if i<n_imputers-1 else "red" for i in range(n_imputers)])
+plt.xlabel("Erreur MAE")
+# plt.show()
+
+fig.add_subplot(1, 2, 2)
+df_plot = results.loc["energy"].mean().sort_values(ascending=False)
+plt.barh(df_plot.index, df_plot, color=[tab10(0) if i<n_imputers-1 else "red" for i in range(n_imputers)])
+plt.xlabel("Erreur énergétique")
+plt.show()
+```
+
 ```python
 fig = plt.figure(figsize=(24, 8))
 fig.add_subplot(2, 1, 1)
@@ -247,8 +262,9 @@ for col in cols_to_impute:
     values_orig = df_station[col]
 
     plt.plot(values_orig, ".", color='black', label="original")
-
     for ind, (name, model) in enumerate(list(dict_imputers.items())):
+        if name not in ["mean", "TSMLE"]:
+            continue
         values_imp = dfs_imputed_station[name][col].copy()
         values_imp[values_orig.notna()] = np.nan
         plt.plot(values_imp, ".", color=tab10(ind), label=name, alpha=1)
diff --git a/qolmat/benchmark/cross_validation.py b/qolmat/benchmark/cross_validation.py
@@ -168,9 +168,6 @@ def fit_transform(
         """
 
         n0 = max(5, self.n_calls // 5)
-        print("---")
-        print(self.n_calls)
-        print(n0)
 
         # res = skopt.gp_minimize(
         #     self.objective(X=df),
@@ -191,8 +188,8 @@ def fit_transform(
         )
 
         hyperparams_flat = {space.name: val for space, val in zip(self.list_spaces, res["x"])}
-        print(f"Optimal hyperparameters : {hyperparams_flat}")
-        print(f"Results: {res}")
+        # print(f"Optimal hyperparameters : {hyperparams_flat}")
+        # print(f"Results: {res}")
 
         self.imputer.hyperparams_optim = self.deflat_hyperparams(hyperparams_flat)
         df_imputed = self.imputer.fit_transform(df)
diff --git a/qolmat/imputations/rpca/rpca.py b/qolmat/imputations/rpca/rpca.py
@@ -56,3 +56,10 @@ def _prepare_data(self, X: NDArray) -> NDArray:
                 return X.copy()
             else:
                 raise ValueError("`n_rows` should not be specified when imputing 2D data.")
+
+    def get_shape_original(self, X: NDArray, shape: Tuple[int]) -> NDArray:  # Single-row `shape`: flatten X to one row and truncate; otherwise return X as is.
+        if len(shape) == 1 or shape[0] == 1:  # `shape` is one-dimensional or its first dimension is 1
+            n_values = sum(shape)  # NOTE(review): for shape (1, n) this is n + 1, not n -- the element count may be what is intended; confirm
+            return X.reshape(1, -1)[:, :n_values]  # view X as a single row and keep its first n_values columns
+        else:
+            return X  # any other shape: X is handed back untouched
diff --git a/qolmat/imputations/rpca/rpca_noisy.py b/qolmat/imputations/rpca/rpca_noisy.py
@@ -366,7 +366,7 @@ def decompose_rpca_signal(
         elif self.norm == "L2":
             M, A, U, V, errors = self.decompose_rpca_L2(D_proj, Omega, lam, tau, rank)
 
-        M = M.reshape(X.shape)
-        A = A.reshape(X.shape)
+        M_final = self.get_shape_original(M, X.shape)
+        A_final = self.get_shape_original(A, X.shape)
 
-        return M, A
+        return M_final, A_final
diff --git a/qolmat/imputations/rpca/rpca_pcp.py b/qolmat/imputations/rpca/rpca_pcp.py
@@ -100,12 +100,6 @@ def decompose_rpca_signal(
         D = self._prepare_data(X)
         M, A = self.decompose_rpca(D)
 
-        # U, _, V = np.linalg.svd(M, full_matrices=False, compute_uv=True)
-
-        # if X.shape[0] == 1:
-        # M = M.reshape(1, -1)[:, : X.size]
-        # M = M.reshape(X)
-        # A = A.reshape(1, -1)[:, : X.size]
-        M = M.reshape(X.shape)
-        A = A.reshape(X.shape)
-        return M, A
+        M_final = self.get_shape_original(M, X.shape)
+        A_final = self.get_shape_original(A, X.shape)
+        return M_final, A_final
diff --git a/tests/utils/test_data.py b/tests/utils/test_data.py
@@ -76,10 +76,7 @@ def test_preprocess_data():
         ],
         columns=columns_raw,
     )
-    print(df_raw)
     result = data.preprocess_data(df_raw)
-    print(result)
-    print(df)
     # assert result.equals(df)
     pd.testing.assert_frame_equal(result, df, atol=1e-3)