Commit fbdf663

Author: vm-aifluence-jro
Commit message: formatted
1 parent 4ca6f59 commit fbdf663

File tree: 17 files changed, +297 -250 lines changed
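
Note: the commit message is simply "formatted", and the pattern of the changes below (single quotes rewritten to double quotes, trailing commas added, `x ** 2` tightened to `x**2`, long lines wrapped in parentheses, two blank lines enforced between top-level definitions, final newlines added) is consistent with a run of the black autoformatter, e.g. a plain `black .`; the tool is not named anywhere in the commit, so this is an inference. Several hunks that look like no-ops remove trailing whitespace or add a missing newline at end of file.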

docs/conf.py (13 additions, 10 deletions)

@@ -12,16 +12,17 @@
 #
 import os
 import sys
-sys.path.insert(0, os.path.abspath('..'))
+
+sys.path.insert(0, os.path.abspath(".."))

 # -- Project information -----------------------------------------------------

-project = 'robust-pca'
-copyright = '2022, Quantmetry'
-author = 'Quantmetry'
+project = "robust-pca"
+copyright = "2022, Quantmetry"
+author = "Quantmetry"

 # The full version, including alpha/beta/rc tags
-release = '0.1'
+release = "0.1"


 # -- General configuration ---------------------------------------------------
@@ -38,15 +39,17 @@
     "sphinx.ext.mathjax",
     "numpydoc",
 ]
-mathjax_path = "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"
+mathjax_path = (
+    "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"
+)

 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]

 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

 # generate autosummary even if no references
 autosummary_generate = True
@@ -56,9 +59,9 @@
 # The theme to use for HTML and HTML Help pages. See the documentation for
 # a list of builtin themes.
 #
-html_theme = 'sphinx_rtd_theme'
+html_theme = "sphinx_rtd_theme"

 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]

robust_pca/__init__.py (1 addition, 1 deletion)

@@ -3,4 +3,4 @@
 from . import classes
 from . import utils

-__all__ = ["classes", "utils", "__version__"]
\ No newline at end of file
+__all__ = ["classes", "utils", "__version__"]

robust_pca/_version.py (1 addition, 1 deletion)

@@ -1 +1 @@
-__version__ = "0.1"
\ No newline at end of file
+__version__ = "0.1"

robust_pca/benchmark/comparator.py (12 additions, 17 deletions)

@@ -1,7 +1,7 @@
 import pandas as pd
 import numpy as np
 from robust_pca.benchmark import cross_validation
-from robust_pca.benchmark import utils
+from robust_pca.benchmark import utils
 from sklearn.metrics import (
     mean_squared_error,
     mean_absolute_error,
@@ -14,39 +14,32 @@
 import matplotlib.pyplot as plt


-
 class Comparator:
     def __init__(
         self,
         data,
         ratio_missing,
         models_to_test,
         cols_to_impute,
+        n_samples=1,
         search_params={},
         corruption="missing",
-        filter_value_nan=-1e10
+        filter_value_nan=-1e10,
     ):

         self.df = data[cols_to_impute]
         self.ratio_missing = ratio_missing
         self.cols_to_impute = cols_to_impute
+        self.n_samples = n_samples
         self.filter_value_nan = filter_value_nan
         self.models_to_test = models_to_test
         self.search_params = search_params
         self.corruption = corruption

-    def create_corruptions(
-        self,
-        df: pd.DataFrame,
-        random_state: Optional[int] = 29
-    ):
+    def create_corruptions(self, df: pd.DataFrame, random_state: Optional[int] = 29):

         self.df_is_altered = utils.choice_with_mask(
-            df,
-            df.notna(),
-            self.ratio_missing,
-            self.filter_value_nan,
-            random_state
+            df, df.notna(), self.ratio_missing, self.filter_value_nan, random_state
         )

         self.corrupted_df = df.copy()
@@ -58,7 +51,9 @@ def create_corruptions(
         )

     def get_errors(
-        self, signal_ref: pd.DataFrame, signal_imputed: pd.DataFrame,
+        self,
+        signal_ref: pd.DataFrame,
+        signal_imputed: pd.DataFrame,
     ) -> float:

         rmse = utils.mean_squared_error(
@@ -86,7 +81,7 @@ def compare(self):

         df = self.df[self.cols_to_impute]
         errors = defaultdict(list)
-        for _ in range(1):
+        for _ in range(self.n_samples):
             random_state = np.random.randint(0, 10 * 9)
             self.create_corruptions(df, random_state=random_state)
             cv = cross_validation.CrossValidation(
@@ -96,9 +91,9 @@ def compare(self):
                 ratio_missing=self.ratio_missing,
                 corruption=self.corruption,
             )
-            #print("# nan before imputation:", df.isna().sum().sum())
+            # print("# nan before imputation:", df.isna().sum().sum())
             imputed_df = cv.fit_transform(self.corrupted_df)
-            #print("# nan after imputation...:", imputed_df.isna().sum().sum())
+            # print("# nan after imputation...:", imputed_df.isna().sum().sum())
             for k, v in self.get_errors(df, imputed_df).items():
                 errors[k].append(v)


robust_pca/benchmark/utils.py (8 additions, 6 deletions)

@@ -11,6 +11,7 @@

 BOUNDS = Bounds(1, np.inf, keep_feasible=True)

+
 def get_search_space(tested_model, search_params):
     search_space = None
     search_name = None
@@ -54,9 +55,9 @@ def custom_groupby(df, groups):
 def choice_with_mask(df, mask, ratio, filter_value=None, random_state=None):
     mask = mask.to_numpy().flatten()
     if filter_value:
-        mask_filter = (df.values>filter_value).flatten()
+        mask_filter = (df.values > filter_value).flatten()
         mask += mask_filter
-
+
     indices = np.argwhere(mask)
     indices = resample(
         indices,
@@ -126,6 +127,7 @@ def aggregate_time_data(df, target, agg_time):
     )
     return df_aggregated

+
 def cross_entropy(t, t_hyp):
     loss = np.sum(t * np.log(t / t_hyp))
     jac = np.log(t / t_hyp) - 1
@@ -184,7 +186,7 @@ def impute_entropy_day(df, target, ts_agg, agg_time, zero_soil=0.0):
     df_day["n_train"] = df_day.groupby("datetime_round")[target].transform(
         lambda x: x.shape[0]
     )
-
+
     df_day["hyp_values"] = (
         df_day[["datetime_round"]]
         .merge(ts_agg, left_on="datetime_round", right_on="agg_time", how="left")[
@@ -204,15 +206,15 @@ def impute_entropy_day(df, target, ts_agg, agg_time, zero_soil=0.0):

     df_day["impute"] = np.nan
     df_day.loc[is_in_zero_slot, "impute"] = 0
-
+
     non_zero_impute = impute_by_max_entropy(
         df_dt=df_day.loc[~is_in_zero_slot, "datetime"].values,
         df_dt_agg=ts_agg.loc[ts_agg[col_name] > zero_soil, "agg_time"].values,
         df_values_agg=ts_agg.loc[ts_agg[col_name] > zero_soil, col_name].values,
         freq=agg_time,
         df_values_hyp=df_day.loc[~is_in_zero_slot, "hyp_values"].values,
     )
-
+
     df_day.loc[~is_in_zero_slot, "impute"] = (
         df_day.loc[~is_in_zero_slot, ["datetime"]]
         .merge(non_zero_impute, on="datetime", how="left")["impute"]
@@ -221,4 +223,4 @@ def impute_entropy_day(df, target, ts_agg, agg_time, zero_soil=0.0):

     df_res = df.merge(df_day[["datetime", "impute"]], on="datetime", how="left")

-    return df_res
\ No newline at end of file
+    return df_res
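
For orientation, choice_with_mask is the helper Comparator.create_corruptions calls to decide which entries get corrupted: it keeps entries flagged by the mask (and, if filter_value is set, above that value), then resamples the requested ratio of their indices. Based only on the signature visible here, a call might look like this; the toy frame is hypothetical:

import numpy as np
import pandas as pd
from robust_pca.benchmark import utils

df = pd.DataFrame({"x": [0.5, 1.5, np.nan, 3.0, 4.5]})

# Reproducibly mark ~40% of the valid (non-missing, above-threshold) entries
# as artificially altered for the benchmark.
is_altered = utils.choice_with_mask(
    df,
    df.notna(),
    ratio=0.4,
    filter_value=-1e10,
    random_state=29,
)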

robust_pca/classes/__init__.py (1 addition, 1 deletion)

@@ -1,4 +1,4 @@
 from . import rpca
 from . import improved_rpca
 from . import noisy_rpca
-from . import graph_rpca
\ No newline at end of file
+from . import graph_rpca

robust_pca/classes/graph_rpca.py (27 additions, 24 deletions)

@@ -12,7 +12,7 @@ class GraphRPCA:

     References
     ----------
-    Shahid, Nauman, et al. "Fast robust PCA on graphs."
+    Shahid, Nauman, et al. "Fast robust PCA on graphs."
     IEEE Journal of Selected Topics in Signal Processing 10.4 (2016): 740-756.

     Parameters
@@ -28,7 +28,7 @@ class GraphRPCA:
     gamma1 : int
         regularizing parameter for the graph G1, constructed from the columns of D
     gamma2 : int
-        regularizing parameter for the graph G1, constructed from the rows of D
+        regularizing parameter for the graph G1, constructed from the rows of D
     G1 : Optional[np.ndarray]
         graph G1, constructed from the columns of D
     G2 : Optional[np.ndarray]
@@ -58,7 +58,7 @@ def __init__(
         nbg2: Optional[int] = 10,
         maxIter: Optional[int] = int(1e4),
         tol: Optional[float] = 1e-6,
-        cv: Optional[int] = 5,
+        cv: Optional[int] = 5,
         verbose: Optional[bool] = False,
     ) -> None:

@@ -80,16 +80,16 @@ def __init__(
         self.maxIter = maxIter
         self.tol = tol
         self.verbose = verbose
-
+
         self._prepare_data()

     def _prepare_data(self) -> None:
         """Prepare data fot RPCA computation:
-        Transform signal to matrix if needed
-        Get the omega matrix
-        Impute the nan values if needed
+        Transform signal to matrix if needed
+        Get the omega matrix
+        Impute the nan values if needed
         """
-
+
         self.ret = 0
         if (self.D is None) and (self.period is None):
             self.period = utils.get_period(self.signal)
@@ -98,7 +98,7 @@ def _prepare_data(self) -> None:

         self.initial_D = self.D.copy()
         self.initial_D_proj = utils.impute_nans(self.initial_D, method="median")
-
+
         self.omega = 1 - (self.D != self.D)
         if np.isnan(np.sum(self.D)):
             self.proj_D = utils.impute_nans(self.D, method="median")
@@ -113,15 +113,15 @@ def compute_graph_rpca(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         Tuple[np.ndarray, np.ndarray, np.ndarray]
             observations, low-rank and sparse matrices
         """
-
+
         self.omega = 1 - (self.D != self.D)
         if np.isnan(np.sum(self.D)):
             self.proj_D = utils.impute_nans(self.D, method="median")
         else:
             self.proj_D = self.D
         if self.rank is None:
             self.rank = utils.approx_rank(self.proj_D)
-
+
         if self.G1 is None:
             self.G1 = utils.construct_graph((self.D).T, n_neighbors=self.nbg1)
         if self.G2 is None:
@@ -148,10 +148,12 @@ def compute_graph_rpca(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
             grad_g = 2 * (self.gamma1 * Y @ laplacian1 + self.gamma2 * laplacian2 @ Y)

             X = utils.proximal_operator(Y_past - lam * grad_g, self.D, lam)
-            t = (1 + (1 + 4 * t_past ** 2) ** 0.5) / 2
+            t = (1 + (1 + 4 * t_past**2) ** 0.5) / 2
             Y = X + (t_past - 1) / t * (X - X_past)

-            errors.append(np.linalg.norm(Y - Y_past, "fro") / np.linalg.norm(Y_past, "fro"))
+            errors.append(
+                np.linalg.norm(Y - Y_past, "fro") / np.linalg.norm(Y_past, "fro")
+            )
             if errors[-1] < self.tol:
                 if self.verbose:
                     print(
@@ -177,12 +179,12 @@ class GraphRPCAHyperparams(GraphRPCA):
     GraphRPCA : Type[GraphRPCA]
         [description]
     """
-
+
     def add_hyperparams(
         self,
         hyperparams_gamma1: Optional[List[float]] = [],
         hyperparams_gamma2: Optional[List[float]] = [],
-        cv: Optional[int] = 5,
+        cv: Optional[int] = 5,
     ) -> None:
         """Define the search space associated to each hyperparameter

@@ -224,7 +226,7 @@ def objective(self, args):
         float
             criterion to minimise
         """
-
+
         self.gamma1 = args[0]
         self.gamma2 = args[1]

@@ -244,8 +246,7 @@ def objective(self, args):

             error = (
                 np.linalg.norm(
-                    self.initial_D[indices_x, indices_y]
-                    - W[indices_x, indices_y],
+                    self.initial_D[indices_x, indices_y] - W[indices_x, indices_y],
                     1,
                 )
                 / nb_missing
@@ -255,20 +256,22 @@ def objective(self, args):

         if len(errors) == 0:
             print("Warning: not converged - return default 10^10")
-            return 10 ** 10
+            return 10**10

         return np.mean(errors)
-
-    def compute_graph_rpca_hyperparams(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+
+    def compute_graph_rpca_hyperparams(
+        self,
+    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         """Decompose a matrix into a low rank part and a sparse part
-        Hyperparams are set by Bayesian optimisation and cross-validation
+        Hyperparams are set by Bayesian optimisation and cross-validation

         Returns
         -------
         Tuple[np.ndarray, np.ndarray]
             the low rank matrix and the sparse matrix
         """
-
+
         res = skopt.gp_minimize(
             self.objective,
             self.search_space,
@@ -285,4 +288,4 @@ def compute_graph_rpca_hyperparams(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
         self.gamma2 = res.x[1]
         D, X, A = self.compute_graph_rpca()

-        return D, X, A
\ No newline at end of file
+        return D, X, A
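
Although every hunk above is cosmetic, two of them sit inside the core solver loop of compute_graph_rpca, which is an accelerated proximal-gradient (FISTA-style) iteration: t = (1 + (1 + 4 * t_past**2) ** 0.5) / 2 is the classic momentum-coefficient recurrence, and the wrapped errors.append tracks the relative change used as the stopping criterion. A self-contained sketch of that pattern, with grad_g and prox as assumed stand-ins for the solver's graph-Laplacian gradient and utils.proximal_operator:

import numpy as np

def fista_iterate(grad_g, prox, Y0, lam, max_iter=10_000, tol=1e-6):
    # grad_g and prox are assumed callables, not the library's actual API;
    # only the update pattern mirrors the loop in compute_graph_rpca.
    X_past = Y0.copy()
    Y = Y0.copy()
    t_past = 1.0
    for _ in range(max_iter):
        Y_past = Y.copy()
        X = prox(Y_past - lam * grad_g(Y_past), lam)  # proximal gradient step
        t = (1 + (1 + 4 * t_past**2) ** 0.5) / 2      # momentum coefficient
        Y = X + (t_past - 1) / t * (X - X_past)       # extrapolation step
        error = np.linalg.norm(Y - Y_past, "fro") / np.linalg.norm(Y_past, "fro")
        X_past, t_past = X, t
        if error < tol:                               # relative-change stopping rule
            break
    return X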
