scikit-learn-contrib
diff --git a/qolmat/imputations/imputers.py b/qolmat/imputations/imputers.py
Lines changed: 0 additions & 2 deletions
diff --git a/qolmat/imputations/rpca/rpca.py b/qolmat/imputations/rpca/rpca.py
Lines changed: 1 addition & 2 deletions
diff --git a/qolmat/imputations/rpca/rpca_noisy.py b/qolmat/imputations/rpca/rpca_noisy.py
Lines changed: 78 additions & 67 deletions
diff --git a/qolmat/imputations/rpca/rpca_pcp.py b/qolmat/imputations/rpca/rpca_pcp.py
Lines changed: 39 additions & 1 deletion
diff --git a/qolmat/imputations/rpca/rpca_utils.py b/qolmat/imputations/rpca/rpca_utils.py
Lines changed: 0 additions & 5 deletions
diff --git a/qolmat/utils/exceptions.py b/qolmat/utils/exceptions.py
Lines changed: 8 additions & 0 deletions
@@ -1532,8 +1532,6 @@ def _transform_element(self, df: pd.DataFrame, col: str = "__all__") -> pd.DataF
                 X_select = X[is_na & is_valid]
                 y_imputed = self.estimators_[col].predict(X_select)
                 y_imputed = y_imputed.flatten().astype(float)
-                print("y_imputed")
-                print(y_imputed)
 
                 y_imputed = pd.Series(y_imputed, index=X_select.index)
 
 
@@ -1,12 +1,11 @@
 from __future__ import annotations
 
-from typing import Optional, Tuple, Union
+from typing import Tuple, Union
 
 import numpy as np
 from numpy.typing import NDArray
 from sklearn.base import BaseEstimator, TransformerMixin
 
-from qolmat.imputations.rpca import rpca_utils
 from qolmat.utils import utils
 
 
 
@@ -4,12 +4,12 @@
 
 import numpy as np
 import scipy as scp
-from matplotlib import pyplot as plt
 from numpy.typing import NDArray
+from sklearn import utils as sku
 
 from qolmat.imputations.rpca import rpca_utils as rpca_utils
 from qolmat.imputations.rpca.rpca import RPCA
-from sklearn import utils as sku
+from qolmat.utils.exceptions import CostFunctionRPCANotMinimized
 
 
 class RPCANoisy(RPCA):
@@ -45,7 +45,7 @@ class RPCANoisy(RPCA):
     tol: Optional[float]
         stoppign critera, minimum difference between 2 consecutive iterations. By default,
         the value is set to 1e-6
-    norm: Optional[str]
+    norm: str
         error norm, can be "L1" or "L2". By default, the value is set to "L2"
     """
 
@@ -54,24 +54,24 @@ def __init__(
         random_state: Union[None, int, np.random.RandomState] = None,
         period: int = 1,
         rank: Optional[int] = None,
+        mu: Optional[float] = None,
         tau: Optional[float] = None,
         lam: Optional[float] = None,
         list_periods: List[int] = [],
         list_etas: List[float] = [],
         max_iterations: int = int(1e4),
         tol: float = 1e-6,
-        norm: Optional[str] = "L2",
-        do_report: bool = False,
+        norm: str = "L2",
     ) -> None:
         super().__init__(period=period, max_iterations=max_iterations, tol=tol)
         self.rng = sku.check_random_state(random_state)
         self.rank = rank
+        self.mu = mu
         self.tau = tau
         self.lam = lam
         self.list_periods = list_periods
         self.list_etas = list_etas
         self.norm = norm
-        self.do_report = do_report
 
     def decompose_rpca_L1(
         self, D: NDArray, Omega: NDArray, lam: float, tau: float, rank: int
@@ -110,8 +110,8 @@ def decompose_rpca_L1(
         """
         m, n = D.shape
         rho = 1.1
-        mu = 1e-2
-        mu_bar = mu * 1e10
+        mu = self.mu or 1e-2
+        mu_bar = mu * 1e3
 
         # init
         Y = np.ones((m, n))
@@ -122,20 +122,17 @@ def decompose_rpca_L1(
         L = np.ones((m, rank))
         Q = np.ones((n, rank))
         R = [np.ones((m, n - period)) for period in self.list_periods]
-        # temporal correlations
-        H = [rpca_utils.toeplitz_matrix(period, n, model="column") for period in self.list_periods]
 
-        ##
+        # matrices for temporal correlation
+        H = [rpca_utils.toeplitz_matrix(period, n, model="column") for period in self.list_periods]
         HHT = np.zeros((n, n))
         for index, _ in enumerate(self.list_periods):
             HHT += self.list_etas[index] * (H[index] @ H[index].T)
 
         Ir = np.eye(rank)
         In = np.eye(n)
 
-        increments = np.full((self.max_iterations,), np.nan, dtype=float)
-
-        for iteration in range(self.max_iterations):
+        for _ in range(self.max_iterations):
             X_temp = X.copy()
             A_temp = A.copy()
             L_temp = L.copy()
@@ -189,7 +186,6 @@ def decompose_rpca_L1(
             for index, _ in enumerate(self.list_periods):
                 Rc = np.maximum(Rc, np.linalg.norm(R[index] - R_temp[index], np.inf))
             tol = np.amax(np.array([Xc, Ac, Lc, Qc, Rc]))
-            increments[iteration] = tol
 
             if tol < self.tol:
                 break
@@ -202,7 +198,7 @@ def decompose_rpca_L2(
         self, D: NDArray, Omega: NDArray, lam: float, tau: float, rank: int
     ) -> Tuple:
         """
-        Compute the noisy RPCA with a L1 time penalisation
+        Compute the noisy RPCA with a L2 time penalisation
 
         Parameters
         ----------
@@ -237,14 +233,18 @@ def decompose_rpca_L2(
         m, n = D.shape
 
         # init
-        Y = np.zeros((m, n))
+        Y = np.full_like(D, 0)
         X = D.copy()
-        A = np.zeros((m, n))
-        L = np.ones((m, rank))
-        Q = np.ones((n, rank))
+        A = np.full_like(D, 0)
+        U, S, Vt = np.linalg.svd(X)
+        U = U[:, :rank]
+        S = S[:rank]
+        Vt = Vt[:rank, :]
+        L = U @ np.diag(np.sqrt(S))
+        Q = Vt.transpose() @ np.diag(np.sqrt(S))
 
-        mu = 1e-2
-        mu_bar = mu * 1e10
+        mu = self.mu or 1e-2
+        mu_bar = mu * 1e3
 
         # matrices for temporal correlation
         H = [rpca_utils.toeplitz_matrix(period, n, model="column") for period in self.list_periods]
@@ -255,14 +255,7 @@ def decompose_rpca_L2(
         Ir = np.eye(rank)
         In = np.eye(n)
 
-        increment = np.full((self.max_iterations,), np.nan, dtype=float)
-        errors_ano = []
-        errors_nuclear = []
-        errors_noise = []
-        errors_lagrange = []
-        self.list_report = []
-
-        for iteration in range(self.max_iterations):
+        for _ in range(self.max_iterations):
             X_temp = X.copy()
             A_temp = A.copy()
             L_temp = L.copy()
@@ -273,10 +266,10 @@ def decompose_rpca_L2(
                 b=(D - A + mu * L @ Q.T - Y).T,
             ).T
 
-            if np.any(~Omega):
-                A_omega = rpca_utils.soft_thresholding(D - X, lam)
-                A_omega_C = D - X
-                A = np.where(Omega, A_omega, A_omega_C)
+            if np.any(np.isnan(D)):
+                A_Omega = rpca_utils.soft_thresholding(D - X, lam)
+                A_Omega_C = D - X
+                A = np.where(Omega, A_Omega, A_Omega_C)
             else:
                 A = rpca_utils.soft_thresholding(D - X, lam)
 
@@ -300,43 +293,10 @@ def decompose_rpca_L2(
             Qc = np.linalg.norm(Q - Q_temp, np.inf)
 
             tol = max([Xc, Ac, Lc, Qc])
-            increment[iteration] = tol
-
-            _, values_singular, _ = np.linalg.svd(X, full_matrices=True)
-            errors_ano.append(np.sum(np.abs(A)))
-            errors_nuclear.append(np.sum(values_singular))
-            errors_noise.append(np.sum((D - X - A) ** 2))
-            errors_lagrange.append(np.sum((X - L @ Q.T) ** 2))
-
-            if self.do_report:
-                self.list_report.append((D, X, A))
 
             if tol < self.tol:
                 break
 
-        if self.do_report:
-            errors_ano_np = np.array(errors_ano)
-            errors_nuclear_np = np.array(errors_nuclear)
-            errors_noise_np = np.array(errors_noise)
-            errors_lagrange_np = np.array(errors_lagrange)
-
-            plt.plot(lam * errors_ano_np, label="Cost (ano)")
-            plt.plot(tau * errors_nuclear_np, label="Cost (SV)")
-            plt.plot(0.5 * errors_noise_np, label="Cost (noise)")
-            plt.plot(errors_lagrange_np, label="Cost (Lagrange)")
-            plt.plot(
-                lam * errors_ano_np + tau * errors_nuclear_np + errors_noise_np,
-                label="Total",
-                color="black",
-            )
-            plt.yscale("log")
-            # plt.gca().twinx()
-            # plt.plot(errors_cv, color="black")
-            plt.grid()
-            plt.yscale("log")
-            plt.legend()
-            plt.show()
-
         X = L @ Q.T
 
         M = X
@@ -411,7 +371,58 @@ def decompose_rpca(self, D: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]:
 
         if self.norm == "L1":
             M, A, U, V = self.decompose_rpca_L1(D, Omega, lam, tau, rank)
+
         elif self.norm == "L2":
             M, A, U, V = self.decompose_rpca_L2(D, Omega, lam, tau, rank)
 
+        self._check_cost_function_minimized(D, M, A, tau, lam, self.norm)
+
         return M, A
+
+    @staticmethod
+    def _check_cost_function_minimized(
+        observations: NDArray,
+        low_rank: NDArray,
+        anomalies: NDArray,
+        tau: float,
+        lam: float,
+        norm: str,
+    ):
+        """Check that the functional minimized by the RPCA
+        is smaller at the end than at the beginning
+
+        Parameters
+        ----------
+        observations : NDArray
+            observations matrix with first linear interpolation
+        low_rank : NDArray
+            low_rank matrix resulting from RPCA
+        anomalies : NDArray
+            sparse matrix resulting from RPCA
+        tau : float
+            parameter penalizing the nuclear norm of the low rank part
+        lam : float
+            parameter penalizing the L1-norm of the anomaly/sparse part
+        norm : str
+            norm of the temporal penalisation. Has to be `L1` or `L2`
+
+        Raises
+        ------
+        CostFunctionRPCANotMinimized
+            The RPCA did not minimize the cost function:
+            the final cost is not lower than the starting one (1e-4 tolerance).
+        """
+        value_start = tau * np.linalg.norm(observations, "nuc")
+        if norm == "L1":
+            anomalies_norm = np.sum(np.abs(anomalies))
+            function_str = "||D-M-A||_2 + tau ||D||_* + lam ||A||_1"
+        elif norm == "L2":
+            anomalies_norm = np.sum(anomalies**2)
+            function_str = "||D-M-A||_2 + tau ||D||_* + lam ||A||_2"
+        value_end = (
+            np.sum((observations - low_rank - anomalies) ** 2)
+            + tau * np.linalg.norm(low_rank, "nuc")
+            + lam * anomalies_norm
+        )
+        if value_start + 1e-4 <= value_end:
+            raise CostFunctionRPCANotMinimized(function_str, float(value_start), float(value_end))
@@ -4,10 +4,11 @@
 
 import numpy as np
 from numpy.typing import NDArray
+from sklearn import utils as sku
 
 from qolmat.imputations.rpca import rpca_utils
 from qolmat.imputations.rpca.rpca import RPCA
-from sklearn import utils as sku
+from qolmat.utils.exceptions import CostFunctionRPCANotMinimized
 
 
 class RPCAPCP(RPCA):
@@ -69,11 +70,48 @@ def decompose_rpca(self, D: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]:
             M = rpca_utils.svd_thresholding(D - A + Y / mu, 1 / mu)
             A = rpca_utils.soft_thresholding(D - M + Y / mu, lam / mu)
             A[~Omega] = (D - M)[~Omega]
+
             Y += mu * (D - M - A)
 
             error = np.linalg.norm(D - M - A, "fro") / D_norm
             errors[iteration] = error
 
             if error < self.tol:
                 break
+
+        self._check_cost_function_minimized(D, M, A, lam)
+
         return M, A
+
+    @staticmethod
+    def _check_cost_function_minimized(
+        observations: NDArray,
+        low_rank: NDArray,
+        anomalies: NDArray,
+        lam: float,
+    ):
+        """Check that the functional minimized by the RPCA
+        is smaller at the end than at the beginning
+
+        Parameters
+        ----------
+        observations : NDArray
+            observations matrix with first linear interpolation
+        low_rank : NDArray
+            low_rank matrix resulting from RPCA
+        anomalies : NDArray
+            sparse matrix resulting from RPCA
+        lam : float
+            parameter penalizing the L1-norm of the anomaly/sparse part
+
+        Raises
+        ------
+        CostFunctionRPCANotMinimized
+            The RPCA did not minimize the cost function:
+            the final cost is not lower than the starting one (1e-4 tolerance).
+        """
+        value_start = np.linalg.norm(observations, "nuc")
+        value_end = np.linalg.norm(low_rank, "nuc") + lam * np.sum(np.abs(anomalies))
+        if value_start + 1e-4 <= value_end:
+            function_str = "||D||_* + lam ||A||_1"
+            raise CostFunctionRPCANotMinimized(function_str, float(value_start), float(value_end))
@@ -2,15 +2,10 @@
 Modular utility functions for RPCA
 """
 
-import warnings
-from typing import List, Optional, Tuple
 
 import numpy as np
-import pandas as pd
-import scipy
 from numpy.typing import NDArray
 from scipy.linalg import toeplitz
-from sklearn.neighbors import kneighbors_graph
 
 
 def approx_rank(
 
@@ -31,6 +31,14 @@ def __init__(self, subset: Any):
         super().__init__(f"Provided subset `{subset}` should be None or a list!")
 
 
+class CostFunctionRPCANotMinimized(Exception):
+    def __init__(self, name_fct: str, value_start: float, value_end: float):
+        super().__init__(
+            f"RPCA algorithm may provide bad results. Function {name_fct} increased from"
+            f" {value_start} to {value_end} instead of decreasing!"
+        )
+
+
 class NotDimension2(Exception):
     def __init__(self, shape: Tuple[int, ...]):
         super().__init__(f"Provided matrix is of shape {shape}, which is not of dimension 2!")