RPCA online not functional

Julien Roussel · Julien Roussel · commit 5dfcdb963da3 · 2023-03-02T19:09:53.000+01:00
diff --git a/examples/1_timeSeries.ipynb b/examples/1_timeSeries.ipynb
diff --git a/qolmat/imputations/rpca/pcp_rpca.py b/qolmat/imputations/rpca/pcp_rpca.py
@@ -59,7 +59,8 @@ def get_params_scale(self, D):
         return dict_params
     
     def decompose_rpca(self, D: NDArray) -> Tuple[NDArray, NDArray]:
-        proj_D = utils.impute_nans(D, method="median")
+        # proj_D = utils.impute_nans(D, method="median")
+        proj_D = np.where(np.isnan(D), -1, D)
 
         params_scale = self.get_params_scale(proj_D)
 
@@ -74,20 +75,48 @@ def decompose_rpca(self, D: NDArray) -> Tuple[NDArray, NDArray]:
 
         errors = np.full((self.max_iter,), fill_value=np.nan)
 
-        for iteration in range(self.max_iter):
+        print("D:")
+        print(D[:3])
+
+        from matplotlib import pyplot as plt
+        tab10 = plt.get_cmap("tab10")
+        #plt.figure(figsize=(8, 6))
 
+        M = proj_D - A
+        signal = proj_D.reshape(1, -1)[0]
+        #plt.plot(signal, color="black")
+        i_plot = 0
+        for iteration in range(self.max_iter):
+            #print("iteration=", iteration)
+            M_old = M.copy()
             M = utils.svd_thresholding(proj_D - A + Y/mu, 1/mu)
+            deltaM = M - M_old
+            signalM = M.reshape(1, -1)[0]
+            A_old = A.copy()
             A = utils.soft_thresholding(proj_D - M + Y/mu, lam/mu)
             A[~Omega] = (proj_D - M)[~Omega]
+            deltaA = A - A_old
+            signalA = A.reshape(1, -1)[0]
             Y += mu * (proj_D - M - A)
+            # signalY = (proj_D - M - A).reshape(1, -1)[0]
+            # plt.plot(6 + signalY, color=tab10(iteration), ls="-.")
 
             error = np.linalg.norm(D - M - A, "fro")/D_norm
             errors[iteration] = error
 
+            # if iteration % 10 == 0:
+            #     plt.plot(signalM, color=tab10(i_plot), ls="--")
+            #     plt.plot(4 + signalA, color=tab10(i_plot))
+
+            #     i_plot += 1
+            
+
             if error < self.tol:
                 if self.verbose:
                     print(f"Converged in {iteration} iterations")
                 break
+        plt.xlim(0, 30)
+        plt.show()
         return M, A
 
 
@@ -116,6 +145,7 @@ def fit_transform(
         errors: NDArray
             Array of iterative errors
         """
+        print("coucou")
         X = X.copy().T
         D = self._prepare_data(X)
         M, A = self.decompose_rpca(D)
diff --git a/qolmat/imputations/rpca/temporal_rpca.py b/qolmat/imputations/rpca/temporal_rpca.py
@@ -120,10 +120,11 @@ def compute_L1(self, proj_D, omega, lam, tau, rank) -> None:
             
             if np.any(np.isnan(proj_D)):
                 A_omega = utils.soft_thresholding(proj_D - X, lam)
-                A_omega = utils.ortho_proj(A_omega, omega, inverse=False)
+                # A_omega = utils.ortho_proj(A_omega, omega, inverse=False)
                 A_omega_C = proj_D - X
-                A_omega_C = utils.ortho_proj(A_omega_C, omega, inverse=True)
-                A = A_omega + A_omega_C
+                # A_omega_C = utils.ortho_proj(A_omega_C, omega, inverse=True)
+                # A = A_omega + A_omega_C
+                A = np.where(omega, A_omega, A_omega_C)
             else:
                 A = utils.soft_thresholding(proj_D - X, lam)
 
@@ -169,7 +170,7 @@ def compute_L1(self, proj_D, omega, lam, tau, rank) -> None:
         V = Q
         return M, A, U, V, errors
 
-    def compute_L2(self, proj_D, omega, lam, tau, rank) -> None:
+    def compute_L2(self, proj_D, Omega, lam, tau, rank) -> None:
         """
         compute RPCA with possible temporal regularisations, penalised with L2 norm
         """
@@ -208,12 +209,13 @@ def compute_L2(self, proj_D, omega, lam, tau, rank) -> None:
                 b=(proj_D - A + mu * L @ Q.T - Y).T,
             ).T
             
-            if np.any(~omega):
+            if np.any(~Omega):
                 A_omega = utils.soft_thresholding(proj_D - X, lam)
-                A_omega = utils.ortho_proj(A_omega, omega, inverse=False)
+                # A_omega = utils.ortho_proj(A_omega, omega, inverse=False)
                 A_omega_C = proj_D - X
-                A_omega_C = utils.ortho_proj(A_omega_C, omega, inverse=True)
-                A = A_omega + A_omega_C
+                # A_omega_C = utils.ortho_proj(A_omega_C, omega, inverse=True)
+                # A = A_omega + A_omega_C
+                A = np.where(Omega, A_omega, A_omega_C)
             else:
                 A = utils.soft_thresholding(proj_D - X, lam)
 
@@ -446,17 +448,17 @@ def get_params(self):
 
     def get_params_scale_online(
         self,
-        D:NDArray, Lhat: NDArray
+        D:NDArray, M: NDArray
     ) -> dict[str, float]:
         # D_init = self._prepare_data(signal=X)
         params_scale = self.get_params_scale(D)
         # burnin = int(D_init.shape[1] * self.burnin)
 
         # super_class = TemporalRPCA(**super().get_params())
         # Lhat, _, _ = super_class.fit_transform(X=D_init[:, :burnin])
-        _, sigmas_hat, _ = np.linalg.svd(Lhat)
-        online_tau = 1.0 / np.sqrt(len(Lhat)) / np.mean(sigmas_hat[: params_scale["rank"]])
-        online_lam = 1.0 / np.sqrt(len(Lhat))
+        _, sigmas_hat, _ = np.linalg.svd(M)
+        online_tau = 1.0 / np.sqrt(len(M)) / np.mean(sigmas_hat[: params_scale["rank"]])
+        online_lam = 1.0 / np.sqrt(len(M))
         params_scale["online_tau"] = online_tau
         params_scale["online_lam"] = online_lam
         return params_scale
@@ -499,34 +501,36 @@ def fit_transform(
         # Lhat, Shat, _, _, _ =super_class.fit_transform(X=D_init[:, :burnin])
         
         proj_D = utils.impute_nans(D_init, method="median")
-        omega = ~np.isnan(D_init)
+        Omega = ~np.isnan(D_init)
 
         params_scale = self.get_params_scale(proj_D)
 
         lam = params_scale["lam"] if self.lam is None else self.lam
         rank = params_scale["rank"] if self.rank is None else self.rank
         tau = params_scale["tau"] if self.tau is None else self.tau
 
+        D_burnin = proj_D[:, :burnin]
+        Omega_burnin = Omega[:, :burnin]
+
         if self.norm == "L1":
-            M, A, U, V, errors = self.compute_L1(proj_D, omega, lam, tau, rank)
+            M, A, U, V, errors = self.compute_L1(D_burnin, Omega_burnin, lam, tau, rank)
         elif self.norm == "L2":
-            M, A, U, V, errors = self.compute_L2(proj_D, omega, lam, tau, rank)
+            M, A, U, V, errors = self.compute_L2(D_burnin, Omega_burnin, lam, tau, rank)
 
-        Lhat, Shat, _ = np.linalg.svd(M, full_matrices=False, compute_uv=True)
+        # Lhat, Shat, _ = np.linalg.svd(M, full_matrices=False, compute_uv=True)
 
-        params_scale = self.get_params_scale_online(proj_D, Lhat)
+        params_scale_online = self.get_params_scale_online(proj_D, M)
 
-        online_tau = params_scale["online_tau"] if self.online_tau is None else self.online_tau 
-        online_lam = params_scale["online_lam"] if self.online_lam is None else self.online_lam 
+        online_tau = self.online_tau or params_scale_online["online_tau"]
+        online_lam = params_scale_online["online_lam"] if self.online_lam is None else self.online_lam 
 
         if len(self.online_list_etas) == 0:
             self.online_list_etas = self.list_etas
         
-        approx_rank =  utils.approx_rank(proj_D[:, :burnin])
+        approx_rank =  utils.approx_rank(D_burnin)
 
-        # TODO : is it really Lhat that should be used here?!
         Uhat, sigmas_hat, Vhat = randomized_svd(
-            Lhat, n_components=approx_rank, n_iter=5, random_state=42
+            M, n_components=approx_rank, n_iter=5, random_state=42
         )
         U = Uhat[:, :approx_rank]@(np.sqrt(np.diag(sigmas_hat[:approx_rank])))