44
55import numpy as np
66import scipy as scp
7- from matplotlib import pyplot as plt
87from numpy .typing import NDArray
8+ from sklearn import utils as sku
99
1010from qolmat .imputations .rpca import rpca_utils as rpca_utils
1111from qolmat .imputations .rpca .rpca import RPCA
12- from sklearn import utils as sku
12+ from qolmat . utils . exceptions import CostFunctionRPCANotMinimized
1313
1414
1515class RPCANoisy (RPCA ):
@@ -45,7 +45,7 @@ class RPCANoisy(RPCA):
4545 tol: Optional[float]
4646 stoppign critera, minimum difference between 2 consecutive iterations. By default,
4747 the value is set to 1e-6
48- norm: Optional[ str]
48+ norm: str
4949 error norm, can be "L1" or "L2". By default, the value is set to "L2"
5050 """
5151
@@ -54,24 +54,24 @@ def __init__(
5454 random_state : Union [None , int , np .random .RandomState ] = None ,
5555 period : int = 1 ,
5656 rank : Optional [int ] = None ,
57+ mu : Optional [float ] = None ,
5758 tau : Optional [float ] = None ,
5859 lam : Optional [float ] = None ,
5960 list_periods : List [int ] = [],
6061 list_etas : List [float ] = [],
6162 max_iterations : int = int (1e4 ),
6263 tol : float = 1e-6 ,
63- norm : Optional [str ] = "L2" ,
64- do_report : bool = False ,
64+ norm : str = "L2" ,
6565 ) -> None :
6666 super ().__init__ (period = period , max_iterations = max_iterations , tol = tol )
6767 self .rng = sku .check_random_state (random_state )
6868 self .rank = rank
69+ self .mu = mu
6970 self .tau = tau
7071 self .lam = lam
7172 self .list_periods = list_periods
7273 self .list_etas = list_etas
7374 self .norm = norm
74- self .do_report = do_report
7575
7676 def decompose_rpca_L1 (
7777 self , D : NDArray , Omega : NDArray , lam : float , tau : float , rank : int
@@ -110,8 +110,8 @@ def decompose_rpca_L1(
110110 """
111111 m , n = D .shape
112112 rho = 1.1
113- mu = 1e-2
114- mu_bar = mu * 1e10
113+ mu = self . mu or 1e-2
114+ mu_bar = mu * 1e3
115115
116116 # init
117117 Y = np .ones ((m , n ))
@@ -122,20 +122,17 @@ def decompose_rpca_L1(
122122 L = np .ones ((m , rank ))
123123 Q = np .ones ((n , rank ))
124124 R = [np .ones ((m , n - period )) for period in self .list_periods ]
125- # temporal correlations
126- H = [rpca_utils .toeplitz_matrix (period , n , model = "column" ) for period in self .list_periods ]
127125
128- ##
126+ # matrices for temporal correlation
127+ H = [rpca_utils .toeplitz_matrix (period , n , model = "column" ) for period in self .list_periods ]
129128 HHT = np .zeros ((n , n ))
130129 for index , _ in enumerate (self .list_periods ):
131130 HHT += self .list_etas [index ] * (H [index ] @ H [index ].T )
132131
133132 Ir = np .eye (rank )
134133 In = np .eye (n )
135134
136- increments = np .full ((self .max_iterations ,), np .nan , dtype = float )
137-
138- for iteration in range (self .max_iterations ):
135+ for _ in range (self .max_iterations ):
139136 X_temp = X .copy ()
140137 A_temp = A .copy ()
141138 L_temp = L .copy ()
@@ -189,7 +186,6 @@ def decompose_rpca_L1(
189186 for index , _ in enumerate (self .list_periods ):
190187 Rc = np .maximum (Rc , np .linalg .norm (R [index ] - R_temp [index ], np .inf ))
191188 tol = np .amax (np .array ([Xc , Ac , Lc , Qc , Rc ]))
192- increments [iteration ] = tol
193189
194190 if tol < self .tol :
195191 break
@@ -202,7 +198,7 @@ def decompose_rpca_L2(
202198 self , D : NDArray , Omega : NDArray , lam : float , tau : float , rank : int
203199 ) -> Tuple :
204200 """
205- Compute the noisy RPCA with a L1 time penalisation
201+ Compute the noisy RPCA with a L2 time penalisation
206202
207203 Parameters
208204 ----------
@@ -237,14 +233,18 @@ def decompose_rpca_L2(
237233 m , n = D .shape
238234
239235 # init
240- Y = np .zeros (( m , n ) )
236+ Y = np .full_like ( D , 0 )
241237 X = D .copy ()
242- A = np .zeros ((m , n ))
243- L = np .ones ((m , rank ))
244- Q = np .ones ((n , rank ))
238+ A = np .full_like (D , 0 )
239+ U , S , Vt = np .linalg .svd (X )
240+ U = U [:, :rank ]
241+ S = S [:rank ]
242+ Vt = Vt [:rank , :]
243+ L = U @ np .diag (np .sqrt (S ))
244+ Q = Vt .transpose () @ np .diag (np .sqrt (S ))
245245
246- mu = 1e-2
247- mu_bar = mu * 1e10
246+ mu = self . mu or 1e-2
247+ mu_bar = mu * 1e3
248248
249249 # matrices for temporal correlation
250250 H = [rpca_utils .toeplitz_matrix (period , n , model = "column" ) for period in self .list_periods ]
@@ -255,14 +255,7 @@ def decompose_rpca_L2(
255255 Ir = np .eye (rank )
256256 In = np .eye (n )
257257
258- increment = np .full ((self .max_iterations ,), np .nan , dtype = float )
259- errors_ano = []
260- errors_nuclear = []
261- errors_noise = []
262- errors_lagrange = []
263- self .list_report = []
264-
265- for iteration in range (self .max_iterations ):
258+ for _ in range (self .max_iterations ):
266259 X_temp = X .copy ()
267260 A_temp = A .copy ()
268261 L_temp = L .copy ()
@@ -273,10 +266,10 @@ def decompose_rpca_L2(
273266 b = (D - A + mu * L @ Q .T - Y ).T ,
274267 ).T
275268
276- if np .any (~ Omega ):
277- A_omega = rpca_utils .soft_thresholding (D - X , lam )
278- A_omega_C = D - X
279- A = np .where (Omega , A_omega , A_omega_C )
269+ if np .any (np . isnan ( D ) ):
270+ A_Omega = rpca_utils .soft_thresholding (D - X , lam )
271+ A_Omega_C = D - X
272+ A = np .where (Omega , A_Omega , A_Omega_C )
280273 else :
281274 A = rpca_utils .soft_thresholding (D - X , lam )
282275
@@ -300,43 +293,10 @@ def decompose_rpca_L2(
300293 Qc = np .linalg .norm (Q - Q_temp , np .inf )
301294
302295 tol = max ([Xc , Ac , Lc , Qc ])
303- increment [iteration ] = tol
304-
305- _ , values_singular , _ = np .linalg .svd (X , full_matrices = True )
306- errors_ano .append (np .sum (np .abs (A )))
307- errors_nuclear .append (np .sum (values_singular ))
308- errors_noise .append (np .sum ((D - X - A ) ** 2 ))
309- errors_lagrange .append (np .sum ((X - L @ Q .T ) ** 2 ))
310-
311- if self .do_report :
312- self .list_report .append ((D , X , A ))
313296
314297 if tol < self .tol :
315298 break
316299
317- if self .do_report :
318- errors_ano_np = np .array (errors_ano )
319- errors_nuclear_np = np .array (errors_nuclear )
320- errors_noise_np = np .array (errors_noise )
321- errors_lagrange_np = np .array (errors_lagrange )
322-
323- plt .plot (lam * errors_ano_np , label = "Cost (ano)" )
324- plt .plot (tau * errors_nuclear_np , label = "Cost (SV)" )
325- plt .plot (0.5 * errors_noise_np , label = "Cost (noise)" )
326- plt .plot (errors_lagrange_np , label = "Cost (Lagrange)" )
327- plt .plot (
328- lam * errors_ano_np + tau * errors_nuclear_np + errors_noise_np ,
329- label = "Total" ,
330- color = "black" ,
331- )
332- plt .yscale ("log" )
333- # plt.gca().twinx()
334- # plt.plot(errors_cv, color="black")
335- plt .grid ()
336- plt .yscale ("log" )
337- plt .legend ()
338- plt .show ()
339-
340300 X = L @ Q .T
341301
342302 M = X
@@ -411,7 +371,58 @@ def decompose_rpca(self, D: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]:
411371
412372 if self .norm == "L1" :
413373 M , A , U , V = self .decompose_rpca_L1 (D , Omega , lam , tau , rank )
374+
414375 elif self .norm == "L2" :
415376 M , A , U , V = self .decompose_rpca_L2 (D , Omega , lam , tau , rank )
416377
378+ self ._check_cost_function_minimized (D , M , A , tau , lam , self .norm )
379+
417380 return M , A
381+
382+ @staticmethod
383+ def _check_cost_function_minimized (
384+ observations : NDArray ,
385+ low_rank : NDArray ,
386+ anomalies : NDArray ,
387+ tau : float ,
388+ lam : float ,
389+ norm : str ,
390+ ):
391+ """Check that the functional minimized by the RPCA
392+ is smaller at the end than at the beginning
393+
394+ Parameters
395+ ----------
396+ observations : NDArray
397+ observations matrix with first linear interpolation
398+ low_rank : NDArray
399+ low_rank matrix resulting from RPCA
400+ anomalies : NDArray
401+ sparse matrix resulting from RPCA
402+ tau : float
403+ parameter penalizing the nuclear norm of the low rank part
404+ lam : float
405+ parameter penalizing the L1-norm of the anomaly/sparse part
406+ norm : str
407+ norm of the temporal penalisation. Has to be `L1` or `L2`
408+
409+ Raises
410+ ------
411+ CostFunctionRPCANotMinimized
412+ The RPCA does not minimized the cost function:
413+ the starting cost is at least equal to the final one.
414+ """
415+ value_start = tau * np .linalg .norm (observations , "nuc" )
416+ if norm == "L1" :
417+ anomalies_norm = np .sum (np .abs (anomalies ))
418+ function_str = "||D-M-A||_2 + tau ||D||_* + lam ||A||_1"
419+ elif norm == "L2" :
420+ anomalies_norm = np .sum (anomalies ** 2 )
421+ function_str = "||D-M-A||_2 + tau ||D||_* + lam ||A||_2"
422+ value_end = (
423+ np .sum ((observations - low_rank - anomalies ) ** 2 )
424+ + tau * np .linalg .norm (low_rank , "nuc" )
425+ + lam * anomalies_norm
426+ )
427+ if value_start + 1e-4 <= value_end :
428+ raise CostFunctionRPCANotMinimized (function_str , float (value_start ), float (value_end ))
0 commit comments