Skip to content

Commit 0ba935c

Browse files
Julien Roussel
authored and committed
RPCA PCP not imputing
1 parent 65ff5fb commit 0ba935c

File tree

4 files changed

+47
-55
lines changed

4 files changed

+47
-55
lines changed

examples/1_timeSeries.ipynb

Lines changed: 28 additions & 20 deletions
Large diffs are not rendered by default.

qolmat/benchmark/comparator.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,9 @@
44
import numpy as np
55
import pandas as pd
66

7-
from qolmat import logging as qlog
87
from qolmat.benchmark import cross_validation, utils
98
from qolmat.benchmark.missing_patterns import _HoleGenerator
109

11-
qlog.log_setup()
12-
logger = logging.getLogger(__name__)
13-
# logger.setLevel(logging.DEBUG)
14-
1510

1611
class Comparator:
1712
"""
@@ -153,7 +148,6 @@ def compare(self, df: pd.DataFrame, verbose: bool = True):
153148
dict_errors = {}
154149

155150
for name, imputer in self.dict_models.items():
156-
logger.setLevel(logging.DEBUG)
157151
print(f"Tested model: {type(imputer).__name__}")
158152

159153
search_params = self.search_params.get(name, {})

qolmat/imputations/rpca/pcp_rpca.py

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,18 @@ def get_params_scale(self, D):
5959
dict_params = {"mu": mu, "lam": lam}
6060
return dict_params
6161

62-
def decompose_rpca(self, D: NDArray, mu:float, lam: float) -> Tuple[NDArray, NDArray]:
62+
def decompose_rpca(self, D: NDArray) -> Tuple[NDArray, NDArray]:
63+
proj_D = utils.impute_nans(D, method="median")
64+
65+
params_scale = self.get_params_scale(proj_D)
66+
67+
mu = params_scale["mu"] if self.mu is None else self.mu
68+
lam = params_scale["lam"] if self.lam is None else self.lam
69+
Omega = ~np.isnan(D)
70+
71+
print("mu:", mu)
72+
print("lam:", lam)
73+
6374
D_norm = np.linalg.norm(D, "fro")
6475

6576
A = np.full_like(D, 0)
@@ -69,15 +80,15 @@ def decompose_rpca(self, D: NDArray, mu:float, lam: float) -> Tuple[NDArray, NDA
6980

7081
for iteration in range(self.max_iter):
7182

72-
M = utils.svd_thresholding(D - A + Y/mu, 1/mu)
73-
A = utils.soft_thresholding(D - M + Y/mu, lam/mu)
74-
Y += mu * (D - M - A)
83+
M = utils.svd_thresholding(proj_D - A + Y/mu, 1/mu)
84+
A = utils.soft_thresholding(proj_D - M + Y/mu, lam/mu)
85+
A[~Omega] = (proj_D - M)[~Omega]
86+
Y += mu * (proj_D - M - A)
7587

7688
error = np.linalg.norm(D - M - A, "fro")/D_norm
7789
errors[iteration] = error
7890

7991
if error < self.tol:
80-
print(iteration, ":", error, "vs", self.tol)
8192
if self.verbose:
8293
print(f"Converged in {iteration} iterations")
8394
break
@@ -110,20 +121,8 @@ def fit_transform(
110121
Array of iterative errors
111122
"""
112123
X = X.copy().T
113-
D_init = self._prepare_data(X)
114-
print("D_init")
115-
print(D_init.shape)
116-
proj_D = utils.impute_nans(D_init, method="median")
117-
118-
params_scale = self.get_params_scale(proj_D)
119-
120-
mu = params_scale["mu"] if self.mu is None else self.mu
121-
lam = params_scale["lam"] if self.lam is None else self.lam
122-
123-
print("mu:", mu)
124-
print("lam:", lam)
125-
126-
M, A = self.decompose_rpca(proj_D, mu, lam)
124+
D = self._prepare_data(X)
125+
M, A = self.decompose_rpca(D)
127126

128127
# U, _, V = np.linalg.svd(M, full_matrices=False, compute_uv=True)
129128

qolmat/notebooks/benchmark.md

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -136,27 +136,18 @@ df_data.values.size
136136
df_imputed = imputer_rpca.fit_transform(df_data)
137137
```
138138

139-
```python
140-
df_imputed.shape
141-
```
142-
143139
```python
144140
df_imputed.iloc[:365 * (df_imputed.size // 365)]
145141
```
146142

147143
```python
148-
D = df_imputed.iloc[:365 * (df_imputed.size // 365)].values.reshape(365, -1)
144+
D = df_imputed.iloc[:365 * (df_imputed.size // 365)].values.reshape(-1, 365).T
149145
```
150146

151147
```python
152148
plt.plot(D)
153149
```
154150

155-
```python
156-
plt.plot(df_data["TEMP"].loc[station], ".", color="black")
157-
plt.plot(df_imputed["TEMP"].loc[station])
158-
```
159-
160151
This part is devoted to the imputation methods. The idea is to try different algorithms and compare them.
161152

162153
<u>**Methods**</u>:

0 commit comments

Comments
 (0)