Skip to content

Commit 265913f

Browse files
Julien Roussel
authored and committed
fictive data added to data.py
1 parent 0ba935c commit 265913f

File tree

6 files changed

+39
-84
lines changed

6 files changed

+39
-84
lines changed

examples/1_timeSeries.ipynb

Lines changed: 31 additions & 64 deletions
Large diffs are not rendered by default.

qolmat/imputations/rpca/pcp_rpca.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ def get_params(self):
5353
return dict_params
5454

5555
def get_params_scale(self, D):
56-
print(D.size, D.shape)
5756
mu = D.size / (4.0 * utils.l1_norm(D))
5857
lam = 1 / np.sqrt(np.max(D.shape))
5958
dict_params = {"mu": mu, "lam": lam}
@@ -67,9 +66,6 @@ def decompose_rpca(self, D: NDArray) -> Tuple[NDArray, NDArray]:
6766
mu = params_scale["mu"] if self.mu is None else self.mu
6867
lam = params_scale["lam"] if self.lam is None else self.lam
6968
Omega = ~np.isnan(D)
70-
71-
print("mu:", mu)
72-
print("lam:", lam)
7369

7470
D_norm = np.linalg.norm(D, "fro")
7571

@@ -126,15 +122,12 @@ def fit_transform(
126122

127123
# U, _, V = np.linalg.svd(M, full_matrices=False, compute_uv=True)
128124

129-
print("end")
130-
print(M.shape)
131125
if X.shape[0] == 1:
132126
M = M.reshape(1, -1)[:, :X.size]
133127
A = A.reshape(1, -1)[:, :X.size]
134128
M = M.T
135129
A = A.T
136130
# return M, A, U, V, errors
137-
print(M.shape)
138131
return M
139132

140133

qolmat/imputations/rpca/rpca.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ def _prepare_data(
4848
"""
4949
Transform signal to 2D-array in case of 1D-array.
5050
"""
51-
print("shape:", X.shape)
5251
n_rows_X, n_cols_X = X.shape
5352
if n_rows_X == 1:
5453
if self.n_rows is None:

qolmat/imputations/rpca/temporal_rpca.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -320,8 +320,6 @@ def fit_transform(
320320
"""
321321
X = X.copy().T
322322
D_init = self._prepare_data(X)
323-
print("D")
324-
print(D_init.shape)
325323
omega = ~np.isnan(D_init)
326324
proj_D = utils.impute_nans(D_init, method="median")
327325

qolmat/notebooks/benchmark.md

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ df["Sales"] = df['Sales'].astype(float)
7272
cols_to_impute = ["Sales"]
7373

7474
```python
75-
download = True
75+
download = False
7676
df_data = data.get_data_corrupted(download=download, ratio_masked=.2, mean_size=120 , groups=["station"])
7777

7878
# cols_to_impute = ["TEMP", "PRES", "DEWP", "NO2", "CO", "O3", "WSPM"]
@@ -94,7 +94,7 @@ df0 = df_data
9494

9595
```python
9696
# df_data = df0[df0.index.get_level_values("station").isin(["Gucheng"])]
97-
df_data = df0[df0.index.get_level_values("station").isin(["Gucheng", "Aotizhongxin"])]
97+
# df_data = df0[df0.index.get_level_values("station").isin(["Gucheng", "Aotizhongxin"])]
9898
```
9999

100100
```python
@@ -124,8 +124,9 @@ df_data = df_data[["TEMP"]]
124124
```
125125

126126
```python
127-
imputer_rpca = imputers.ImputerRPCA(groups=["station"], method="PCP", columnwise=True, period=365, max_iter=1000)
128-
# imputer_rpca = imputers.ImputerRPCA(groups=["station"], method="temporal", columnwise=True, n_rows=365, max_iter=1000, tau=1, lam=0.7)
127+
# imputer_rpca = imputers.ImputerRPCA(groups=["station"], method="PCP", columnwise=True, period=365, max_iter=1000)
128+
imputer_rpca = imputers.ImputerRPCA(groups=["station"], method="temporal", columnwise=True, max_iter=1000, period=10, tau=2, lam=0.3, list_periods=[10], list_etas=[0.01], norm="L2")
129+
129130
```
130131

131132
```python
@@ -141,11 +142,8 @@ df_imputed.iloc[:365 * (df_imputed.size // 365)]
141142
```
142143

143144
```python
144-
D = df_imputed.iloc[:365 * (df_imputed.size // 365)].values.reshape(-1, 365).T
145-
```
146-
147-
```python
148-
plt.plot(D)
145+
plt.plot(df_data.loc["Wonderland"], ".", color="black")
146+
plt.plot(df_imputed.loc["Wonderland"])
149147
```
150148

151149
This part is devoted to the imputation methods. The idea is to try different algorithms and compare them.

qolmat/utils/data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def get_data(datapath: str = "data/", download: Optional[bool] = True):
4444
return df
4545
else:
4646
city = "Wonderland"
47-
x = np.linspace(0, 4 * np.pi, 200)
47+
x = np.linspace(0, 40 * np.pi, 1000)
4848
y = 3 + np.sin(x) + np.random.random(len(x)) * 0.2
4949
datelist = pd.date_range(datetime(2013, 3, 1), periods=len(y)).tolist()
5050
dataset = pd.DataFrame({"var": y, "datetime": datelist[: len(y)], "station": city})

0 commit comments

Comments
 (0)