Skip to content

Commit c9d68de

Browse files
Julien Roussel
authored and committed
clean notebooks
1 parent 265913f commit c9d68de

File tree

4 files changed

+21
-95
lines changed

4 files changed

+21
-95
lines changed

examples/1_timeSeries.ipynb

Lines changed: 12 additions & 24 deletions
Large diffs are not rendered by default.

qolmat/imputations/em_sampler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ def fit(self, X: np.array):
231231
X_sample_last = self._sample_ou(X_sample_last, mask_na)
232232

233233
if self._check_convergence():
234-
print(f"EM converged after {iter_em} iterations.")
234+
# print(f"EM converged after {iter_em} iterations.")
235235
break
236236

237237
self.dict_criteria_stop = {key: [] for key in self.dict_criteria_stop}

qolmat/imputations/imputers.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -739,12 +739,8 @@ def fit_transform_element(self, df: pd.DataFrame) -> pd.DataFrame:
739739
model = TemporalRPCA(**self.hyperparams_element)
740740
elif self.method == "onlinetemporal":
741741
model = OnlineTemporalRPCA(**self.hyperparams_element)
742-
743-
print(type(model))
744742

745743
X_imputed = model.fit_transform(df.values)
746-
print("X_imputed.shape")
747-
print(X_imputed.shape)
748744
df_imputed = pd.DataFrame(X_imputed, index=df.index, columns=df.columns)
749745

750746
return df_imputed

qolmat/notebooks/benchmark.md

Lines changed: 8 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ df["Sales"] = df['Sales'].astype(float)
7272
cols_to_impute = ["Sales"]
7373

7474
```python
75-
download = False
75+
download = True
7676
df_data = data.get_data_corrupted(download=download, ratio_masked=.2, mean_size=120 , groups=["station"])
7777

7878
# cols_to_impute = ["TEMP", "PRES", "DEWP", "NO2", "CO", "O3", "WSPM"]
@@ -84,67 +84,9 @@ cols_to_impute = ["TEMP", "PRES"]
8484
Let's take a look at the variables to impute. We only consider one station, Aotizhongxin.
8585
The time series display seasonality (with a period of roughly 12 months).
8686

87-
```python
88-
df_data
89-
```
90-
91-
```python
92-
df0 = df_data
93-
```
94-
95-
```python
96-
# df_data = df0[df0.index.get_level_values("station").isin(["Gucheng"])]
97-
# df_data = df0[df0.index.get_level_values("station").isin(["Gucheng", "Aotizhongxin"])]
98-
```
99-
100-
```python
101-
n_stations = len(df_data.groupby("station").size())
102-
n_cols = len(cols_to_impute)
103-
```
104-
105-
```python
106-
fig = plt.figure(figsize=(10 * n_stations, 2 * n_cols))
107-
for i_station, (station, df) in enumerate(df_data.groupby("station")):
108-
for i_col, col in enumerate(cols_to_impute):
109-
fig.add_subplot(n_cols, n_stations, i_col * n_stations + i_station + 1)
110-
plt.plot(df.reset_index().datetime, df[col], '.', label=station)
111-
# break
112-
plt.ylabel(col, fontsize=12)
113-
if i_col == 0:
114-
plt.title(station)
115-
plt.show()
116-
```
11787

11888
### **II. Imputation methods**
11989

120-
```python
121-
station = "Gucheng"
122-
df_data = df0[df0.index.get_level_values("station").isin([station])]
123-
df_data = df_data[["TEMP"]]
124-
```
125-
126-
```python
127-
# imputer_rpca = imputers.ImputerRPCA(groups=["station"], method="PCP", columnwise=True, period=365, max_iter=1000)
128-
imputer_rpca = imputers.ImputerRPCA(groups=["station"], method="temporal", columnwise=True, max_iter=1000, period=10, tau=2, lam=0.3, list_periods=[10], list_etas=[0.01], norm="L2")
129-
130-
```
131-
132-
```python
133-
df_data.values.size
134-
```
135-
136-
```python
137-
df_imputed = imputer_rpca.fit_transform(df_data)
138-
```
139-
140-
```python
141-
df_imputed.iloc[:365 * (df_imputed.size // 365)]
142-
```
143-
144-
```python
145-
plt.plot(df_data.loc["Wonderland"], ".", color="black")
146-
plt.plot(df_imputed.loc["Wonderland"])
147-
```
14890

14991
This part is devoted to the imputation methods. The idea is to try different algorithms and compare them.
15092

@@ -175,8 +117,8 @@ imputer_residuals = imputers.ImputerResiduals(groups=["station"], period=7, mode
175117
# imputer_rpca = imputers.ImputerRPCA(groups=["station"], method="temporal", columnwise=False, n_rows=7*4, max_iter=1000, tau=1, lam=0.7)
176118
dict_tau = {"TEMP": 1, "PRES": 1.1}
177119
dict_lam = {"TEMP": 0.7, "PRES": 0.8}
178-
imputer_rpca = imputers.ImputerRPCA(groups=["station"], method="temporal", columnwise=True, n_rows=7*4, max_iter=1000, tau=dict_tau, lam=dict_lam)
179-
imputer_rpca_opti = imputers.ImputerRPCA(groups=["station"], method="temporal", columnwise=True, n_rows=7*4, max_iter=1000)
120+
imputer_rpca = imputers.ImputerRPCA(groups=["station"], method="temporal", columnwise=True, period=365, max_iter=1000, tau=dict_tau, lam=dict_lam)
121+
imputer_rpca_opti = imputers.ImputerRPCA(groups=["station"], method="temporal", columnwise=True, n_rows=365, max_iter=1000)
180122

181123
imputer_ou = imputers.ImputeEM(groups=["station"], method="multinormal", max_iter_em=34, n_iter_ou=15, strategy="ou")
182124
imputer_tsou = imputers.ImputeEM(groups=["station"], method="VAR1", strategy="ou", max_iter_em=34, n_iter_ou=15)
@@ -197,15 +139,15 @@ dict_imputers = {
197139
"mean": imputer_mean,
198140
# "median": imputer_median,
199141
# "mode": imputer_mode,
200-
# "interpolation": imputer_interpol,
142+
"interpolation": imputer_interpol,
201143
# "spline": imputer_spline,
202144
# "shuffle": imputer_shuffle,
203145
# "residuals": imputer_residuals,
204-
# "OU": imputer_ou,
205-
# "TSOU": imputer_tsou,
206-
# "TSMLE": imputer_tsmle,
146+
"OU": imputer_ou,
147+
"TSOU": imputer_tsou,
148+
"TSMLE": imputer_tsmle,
207149
"RPCA": imputer_rpca,
208-
"RPCA_opti": imputer_rpca_opti,
150+
# "RPCA_opti": imputer_rpca_opti,
209151
# "locf": imputer_locf,
210152
# "nocb": imputer_nocb,
211153
# "knn": imputer_knn,

0 commit comments

Comments
 (0)