Skip to content

Commit 4f0218a

Browse files
Julien Roussel
authored and committed
Merge branch 'dev' into angoho_benchmarks
2 parents 4b88cd4 + ea44694 commit 4f0218a

25 files changed

+1878
-1185
lines changed

HISTORY.rst

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,14 @@
22
History
33
=======
44

5-
0.0.X (2023-0X-XX)
5+
0.0.15 (2023-??-??)
66
-------------------
7-
* Fix MLP imputer
87

8+
* Hyperparameters are now optimized in hyperparameters.py, with the maintained module hyperopt
9+
* The Imputer classes do not possess a dictionary attribute anymore, and all list attributes have
10+
been changed into tuple attributes so that all are now immutable
11+
* All the tests from scikit-learn's check_estimator now pass for the class Imputer
12+
* Fix MLP imputer
913

1014
0.0.14 (2023-06-14)
1115
-------------------

examples/RPCA.md

Lines changed: 2 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ jupyter:
66
extension: .md
77
format_name: markdown
88
format_version: '1.3'
9-
jupytext_version: 1.14.5
9+
jupytext_version: 1.14.4
1010
kernelspec:
1111
display_name: Python 3 (ipykernel)
1212
language: python
@@ -74,7 +74,7 @@ plt.show()
7474

7575
```python
7676
%%time
77-
rpca_pcp = RPCAPCP(period=100, max_iter=5, mu=.5, lam=1)
77+
rpca_pcp = RPCAPCP(period=100, max_iterations=5, mu=.5, lam=1)
7878
X, A = rpca_pcp.decompose_rpca_signal(signal)
7979
imputed = signal - A
8080
```
@@ -102,52 +102,3 @@ plt.plot(imputed)
102102
```python
103103

104104
```
105-
106-
```python
107-
%%time
108-
signal_toy = np.array([[1, 2], [np.nan, np.nan]])
109-
rpca_noisy = RPCANoisy(tau=0, lam=1, norm="L2", do_report=True)
110-
X, A = rpca_noisy.decompose_rpca_signal(signal_toy)
111-
```
112-
113-
```python
114-
print(X)
115-
print(A)
116-
```
117-
118-
```python
119-
%%time
120-
signal_toy = np.array([[1, 2], [np.nan, np.nan]])
121-
rpca_pcp = RPCAPCP(lam=1e3)
122-
X, A = rpca_pcp.decompose_rpca_signal(signal_toy)
123-
```
124-
125-
```python
126-
X
127-
```
128-
129-
```python
130-
A
131-
```
132-
133-
```python
134-
np.log(10) / np.log(1.1)
135-
```
136-
137-
```python
138-
X = np.array([[1, 2], [4, 4], [4, 3]])
139-
# Omega = np.array([[True, False], [True, True], [False, True]])
140-
Omega = np.array([[True, True], [True, True], [True, True]])
141-
rpca_noisy = RPCANoisy(period=2, max_iter=200, tau=.5, lam=1, do_report=True)
142-
M_result, A_result, U_result, V_result = rpca_noisy.decompose_rpca_L2(
143-
X, Omega=Omega, lam=1, tau=.5, rank=2
144-
)
145-
```
146-
147-
```python
148-
M_result
149-
```
150-
151-
```python
152-
153-
```

examples/benchmark.md

Lines changed: 58 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,18 @@ In Qolmat, a few data imputation methods are implemented as well as a way to eva
1919

2020
First, import some useful libraries
2121

22+
```python
23+
X= np.array([[0], [1], [2]])
24+
```
25+
26+
```python
27+
np.cov(X)
28+
```
29+
30+
```python
31+
32+
```
33+
2234
```python
2335
import warnings
2436
# warnings.filterwarnings('error')
@@ -116,52 +128,70 @@ ratio_masked = 0.1
116128
```
117129

118130
```python
119-
imputer_mean = imputers.ImputerMean(groups=["station"])
120-
imputer_median = imputers.ImputerMedian(groups=["station"])
121-
imputer_mode = imputers.ImputerMode(groups=["station"])
122-
imputer_locf = imputers.ImputerLOCF(groups=["station"])
123-
imputer_nocb = imputers.ImputerNOCB(groups=["station"])
124-
imputer_interpol = imputers.ImputerInterpolation(groups=["station"], method="linear")
125-
imputer_spline = imputers.ImputerInterpolation(groups=["station"], method="spline", order=2)
126-
imputer_shuffle = imputers.ImputerShuffle(groups=["station"])
127-
imputer_residuals = imputers.ImputerResiduals(groups=["station"], period=365, model_tsa="additive", extrapolate_trend="freq", method_interpolation="linear")
128-
129-
imputer_rpca = imputers.ImputerRPCA(groups=["station"], columnwise=False, max_iter=256, tau=2, lam=1)
130-
131-
imputer_ou = imputers.ImputerEM(groups=["station"], model="multinormal", method="sample", max_iter_em=34, n_iter_ou=15, dt=1e-3)
132-
imputer_tsou = imputers.ImputerEM(groups=["station"], model="VAR1", method="sample", max_iter_em=34, n_iter_ou=15, dt=1e-3)
133-
imputer_tsmle = imputers.ImputerEM(groups=["station"], model="VAR1", method="mle", max_iter_em=100, n_iter_ou=15, dt=1e-3)
134-
135-
136-
imputer_knn = imputers.ImputerKNN(groups=["station"], k=10)
137-
imputer_mice = imputers.ImputerMICE(groups=["station"], estimator=LinearRegression(), sample_posterior=False, max_iter=100, missing_values=np.nan)
138-
imputer_regressor = imputers.ImputerRegressor(groups=["station"], estimator=LinearRegression())
131+
imputer_mean = imputers.ImputerMean(groups=("station",))
132+
imputer_median = imputers.ImputerMedian(groups=("station",))
133+
imputer_mode = imputers.ImputerMode(groups=("station",))
134+
imputer_locf = imputers.ImputerLOCF(groups=("station",))
135+
imputer_nocb = imputers.ImputerNOCB(groups=("station",))
136+
imputer_interpol = imputers.ImputerInterpolation(groups=("station",), method="linear")
137+
imputer_spline = imputers.ImputerInterpolation(groups=("station",), method="spline", order=2)
138+
imputer_shuffle = imputers.ImputerShuffle(groups=("station",))
139+
imputer_residuals = imputers.ImputerResiduals(groups=("station",), period=365, model_tsa="additive", extrapolate_trend="freq", method_interpolation="linear")
140+
141+
imputer_rpca = imputers.ImputerRPCA(groups=("station",), columnwise=False, max_iterations=256, tau=2, lam=1)
142+
143+
imputer_ou = imputers.ImputerEM(groups=("station",), model="multinormal", method="sample", max_iter_em=34, n_iter_ou=15, dt=1e-3)
144+
imputer_tsou = imputers.ImputerEM(groups=("station",), model="VAR1", method="sample", max_iter_em=34, n_iter_ou=15, dt=1e-3)
145+
imputer_tsmle = imputers.ImputerEM(groups=("station",), model="VAR1", method="mle", max_iter_em=100, n_iter_ou=15, dt=1e-3)
146+
147+
148+
imputer_knn = imputers.ImputerKNN(groups=("station",), n_neighbors=10)
149+
imputer_mice = imputers.ImputerMICE(groups=("station",), estimator=LinearRegression(), sample_posterior=False, max_iter=100, missing_values=np.nan)
150+
imputer_regressor = imputers.ImputerRegressor(groups=("station",), estimator=LinearRegression())
139151
```
140152

141153
```python
142-
generator_holes = missing_patterns.EmpiricalHoleGenerator(n_splits=2, groups=["station"], subset=cols_to_impute, ratio_masked=ratio_masked)
154+
generator_holes = missing_patterns.EmpiricalHoleGenerator(n_splits=2, groups=("station",), subset=cols_to_impute, ratio_masked=ratio_masked)
143155
```
144156

145157
```python
146158
dict_config_opti = {
147159
"tau": ho.hp.uniform("tau", low=.5, high=5),
148160
"lam": ho.hp.uniform("lam", low=.1, high=1),
149161
}
150-
imputer_rpca_opti = imputers.ImputerRPCA(groups=["station"], columnwise=False, max_iter=256)
162+
imputer_rpca_opti = imputers.ImputerRPCA(groups=("station",), columnwise=False, max_iterations=256)
151163
imputer_rpca_opti = hyperparameters.optimize(
152164
imputer_rpca_opti,
153165
df_data,
154166
generator = generator_holes,
155167
metric="mae",
156168
max_evals=10,
157-
dict_config_opti=dict_config_opti
169+
dict_spaces=dict_config_opti
158170
)
159171
# imputer_rpca_opti.params_optim = hyperparams_opti
160172
```
161173

174+
```python
175+
dict_config_opti2 = {
176+
"tau/TEMP": ho.hp.uniform("tau/TEMP", low=.5, high=5),
177+
"tau/PRES": ho.hp.uniform("tau/PRES", low=.5, high=5),
178+
"lam/TEMP": ho.hp.uniform("lam/TEMP", low=.1, high=1),
179+
"lam/PRES": ho.hp.uniform("lam/PRES", low=.1, high=1),
180+
}
181+
imputer_rpca_opti2 = imputers.ImputerRPCA(groups=("station",), columnwise=True, max_iterations=256)
182+
imputer_rpca_opti2 = hyperparameters.optimize(
183+
imputer_rpca_opti2,
184+
df_data,
185+
generator = generator_holes,
186+
metric="mae",
187+
max_evals=10,
188+
dict_spaces=dict_config_opti2
189+
)
190+
```
191+
162192
```python
163193
dict_imputers = {
164-
# "mean": imputer_mean,
194+
"mean": imputer_mean,
165195
# "median": imputer_median,
166196
# "mode": imputer_mode,
167197
"interpolation": imputer_interpol,
@@ -171,8 +201,9 @@ dict_imputers = {
171201
# "OU": imputer_ou,
172202
"TSOU": imputer_tsou,
173203
"TSMLE": imputer_tsmle,
174-
"RPCA": imputer_rpca,
175-
"RPCA_opti": imputer_rpca_opti,
204+
# "RPCA": imputer_rpca,
205+
# "RPCA_opti": imputer_rpca_opti,
206+
# "RPCA_opti2": imputer_rpca_opti2,
176207
# "locf": imputer_locf,
177208
# "nocb": imputer_nocb,
178209
# "knn": imputer_knn,
@@ -308,7 +339,7 @@ for i_col, col in enumerate(cols_to_impute):
308339
loc = plticker.MultipleLocator(base=2*365)
309340
ax.xaxis.set_major_locator(loc)
310341
ax.tick_params(axis='both', which='major')
311-
plt.xlim(datetime(2010, 1, 1), datetime(2015, 3, 1))
342+
# plt.xlim(datetime(2019, 2, 1), datetime(2019, 3, 1))
312343
i_plot += 1
313344
plt.savefig("figures/imputations_benchmark.png")
314345
plt.show()

0 commit comments

Comments
 (0)