Skip to content

Commit 9b77d4e

Browse files
Julien RousselJulien Roussel
authored andcommitted
tests patched, skopt deprecated
1 parent 3fb28ae commit 9b77d4e

File tree

17 files changed

+135
-157
lines changed

17 files changed

+135
-157
lines changed

environment.ci.yml

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,13 @@ dependencies:
66
- pip=23.0.1
77
- pip:
88
- codecov
9-
- flake8==6.0.0
10-
- matplotlib==3.6.2
11-
- mypy==1.1.1
12-
- numpydoc==1.5.0
13-
- pytest==7.2.0
14-
- pytest-cov==4.0.0
15-
- pytest-mock==3.10.0
9+
- flake8
10+
- matplotlib
11+
- mypy
12+
- numpy==1.19
13+
- numpydoc
14+
- pytest
15+
- pytest-cov
16+
- pytest-mock
1617
- tensorflow
1718
- -e .

environment.dev.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,14 @@ channels:
55
dependencies:
66
- bump2version=1.0.1
77
- dcor=0.6
8-
- ipykernel=5.1.4
8+
- ipykernel=6.21.0
99
- jupyter=1.0.0
1010
- jupyterlab=1.2.6
1111
- jupytext=1.14.4
1212
- numpy=1.21
1313
- packaging=23.1
1414
- pandas=2.0.1
15+
- python=3.8
1516
- pip=23.0.1
1617
- scipy=1.10.1
1718
- scikit-learn=1.2.2

examples/benchmark.md

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ jupyter:
88
format_version: '1.3'
99
jupytext_version: 1.14.5
1010
kernelspec:
11-
display_name: Python 3 (ipykernel)
11+
display_name: env_qolmat_dev
1212
language: python
13-
name: python3
13+
name: env_qolmat_dev
1414
---
1515

1616
**This notebook aims to present the Qolmat repo through an example of a multivariate time series.
@@ -62,24 +62,24 @@ The dataset `Beijing` is the Beijing Multi-Site Air-Quality Data Set. It consist
6262
This dataset only contains numerical variables.
6363

6464
```python
65-
# df_data = data.get_data_corrupted("Beijing", ratio_masked=.2, mean_size=120)
65+
df_data = data.get_data_corrupted("Beijing", ratio_masked=.2, mean_size=120)
6666

6767
# cols_to_impute = ["TEMP", "PRES", "DEWP", "NO2", "CO", "O3", "WSPM"]
6868
# cols_to_impute = df_data.columns[df_data.isna().any()]
69-
# cols_to_impute = ["TEMP", "PRES"]
69+
cols_to_impute = ["TEMP", "PRES"]
7070

7171
```
7272

7373
The dataset `Artificial` is designed to have a sum of a periodical signal, a white noise and some outliers.
7474

7575
```python
76-
df_data = data.get_data_corrupted("Artificial", ratio_masked=.2, mean_size=10)
77-
cols_to_impute = ["signal"]
76+
# df_data = data.get_data_corrupted("Artificial", ratio_masked=.2, mean_size=10)
77+
# cols_to_impute = ["signal"]
7878
```
7979

8080
```python
81-
df_data = data.get_data("SNCF", n_groups_max=2)
82-
cols_to_impute = ["val_in"]
81+
# df_data = data.get_data("SNCF", n_groups_max=2)
82+
# cols_to_impute = ["val_in"]
8383
```
8484

8585
```python
@@ -132,14 +132,14 @@ imputer_nocb = imputers.ImputerNOCB(groups=["station"])
132132
imputer_interpol = imputers.ImputerInterpolation(groups=["station"], method="linear")
133133
imputer_spline = imputers.ImputerInterpolation(groups=["station"], method="spline", order=2)
134134
imputer_shuffle = imputers.ImputerShuffle(groups=["station"])
135-
imputer_residuals = imputers.ImputerResiduals(groups=["station"], period=7, model_tsa="additive", extrapolate_trend="freq", method_interpolation="linear")
135+
imputer_residuals = imputers.ImputerResiduals(groups=["station"], period=365, model_tsa="additive", extrapolate_trend="freq", method_interpolation="linear")
136136

137-
imputer_rpca = imputers.ImputerRPCA(groups=["station"], columnwise=True, period=7, max_iter=1000, tau=2, lam=1)
137+
imputer_rpca = imputers.ImputerRPCA(groups=["station"], columnwise=False, max_iter=256, tau=2, lam=1)
138138
# imputer_rpca_opti = imputers.ImputerRPCA(groups=["station"], columnwise=True, period=7, max_iter=100)
139139

140140
imputer_ou = imputers.ImputerEM(groups=["station"], model="multinormal", method="sample", max_iter_em=34, n_iter_ou=15, dt=1e-3)
141141
imputer_tsou = imputers.ImputerEM(groups=["station"], model="VAR1", method="sample", max_iter_em=34, n_iter_ou=15, dt=1e-3)
142-
imputer_tsmle = imputers.ImputerEM(groups=["station"], model="VAR1", method="mle", max_iter_em=100, n_iter_ou=15, dt=1e-3, period=7)
142+
imputer_tsmle = imputers.ImputerEM(groups=["station"], model="VAR1", method="mle", max_iter_em=100, n_iter_ou=15, dt=1e-3)
143143

144144

145145
imputer_knn = imputers.ImputerKNN(groups=["station"], k=10)
@@ -155,7 +155,7 @@ dict_imputers = {
155155
"shuffle": imputer_shuffle,
156156
# "residuals": imputer_residuals,
157157
# "OU": imputer_ou,
158-
# "TSOU": imputer_tsou,
158+
"TSOU": imputer_tsou,
159159
"TSMLE": imputer_tsmle,
160160
"RPCA": imputer_rpca,
161161
# "RPCA_opti": imputer_rpca_opti,
@@ -184,9 +184,6 @@ In order to compare the methods, we $i)$ artificially create missing data (for m
184184
</p>
185185

186186

187-
```python
188-
imputer_tsmle.hyperparams_user
189-
```
190187

191188
Concretely, the comparator takes as input a dataframe to impute, a proportion of nan to create, a dictionary of imputers (those previously mentioned), a list with the columns names to impute, a generator of holes specifying the type of holes to create and the search dictionary search_params for hyperparameter optimization.
192189

qolmat/benchmark/comparator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def get_errors(
6464
df_origin: pd.DataFrame,
6565
df_imputed: pd.DataFrame,
6666
df_mask: pd.DataFrame,
67-
) -> pd.DataFrame:
67+
) -> pd.Series:
6868
"""Functions evaluating the reconstruction's quality
6969
7070
Parameters

qolmat/benchmark/cross_validation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ def optimize_hyperparams(self, df: pd.DataFrame) -> Dict[str, Any]:
213213
Parameters
214214
----------
215215
df : pd.DataFrame
216-
DataFrame masked
216+
DataFrame with nans
217217
218218
Returns
219219
-------

qolmat/imputations/em_sampler.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ def __init__(
165165
tolerance: float = 1e-4,
166166
stagnation_threshold: float = 5e-3,
167167
stagnation_loglik: float = 2,
168-
period: Optional[int] = None,
168+
period: int = 1,
169169
):
170170
if method not in ["mle", "sample"]:
171171
raise ValueError(f"`method` must be 'mle' or 'sample', provided value is '{method}'")
@@ -223,8 +223,8 @@ def fit(self, X: NDArray):
223223
if not isinstance(X, np.ndarray):
224224
raise AssertionError("Invalid type. X must be a NDArray.")
225225

226-
X = self.scaler.fit_transform(X.T).T
227226
X = utils.prepare_data(X, self.period)
227+
X = self.scaler.fit_transform(X.T).T
228228

229229
mask_na = np.isnan(X)
230230

@@ -332,7 +332,7 @@ def __init__(
332332
tolerance: float = 1e-4,
333333
stagnation_threshold: float = 5e-3,
334334
stagnation_loglik: float = 2,
335-
period: Optional[int] = None,
335+
period: int = 1,
336336
) -> None:
337337
super().__init__(
338338
method=method,
@@ -545,7 +545,7 @@ def __init__(
545545
tolerance: float = 1e-4,
546546
stagnation_threshold: float = 5e-3,
547547
stagnation_loglik: float = 2,
548-
period: Optional[int] = None,
548+
period: int = 1,
549549
) -> None:
550550
super().__init__(
551551
method=method,

qolmat/imputations/rpca/rpca.py

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class RPCA(BaseEstimator, TransformerMixin):
3131

3232
def __init__(
3333
self,
34-
period: Optional[int] = None,
34+
period: int = 1,
3535
max_iter: int = int(1e4),
3636
tol: float = 1e-6,
3737
random_state: Union[None, int, np.random.RandomState] = None,
@@ -60,20 +60,13 @@ def decompose_rpca_signal(
6060
A: NDArray
6161
Anomalies
6262
"""
63-
D_init = utils.prepare_data(X, self.period)
64-
Omega = ~np.isnan(D_init)
63+
D = utils.prepare_data(X, self.period)
64+
Omega = ~np.isnan(D)
6565
# D_proj = rpca_utils.impute_nans(D_init, method="median")
66-
D_proj = D_init.T
67-
D_proj = utils.linear_interpolation(D_proj)
66+
D = utils.linear_interpolation(D)
6867

69-
# self.scaler = StandardScaler()
70-
# D_proj = self.scaler.fit_transform(D_proj)
71-
D_proj = D_proj.T
68+
M, A = self.decompose_rpca(D, Omega)
7269

73-
M, A = self.decompose_rpca(D_proj, Omega)
74-
75-
# M = self.scaler.inverse_transform(M.T).T
76-
# A = self.scaler.inverse_transform(A.T).T
7770
M_final = utils.get_shape_original(M, X.shape)
7871
A_final = utils.get_shape_original(A, X.shape)
7972

qolmat/imputations/rpca/rpca_noisy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ class RPCANoisy(RPCA):
5151

5252
def __init__(
5353
self,
54-
period: Optional[int] = None,
54+
period: int = 1,
5555
rank: Optional[int] = None,
5656
tau: Optional[float] = None,
5757
lam: Optional[float] = None,

qolmat/imputations/rpca/rpca_pcp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ class RPCAPCP(RPCA):
2929

3030
def __init__(
3131
self,
32-
period: Optional[int] = None,
32+
period: int = 1,
3333
mu: Optional[float] = None,
3434
lam: Optional[float] = None,
3535
max_iter: int = int(1e4),

qolmat/utils/exceptions.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,11 @@ def __init__(self):
44
"""Please install keras xx.xx.xx
55
pip install qolmat[keras]"""
66
)
7+
8+
9+
class SignalTooShort(Exception):
    """Raised when the requested `period` exceeds the signal length.

    Parameters
    ----------
    period : int
        Period requested by the caller.
    n_cols : int
        Number of columns (i.e. duration) of the signal.
    """

    def __init__(self, period: int, n_cols: int):
        # Keep the offending values so callers can inspect them without
        # having to parse the message.
        self.period = period
        self.n_cols = n_cols
        # Implicit concatenation keeps source indentation out of the message.
        super().__init__(
            "`period` must be smaller than the signal's duration.\n"
            f"`period` is {period} but the number of columns is {n_cols}"
        )

0 commit comments

Comments
 (0)