Skip to content

Commit 58e5459

Browse files
author
vm-aifluence-jro
committed
merged with dev
2 parents 2371832 + 3ecaaf2 commit 58e5459

File tree

13 files changed

+363
-368
lines changed

13 files changed

+363
-368
lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,4 +62,6 @@ examples/local
6262

6363
# VSCode
6464
.vscode
65-
# examples/benchmark.ipynb
65+
66+
# Logs
67+
nohup.txt

examples/benchmark.md

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -73,15 +73,15 @@ cols_to_impute = ["TEMP", "PRES"]
7373

7474
The dataset `Artificial` is designed as the sum of a periodic signal, white noise and some outliers.
7575

76-
```python tags=[]
77-
df_data
78-
```
79-
8076
```python
8177
# df_data = data.get_data_corrupted("Artificial", ratio_masked=.2, mean_size=10)
8278
# cols_to_impute = ["signal"]
8379
```
8480

81+
```python tags=[]
82+
df_data
83+
```
84+
8585
Let's take a look at the variables to impute. We consider only one station, Aotizhongxin.
8686
The time series display seasonality (with a period of roughly 12 months).
8787

@@ -131,8 +131,8 @@ imputer_spline = imputers.ImputerInterpolation(groups=["station"], method="splin
131131
imputer_shuffle = imputers.ImputerShuffle(groups=["station"])
132132
imputer_residuals = imputers.ImputerResiduals(groups=["station"], period=7, model_tsa="additive", extrapolate_trend="freq", method_interpolation="linear")
133133

134-
imputer_rpca = imputers.ImputerRPCA(groups=["station"], columnwise=True, period=365, max_iter=200, tau=2, lam=.3)
135-
imputer_rpca_opti = imputers.ImputerRPCA(groups=["station"], columnwise=True, period=365, max_iter=100)
134+
imputer_rpca = imputers.ImputerRPCA(groups=["station"], columnwise=True, period=7, max_iter=200, tau=2, lam=.3)
135+
imputer_rpca_opti = imputers.ImputerRPCA(groups=["station"], columnwise=True, period=7, max_iter=100)
136136

137137
imputer_ou = imputers.ImputerEM(groups=["station"], model="multinormal", method="sample", max_iter_em=34, n_iter_ou=15, dt=1e-3)
138138
imputer_tsou = imputers.ImputerEM(groups=["station"], model="VAR1", method="sample", max_iter_em=34, n_iter_ou=15, dt=1e-3)
@@ -154,8 +154,8 @@ dict_imputers = {
154154
# "OU": imputer_ou,
155155
# "TSOU": imputer_tsou,
156156
# "TSMLE": imputer_tsmle,
157-
# "RPCA": imputer_rpca,
158-
# "RPCA_opti": imputer_rpca_opti,
157+
"RPCA": imputer_rpca,
158+
"RPCA_opti": imputer_rpca_opti,
159159
# "locf": imputer_locf,
160160
# "nocb": imputer_nocb,
161161
# "knn": imputer_knn,
@@ -164,7 +164,7 @@ dict_imputers = {
164164
}
165165
n_imputers = len(dict_imputers)
166166

167-
search_params = {
167+
dict_config_opti = {
168168
"RPCA_opti": {
169169
"tau": {"min": .5, "max": 5, "type":"Real"},
170170
"lam": {"min": .1, "max": 1, "type":"Real"},
@@ -195,15 +195,15 @@ comparison = comparator.Comparator(
195195
generator_holes = generator_holes,
196196
metrics=["mae", "wmape", "KL_columnwise", "ks_test", "energy"],
197197
n_calls_opt=10,
198-
search_params=search_params,
198+
dict_config_opti=dict_config_opti,
199199
)
200200
results = comparison.compare(df_data)
201201
results
202202
```
203203

204204
```python
205205
df_plot = results.loc["energy", "All"]
206-
plt.bar(df_plot.index, df_plot, color=tab10(0))
206+
plt.barh(df_plot.index, df_plot, color=tab10(0))
207207
plt.show()
208208
```
209209

@@ -343,7 +343,7 @@ comparison = comparator.Comparator(
343343
df_data.columns,
344344
generator_holes = generator_holes,
345345
n_calls_opt=10,
346-
search_params=search_params,
346+
dict_config_opti=dict_config_opti,
347347
)
348348
results = comparison.compare(df_data)
349349
results

nohup.txt

Lines changed: 0 additions & 184 deletions
This file was deleted.

qolmat/benchmark/comparator.py

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
import logging
21
from functools import partial
3-
from typing import Any, Dict, List, Optional, Union
2+
from typing import Any, Callable, Dict, List, Optional
43

54
import numpy as np
65
import pandas as pd
@@ -21,15 +20,15 @@ class Comparator:
2120
list of column's names selected (all with at least one null value will be imputed)
2221
columnwise_evaluation : Optional[bool], optional
2322
whether the metric should be calculated column-wise or not, by default False
24-
search_params: Optional[Dict[str, Dict[str, Union[str, float, int]]]] = {}
23+
dict_config_opti: Optional[Dict[str, Dict[str, Union[str, float, int]]]] = {}
2524
dictionary of search space for each implementation method. By default, the value is set to
2625
{}.
2726
n_calls_opt: int = 10
2827
number of calls of the optimization algorithm
2928
10.
3029
"""
3130

32-
dict_metrics: Dict[str, Any] = {
31+
dict_metrics: Dict[str, Callable] = {
3332
"mse": metrics.mean_squared_error,
3433
"rmse": metrics.root_mean_squared_error,
3534
"mae": metrics.mean_absolute_error,
@@ -50,14 +49,14 @@ def __init__(
5049
selected_columns: List[str],
5150
generator_holes: _HoleGenerator,
5251
metrics: List = ["mae", "wmape", "KL_columnwise"],
53-
search_params: Optional[Dict[str, Dict[str, Union[float, int, str]]]] = {},
52+
dict_config_opti: Optional[Dict[str, Any]] = {},
5453
n_calls_opt: int = 10,
5554
):
5655
self.dict_imputers = dict_models
5756
self.selected_columns = selected_columns
5857
self.generator_holes = generator_holes
5958
self.metrics = metrics
60-
self.search_params = search_params
59+
self.dict_config_opti = dict_config_opti
6160
self.n_calls_opt = n_calls_opt
6261

6362
def get_errors(
@@ -92,7 +91,7 @@ def evaluate_errors_sample(
9291
self,
9392
imputer: Any,
9493
df: pd.DataFrame,
95-
list_spaces: List[Dict] = [],
94+
dict_config_opti_imputer: Dict[str, Any] = {},
9695
) -> pd.Series:
9796
"""Evaluate the errors in the cross-validation
9897
@@ -102,7 +101,7 @@ def evaluate_errors_sample(
102101
imputation model
103102
df : pd.DataFrame
104103
dataframe to impute
105-
search_space : Dict
104+
dict_config_opti_imputer : Dict
106105
search space for tested_model's hyperparameters
107106
108107
Returns
@@ -115,12 +114,10 @@ def evaluate_errors_sample(
115114
for df_mask in self.generator_holes.split(df_origin):
116115
df_corrupted = df_origin.copy()
117116
df_corrupted[df_mask] = np.nan
118-
119-
assert not np.logical_and(df_mask, df_origin.isna()).any().any()
120-
if list_spaces:
117+
if dict_config_opti_imputer:
121118
cv = cross_validation.CrossValidation(
122119
imputer,
123-
list_spaces=list_spaces,
120+
dict_config_opti_imputer=dict_config_opti_imputer,
124121
hole_generator=self.generator_holes,
125122
n_calls=self.n_calls_opt,
126123
)
@@ -155,12 +152,12 @@ def compare(
155152
dict_errors = {}
156153

157154
for name, imputer in self.dict_imputers.items():
158-
search_params = self.search_params.get(name, {})
159-
160-
list_spaces = utils.get_search_space(search_params)
155+
dict_config_opti_imputer = self.dict_config_opti.get(name, {})
161156

162157
try:
163-
dict_errors[name] = self.evaluate_errors_sample(imputer, df, list_spaces)
158+
dict_errors[name] = self.evaluate_errors_sample(
159+
imputer, df, dict_config_opti_imputer
160+
)
164161
print(f"Tested model: {type(imputer).__name__}")
165162
except Exception as excp:
166163
print("Error while testing ", type(imputer).__name__)

0 commit comments

Comments
 (0)