88 format_version : ' 1.3'
99 jupytext_version : 1.14.4
1010 kernelspec :
11- display_name : env_qolmat
11+ display_name : env_qolmat_dev
1212 language : python
13- name : env_qolmat
13+ name : env_qolmat_dev
1414---
1515
1616**This notebook aims to present the Qolmat repo through an example of a multivariate time series.
@@ -73,15 +73,15 @@ cols_to_impute = ["TEMP", "PRES"]
7373
7474The dataset `Artificial` is designed as the sum of a periodic signal, white noise and some outliers.
7575
76- ``` python tags=[]
77- df_data
78- ```
79-
8076``` python
8177# df_data = data.get_data_corrupted("Artificial", ratio_masked=.2, mean_size=10)
8278# cols_to_impute = ["signal"]
8379```
8480
81+ ``` python tags=[]
82+ df_data
83+ ```
84+
8585Let's take a look at the variables to impute. We consider only one station, Aotizhongxin.
8686The time series display seasonality (with a period of roughly 12 months).
8787
@@ -131,8 +131,8 @@ imputer_spline = imputers.ImputerInterpolation(groups=["station"], method="splin
131131imputer_shuffle = imputers.ImputerShuffle(groups = [" station" ])
132132imputer_residuals = imputers.ImputerResiduals(groups = [" station" ], period = 7 , model_tsa = " additive" , extrapolate_trend = " freq" , method_interpolation = " linear" )
133133
134- imputer_rpca = imputers.ImputerRPCA(groups = [" station" ], columnwise = True , period = 365 , max_iter = 200 , tau = 2 , lam = .3 )
135- imputer_rpca_opti = imputers.ImputerRPCA(groups = [" station" ], columnwise = True , period = 365 , max_iter = 100 )
134+ imputer_rpca = imputers.ImputerRPCA(groups = [" station" ], columnwise = True , period = 7 , max_iter = 200 , tau = 2 , lam = .3 )
135+ imputer_rpca_opti = imputers.ImputerRPCA(groups = [" station" ], columnwise = True , period = 7 , max_iter = 100 )
136136
137137imputer_ou = imputers.ImputerEM(groups = [" station" ], model = " multinormal" , method = " sample" , max_iter_em = 34 , n_iter_ou = 15 , dt = 1e-3 )
138138imputer_tsou = imputers.ImputerEM(groups = [" station" ], model = " VAR1" , method = " sample" , max_iter_em = 34 , n_iter_ou = 15 , dt = 1e-3 )
@@ -154,8 +154,8 @@ dict_imputers = {
154154 # "OU": imputer_ou,
155155 # "TSOU": imputer_tsou,
156156 # "TSMLE": imputer_tsmle,
157- # "RPCA": imputer_rpca,
158- # "RPCA_opti": imputer_rpca_opti,
157+ " RPCA" : imputer_rpca,
158+ " RPCA_opti" : imputer_rpca_opti,
159159 # "locf": imputer_locf,
160160 # "nocb": imputer_nocb,
161161 # "knn": imputer_knn,
@@ -164,7 +164,7 @@ dict_imputers = {
164164}
165165n_imputers = len (dict_imputers)
166166
167- search_params = {
167+ dict_config_opti = {
168168 " RPCA_opti" : {
169169 " tau" : {" min" : .5 , " max" : 5 , " type" :" Real" },
170170 " lam" : {" min" : .1 , " max" : 1 , " type" :" Real" },
@@ -195,15 +195,15 @@ comparison = comparator.Comparator(
195195 generator_holes = generator_holes,
196196 metrics = [" mae" , " wmape" , " KL_columnwise" , " ks_test" , " energy" ],
197197 n_calls_opt = 10 ,
198- search_params = search_params ,
198+ dict_config_opti = dict_config_opti ,
199199)
200200results = comparison.compare(df_data)
201201results
202202```
203203
204204``` python
205205df_plot = results.loc[" energy" , " All" ]
206- plt.bar (df_plot.index, df_plot, color = tab10(0 ))
206+ plt.barh (df_plot.index, df_plot, color = tab10(0 ))
207207plt.show()
208208```
209209
@@ -335,15 +335,15 @@ dict_imputers["MLP"] = imputer_mlp = imputers_keras.ImputerRegressorKeras(estima
335335
336336We can re-run the imputation model benchmark as before.
337337
338- ``` python tags=[] jupyter={"outputs_hidden": true}
338+ ``` python tags=[]
339339generator_holes = missing_patterns.EmpiricalHoleGenerator(n_splits = 2 , subset = cols_to_impute, ratio_masked = ratio_masked)
340340
341341comparison = comparator.Comparator(
342342 dict_imputers,
343343 df_data.columns,
344344 generator_holes = generator_holes,
345345 n_calls_opt = 10 ,
346- search_params = search_params ,
346+ dict_config_opti = dict_config_opti ,
347347)
348348results = comparison.compare(df_data)
349349results
@@ -356,7 +356,7 @@ plt.ylabel("mae")
356356plt.show()
357357```
358358
359- ``` python jupyter={"outputs_hidden": true}
359+ ``` python
360360df_plot = df_data
361361dfs_imputed = {name: imp.fit_transform(df_plot) for name, imp in dict_imputers.items()}
362362station = df_plot.index.get_level_values(" station" )[0 ]
0 commit comments