@@ -116,6 +116,8 @@ ratio_masked = 0.1
116116```
117117
118118``` python
119+ dict_config_opti = {}
120+
119121imputer_mean = imputers.ImputerMean(groups = (" station" ,))
120122imputer_median = imputers.ImputerMedian(groups = (" station" ,))
121123imputer_mode = imputers.ImputerMode(groups = (" station" ,))
@@ -126,7 +128,19 @@ imputer_spline = imputers.ImputerInterpolation(groups=("station",), method="spli
126128imputer_shuffle = imputers.ImputerShuffle(groups = (" station" ,))
127129imputer_residuals = imputers.ImputerResiduals(groups = (" station" ,), period = 365 , model_tsa = " additive" , extrapolate_trend = " freq" , method_interpolation = " linear" )
128130
129- imputer_rpca = imputers.ImputerRPCA(groups = (" station" ,), columnwise = False , max_iterations = 256 , tau = 2 , lam = 1 )
131+ imputer_rpca = imputers.ImputerRPCA(groups = (" station" ,), columnwise = False , max_iterations = 500 , tau = 2 , lam = 0.05 )
132+ imputer_rpca_opti = imputers.ImputerRPCA(groups = (" station" ,), columnwise = False , max_iterations = 256 )
133+ dict_config_opti[" RPCA_opti" ] = {
134+ " tau" : ho.hp.uniform(" tau" , low = .5 , high = 5 ),
135+ " lam" : ho.hp.uniform(" lam" , low = .1 , high = 1 ),
136+ }
137+ imputer_rpca_opticw = imputers.ImputerRPCA(groups = (" station" ,), columnwise = True , max_iterations = 256 )
138+ dict_config_opti[" RPCA_opticw" ] = {
139+ " tau/TEMP" : ho.hp.uniform(" tau/TEMP" , low = .5 , high = 5 ),
140+ " tau/PRES" : ho.hp.uniform(" tau/PRES" , low = .5 , high = 5 ),
141+ " lam/TEMP" : ho.hp.uniform(" lam/TEMP" , low = .1 , high = 1 ),
142+ " lam/PRES" : ho.hp.uniform(" lam/PRES" , low = .1 , high = 1 ),
143+ }
130144
131145imputer_ou = imputers.ImputerEM(groups = (" station" ,), model = " multinormal" , method = " sample" , max_iter_em = 34 , n_iter_ou = 15 , dt = 1e-3 )
132146imputer_tsou = imputers.ImputerEM(groups = (" station" ,), model = " VAR1" , method = " sample" , max_iter_em = 34 , n_iter_ou = 15 , dt = 1e-3 )
@@ -142,41 +156,6 @@ imputer_regressor = imputers.ImputerRegressor(groups=("station",), estimator=Lin
142156generator_holes = missing_patterns.EmpiricalHoleGenerator(n_splits = 2 , groups = (" station" ,), subset = cols_to_impute, ratio_masked = ratio_masked)
143157```
144158
145- ``` python
146- dict_config_opti = {
147- " tau" : ho.hp.uniform(" tau" , low = .5 , high = 5 ),
148- " lam" : ho.hp.uniform(" lam" , low = .1 , high = 1 ),
149- }
150- imputer_rpca_opti = imputers.ImputerRPCA(groups = (" station" ,), columnwise = False , max_iterations = 256 )
151- imputer_rpca_opti = hyperparameters.optimize(
152- imputer_rpca_opti,
153- df_data,
154- generator = generator_holes,
155- metric = " mae" ,
156- max_evals = 10 ,
157- dict_spaces = dict_config_opti
158- )
159- # imputer_rpca_opti.params_optim = hyperparams_opti
160- ```
161-
162- ``` python
163- dict_config_opti2 = {
164- " tau/TEMP" : ho.hp.uniform(" tau/TEMP" , low = .5 , high = 5 ),
165- " tau/PRES" : ho.hp.uniform(" tau/PRES" , low = .5 , high = 5 ),
166- " lam/TEMP" : ho.hp.uniform(" lam/TEMP" , low = .1 , high = 1 ),
167- " lam/PRES" : ho.hp.uniform(" lam/PRES" , low = .1 , high = 1 ),
168- }
169- imputer_rpca_opti2 = imputers.ImputerRPCA(groups = (" station" ,), columnwise = True , max_iterations = 256 )
170- imputer_rpca_opti2 = hyperparameters.optimize(
171- imputer_rpca_opti2,
172- df_data,
173- generator = generator_holes,
174- metric = " mae" ,
175- max_evals = 10 ,
176- dict_spaces = dict_config_opti2
177- )
178- ```
179-
180159``` python
181160dict_imputers = {
182161 " mean" : imputer_mean,
@@ -189,9 +168,9 @@ dict_imputers = {
189168 # "OU": imputer_ou,
190169 " TSOU" : imputer_tsou,
191170 " TSMLE" : imputer_tsmle,
192- # "RPCA": imputer_rpca,
193- # "RPCA_opti": imputer_rpca_opti,
194- # "RPCA_opti2 ": imputer_rpca_opti2,
171+ " RPCA" : imputer_rpca,
172+ " RPCA_opti" : imputer_rpca_opti,
173+ # "RPCA_opticw": imputer_rpca_opticw,
195174 # "locf": imputer_locf,
196175 # "nocb": imputer_nocb,
197176 # "knn": imputer_knn,
@@ -218,7 +197,7 @@ comparison = comparator.Comparator(
218197 dict_imputers,
219198 cols_to_impute,
220199 generator_holes = generator_holes,
221- metrics = [" mae" , " wmape" , " KL_columnwise" , " ks_test" ],
200+ metrics = [" mae" , " wmape" , " KL_columnwise" , " ks_test" , " dist_corr_pattern " ],
222201 max_evals = 10 ,
223202 dict_config_opti = dict_config_opti,
224203)
@@ -230,11 +209,13 @@ results
230209df_plot = results.loc[" KL_columnwise" ,' TEMP' ]
231210plt.barh(df_plot.index, df_plot, color = tab10(0 ))
232211plt.title(' TEMP' )
212+ plt.xlabel(" KL" )
233213plt.show()
234214
235215df_plot = results.loc[" KL_columnwise" ,' PRES' ]
236216plt.barh(df_plot.index, df_plot, color = tab10(0 ))
237217plt.title(' PRES' )
218+ plt.xlabel(" KL" )
238219plt.show()
239220```
240221
@@ -245,8 +226,8 @@ plot.multibar(results.loc["mae"], decimals=1)
245226plt.ylabel(" mae" )
246227
247228fig.add_subplot(2 , 1 , 2 )
248- plot.multibar(results.loc[" KL_columnwise " ], decimals = 1 )
249- plt.ylabel(" KL " )
229+ plot.multibar(results.loc[" dist_corr_pattern " ], decimals = 2 )
230+ plt.ylabel(" dist_corr_pattern " )
250231
251232plt.savefig(" figures/imputations_benchmark_errors.png" )
252233plt.show()
@@ -294,10 +275,6 @@ for col in cols_to_impute:
294275
295276```
296277
297- ``` python
298- dfs_imputed
299- ```
300-
301278``` python
302279# plot.plot_imputations(df_station, dfs_imputed_station)
303280
0 commit comments