@@ -19,6 +19,18 @@ In Qolmat, a few data imputation methods are implemented as well as a way to eva
1919
2020First, import some useful libraries
2121
22+ ``` python
23+ X= np.array([[0 ], [1 ], [2 ]])
24+ ```
25+
26+ ``` python
27+ np.cov(X)
28+ ```
29+
30+ ``` python
31+
32+ ```
33+
2234``` python
2335import warnings
2436# warnings.filterwarnings('error')
@@ -116,52 +128,70 @@ ratio_masked = 0.1
116128```
117129
118130``` python
119- imputer_mean = imputers.ImputerMean(groups = [ " station" ] )
120- imputer_median = imputers.ImputerMedian(groups = [ " station" ] )
121- imputer_mode = imputers.ImputerMode(groups = [ " station" ] )
122- imputer_locf = imputers.ImputerLOCF(groups = [ " station" ] )
123- imputer_nocb = imputers.ImputerNOCB(groups = [ " station" ] )
124- imputer_interpol = imputers.ImputerInterpolation(groups = [ " station" ] , method = " linear" )
125- imputer_spline = imputers.ImputerInterpolation(groups = [ " station" ] , method = " spline" , order = 2 )
126- imputer_shuffle = imputers.ImputerShuffle(groups = [ " station" ] )
127- imputer_residuals = imputers.ImputerResiduals(groups = [ " station" ] , period = 365 , model_tsa = " additive" , extrapolate_trend = " freq" , method_interpolation = " linear" )
128-
129- imputer_rpca = imputers.ImputerRPCA(groups = [ " station" ], columnwise = False , max_iter = 256 , tau = 2 , lam = 1 )
130-
131- imputer_ou = imputers.ImputerEM(groups = [ " station" ] , model = " multinormal" , method = " sample" , max_iter_em = 34 , n_iter_ou = 15 , dt = 1e-3 )
132- imputer_tsou = imputers.ImputerEM(groups = [ " station" ] , model = " VAR1" , method = " sample" , max_iter_em = 34 , n_iter_ou = 15 , dt = 1e-3 )
133- imputer_tsmle = imputers.ImputerEM(groups = [ " station" ] , model = " VAR1" , method = " mle" , max_iter_em = 100 , n_iter_ou = 15 , dt = 1e-3 )
134-
135-
136- imputer_knn = imputers.ImputerKNN(groups = [ " station" ], k = 10 )
137- imputer_mice = imputers.ImputerMICE(groups = [ " station" ] , estimator = LinearRegression(), sample_posterior = False , max_iter = 100 , missing_values = np.nan)
138- imputer_regressor = imputers.ImputerRegressor(groups = [ " station" ] , estimator = LinearRegression())
131+ imputer_mean = imputers.ImputerMean(groups = ( " station" ,) )
132+ imputer_median = imputers.ImputerMedian(groups = ( " station" ,) )
133+ imputer_mode = imputers.ImputerMode(groups = ( " station" ,) )
134+ imputer_locf = imputers.ImputerLOCF(groups = ( " station" ,) )
135+ imputer_nocb = imputers.ImputerNOCB(groups = ( " station" ,) )
136+ imputer_interpol = imputers.ImputerInterpolation(groups = ( " station" ,) , method = " linear" )
137+ imputer_spline = imputers.ImputerInterpolation(groups = ( " station" ,) , method = " spline" , order = 2 )
138+ imputer_shuffle = imputers.ImputerShuffle(groups = ( " station" ,) )
139+ imputer_residuals = imputers.ImputerResiduals(groups = ( " station" ,) , period = 365 , model_tsa = " additive" , extrapolate_trend = " freq" , method_interpolation = " linear" )
140+
141+ imputer_rpca = imputers.ImputerRPCA(groups = ( " station" ,), columnwise = False , max_iterations = 256 , tau = 2 , lam = 1 )
142+
143+ imputer_ou = imputers.ImputerEM(groups = ( " station" ,) , model = " multinormal" , method = " sample" , max_iter_em = 34 , n_iter_ou = 15 , dt = 1e-3 )
144+ imputer_tsou = imputers.ImputerEM(groups = ( " station" ,) , model = " VAR1" , method = " sample" , max_iter_em = 34 , n_iter_ou = 15 , dt = 1e-3 )
145+ imputer_tsmle = imputers.ImputerEM(groups = ( " station" ,) , model = " VAR1" , method = " mle" , max_iter_em = 100 , n_iter_ou = 15 , dt = 1e-3 )
146+
147+
148+ imputer_knn = imputers.ImputerKNN(groups = ( " station" ,), n_neighbors = 10 )
149+ imputer_mice = imputers.ImputerMICE(groups = ( " station" ,) , estimator = LinearRegression(), sample_posterior = False , max_iter = 100 , missing_values = np.nan)
150+ imputer_regressor = imputers.ImputerRegressor(groups = ( " station" ,) , estimator = LinearRegression())
139151```
140152
141153``` python
142- generator_holes = missing_patterns.EmpiricalHoleGenerator(n_splits = 2 , groups = [ " station" ] , subset = cols_to_impute, ratio_masked = ratio_masked)
154+ generator_holes = missing_patterns.EmpiricalHoleGenerator(n_splits = 2 , groups = ( " station" ,) , subset = cols_to_impute, ratio_masked = ratio_masked)
143155```
144156
145157``` python
146158dict_config_opti = {
147159 " tau" : ho.hp.uniform(" tau" , low = .5 , high = 5 ),
148160 " lam" : ho.hp.uniform(" lam" , low = .1 , high = 1 ),
149161}
150- imputer_rpca_opti = imputers.ImputerRPCA(groups = [ " station" ], columnwise = False , max_iter = 256 )
162+ imputer_rpca_opti = imputers.ImputerRPCA(groups = ( " station" ,), columnwise = False , max_iterations = 256 )
151163imputer_rpca_opti = hyperparameters.optimize(
152164 imputer_rpca_opti,
153165 df_data,
154166 generator = generator_holes,
155167 metric = " mae" ,
156168 max_evals = 10 ,
157- dict_config_opti = dict_config_opti
169+ dict_spaces = dict_config_opti
158170)
159171# imputer_rpca_opti.params_optim = hyperparams_opti
160172```
161173
174+ ``` python
175+ dict_config_opti2 = {
176+ " tau/TEMP" : ho.hp.uniform(" tau/TEMP" , low = .5 , high = 5 ),
177+ " tau/PRES" : ho.hp.uniform(" tau/PRES" , low = .5 , high = 5 ),
178+ " lam/TEMP" : ho.hp.uniform(" lam/TEMP" , low = .1 , high = 1 ),
179+ " lam/PRES" : ho.hp.uniform(" lam/PRES" , low = .1 , high = 1 ),
180+ }
181+ imputer_rpca_opti2 = imputers.ImputerRPCA(groups = (" station" ,), columnwise = True , max_iterations = 256 )
182+ imputer_rpca_opti2 = hyperparameters.optimize(
183+ imputer_rpca_opti2,
184+ df_data,
185+ generator = generator_holes,
186+ metric = " mae" ,
187+ max_evals = 10 ,
188+ dict_spaces = dict_config_opti2
189+ )
190+ ```
191+
162192``` python
163193dict_imputers = {
164- # "mean": imputer_mean,
194+ " mean" : imputer_mean,
165195 # "median": imputer_median,
166196 # "mode": imputer_mode,
167197 " interpolation" : imputer_interpol,
@@ -171,8 +201,9 @@ dict_imputers = {
171201 # "OU": imputer_ou,
172202 " TSOU" : imputer_tsou,
173203 " TSMLE" : imputer_tsmle,
174- " RPCA" : imputer_rpca,
175- " RPCA_opti" : imputer_rpca_opti,
204+ # "RPCA": imputer_rpca,
205+ # "RPCA_opti": imputer_rpca_opti,
206+ # "RPCA_opti2": imputer_rpca_opti2,
176207 # "locf": imputer_locf,
177208 # "nocb": imputer_nocb,
178209 # "knn": imputer_knn,
@@ -308,7 +339,7 @@ for i_col, col in enumerate(cols_to_impute):
308339 loc = plticker.MultipleLocator(base = 2 * 365 )
309340 ax.xaxis.set_major_locator(loc)
310341 ax.tick_params(axis = ' both' , which = ' major' )
311- plt.xlim(datetime(2010 , 1 , 1 ), datetime(2015 , 3 , 1 ))
342+ # plt.xlim(datetime(2019, 2 , 1), datetime(2019 , 3, 1))
312343 i_plot += 1
313344plt.savefig(" figures/imputations_benchmark.png" )
314345plt.show()
0 commit comments