88 format_version : ' 1.3'
99 jupytext_version : 1.14.4
1010 kernelspec :
11- display_name : env_qolmat_dev
11+ display_name : env_qolmat
1212 language : python
13- name : env_qolmat_dev
13+ name : env_qolmat
1414---
1515
1616** This notebook aims to present the Qolmat repo through an example of a multivariate time series.
@@ -172,8 +172,8 @@ dict_imputers = {
172172 # "locf": imputer_locf,
173173 # "nocb": imputer_nocb,
174174 # "knn": imputer_knn,
175- # "ols": imputer_regressor,
176- # "mice_ols": imputer_mice,
175+ " ols" : imputer_regressor,
176+ " mice_ols" : imputer_mice,
177177}
178178n_imputers = len (dict_imputers)
179179```
@@ -295,13 +295,14 @@ plt.show()
295295
296296```
297297
298- ## (Optional) Neuronal Network Model
298+ ## (Optional) Deep Learning Model
299299
300300
301301In this section, we present an MLP model of data imputation using PyTorch, which can be installed using a "pip install torch".
302302
303303``` python
304304from qolmat.imputations import imputers_pytorch
305+ from qolmat.imputations.diffusions.ddpms import TabDDPM
305306try :
306307 import torch.nn as nn
307308except ModuleNotFoundError :
@@ -323,33 +324,56 @@ For the example, we use a simple MLP model with 3 layers of neurons.
323324Then we train the model without taking a group on the stations
324325
325326``` python
326- estimator = nn.Sequential(
327- nn.Linear(np.sum(df_data.isna().sum()== 0 ), 256 ),
328- nn.ReLU(),
329- nn.Linear(256 , 128 ),
330- nn.ReLU(),
331- nn.Linear(128 , 64 ),
332- nn.ReLU(),
333- nn.Linear(64 , 1 )
334- )
335- # imputers_pytorch.build_mlp_example(input_dim=np.sum(df_data.isna().sum()==0), list_num_neurons=[256,128,64])
336- dict_imputers[" MLP" ] = imputer_mlp = imputers_pytorch.ImputerRegressorPyTorch(estimator = estimator, groups = [' station' ], handler_nan = " column" , epochs = 500 )
327+ fig = plt.figure(figsize = (10 * n_stations, 3 * n_cols))
328+ for i_station, (station, df) in enumerate (df_data.groupby(" station" )):
329+ df_station = df_data.loc[station]
330+ for i_col, col in enumerate (cols_to_impute):
331+ fig.add_subplot(n_cols, n_stations, i_col * n_stations + i_station + 1 )
332+ plt.plot(df_station[col], ' .' , label = station)
333+ # break
334+ plt.ylabel(col)
335+ plt.xticks(rotation = 15 )
336+ if i_col == 0 :
337+ plt.title(station)
338+ if i_col != n_cols - 1 :
339+ plt.xticks([], [])
340+ plt.show()
341+ ```
342+
343+ ``` python
344+ # estimator = nn.Sequential(
345+ # nn.Linear(np.sum(df_data.isna().sum()==0), 256),
346+ # nn.ReLU(),
347+ # nn.Linear(256, 128),
348+ # nn.ReLU(),
349+ # nn.Linear(128, 64),
350+ # nn.ReLU(),
351+ # nn.Linear(64, 1)
352+ # )
353+ estimator = imputers_pytorch.build_mlp(input_dim = np.sum(df_data.isna().sum()== 0 ), list_num_neurons = [256 ,128 ,64 ])
354+ encoder, decoder = imputers_pytorch.build_autoencoder(input_dim = df_data.values.shape[1 ],latent_dim = 4 , output_dim = df_data.values.shape[1 ], list_num_neurons = [4 * 4 , 2 * 4 ])
355+ ```
356+
357+ ``` python
358+ dict_imputers[" MLP" ] = imputer_mlp = imputers_pytorch.ImputerRegressorPyTorch(estimator = estimator, groups = (' station' ,), handler_nan = " column" , epochs = 500 )
359+ dict_imputers[" Autoencoder" ] = imputer_autoencoder = imputers_pytorch.ImputerAutoencoder(encoder, decoder, max_iterations = 100 , epochs = 100 )
360+ dict_imputers[" Diffusion" ] = imputer_diffusion = imputers_pytorch.ImputerDiffusion(model = TabDDPM(num_sampling = 5 ), epochs = 100 , batch_size = 100 )
337361```
338362
339363We can re-run the imputation model benchmark as before.
340364``` python tags=[]
341- generator_holes = missing_patterns.EmpiricalHoleGenerator(n_splits = 2 , groups = [ " station" ] , subset = cols_to_impute, ratio_masked = ratio_masked)
365+ generator_holes = missing_patterns.EmpiricalHoleGenerator(n_splits = 3 , groups = ( ' station' ,) , subset = cols_to_impute, ratio_masked = ratio_masked)
342366
343367comparison = comparator.Comparator(
344368 dict_imputers,
345- cols_to_impute ,
369+ selected_columns = df_data.columns ,
346370 generator_holes = generator_holes,
347371 metrics = [" mae" , " wmape" , " KL_columnwise" , " ks_test" ],
348372 max_evals = 10 ,
349373 dict_config_opti = dict_config_opti,
350374)
351375results = comparison.compare(df_data)
352- results
376+ results.style.highlight_min( color = " green " , axis = 1 )
353377```
354378``` python tags=[]
355379df_plot = df_data
0 commit comments