# Reference frame; both columns already contain missing values.
df_origin = pd.DataFrame(
    {"col1": [0, np.nan, 2, 4, np.nan], "col2": [-1, np.nan, 0.5, 1, 1.5]}
)
# Fully observed frame used as the imputed counterpart of ``df_origin``.
df_imputed = pd.DataFrame(
    {"col1": [0, 1, 2, 3.5, 4], "col2": [-1.5, 0, 1.5, 2, 1.5]}
)
# Boolean mask: True marks the cells that are blanked out in ``df_corrupted``.
df_mask = pd.DataFrame(
    {
        "col1": [False, False, True, False, False],
        "col2": [True, False, True, True, False],
    }
)
# Work on a copy so that ``df_origin`` keeps its values when cells are masked.
df_corrupted = df_origin.copy()
df_corrupted[df_mask] = np.nan

# RPCA imputer shared by the tests, configured with columnwise=True and period=1.
imputer_rpca = ImputerRPCA(tau=2, random_state=42, columnwise=True, period=1)
dict_imputers_rpca = {"rpca": imputer_rpca}
# Hole generator configured with a single split and ratio_masked=0.5.
generator_holes = EmpiricalHoleGenerator(n_splits=1, ratio_masked=0.5)
# Search-space description keyed by imputer name. ``lam`` is given per column,
# the other hyper-parameters are given once.
dict_config_opti = {
    "rpca": {
        "lam": {
            "col1": {"min": 0.1, "max": 6, "type": "Real"},
            "col2": {"min": 1, "max": 4, "type": "Real"},
        },
        "tol": {"min": 1e-6, "max": 0.1, "type": "Real"},
        "max_iter": {"min": 99, "max": 100, "type": "Integer"},
        "norm": {"categories": ["L1", "L2"], "type": "Categorical"},
    }
}
# Direct indexing (rather than ``.get``) so a missing "rpca" entry fails loudly.
dict_config_opti_imputer = dict_config_opti["rpca"]
# Flat hyper-parameters: nested entries are addressed as "<param>/<column>".
# Keys must not carry stray whitespace, otherwise they cannot be mapped back
# onto the nested "lam" -> "col1" structure.
hyperparams_flat = {
    "lam/col1": 4.7,
    "lam/col2": 1.5,
    "tol": 0.07,
    "max_iter": 100,
    "norm": "L1",
}

# Cross-validation object shared by the tests below; it combines the RPCA
# imputer, its search-space configuration and the hole generator.
cv = cross_validation.CrossValidation(
    imputer=imputer_rpca,
    dict_config_opti_imputer=dict_config_opti_imputer,
    hole_generator=generator_holes,
)

# Expected ``bounds`` of each search dimension, listed in the same order as the
# entries of the "rpca" configuration ("lam" expands to one entry per column).
result_params_expected = {
    "lam1": (0.1, 6),
    "lam2": (1, 4),
    "tol": (1e-6, 0.1),
    "max_iter": (99, 100),
    "norm": ("L1", "L2"),
}


@pytest.mark.parametrize("dict_config_opti_imputer", [dict_config_opti_imputer])
@pytest.mark.parametrize("param", ["tol", "max_iter", "norm"])
def test_benchmark_cross_validation_get_dimension(
    dict_config_opti_imputer: Dict, param: str
) -> None:
    """Check the bounds of the dimension built for one non-nested parameter.

    Parameters
    ----------
    dict_config_opti_imputer : Dict
        Search-space configuration of the imputer.
    param : str
        Name of the hyper-parameter whose dimension is built.
    """
    result = cross_validation.get_dimension(
        dict_bounds=dict_config_opti_imputer[param], name_dimension=param
    )
    result_expected = result_params_expected[param]
    assert result.bounds == result_expected


@pytest.mark.parametrize("dict_config_opti_imputer", [dict_config_opti_imputer])
def test_benchmark_cross_validation_get_search_space(dict_config_opti_imputer: Dict) -> None:
    """Check that every dimension of the search space has the expected bounds.

    Parameters
    ----------
    dict_config_opti_imputer : Dict
        Search-space configuration of the imputer.
    """
    list_result = cross_validation.get_search_space(dict_config_opti_imputer)
    list_expected_bounds = list(result_params_expected.values())
    # ``zip`` stops at the shorter input, so check the length explicitly:
    # otherwise a search space with missing dimensions would still pass.
    assert len(list_result) == len(list_expected_bounds)
    for result, expected_bounds in zip(list_result, list_expected_bounds):
        assert result.bounds == expected_bounds


@pytest.mark.parametrize("hyperparams_flat", [hyperparams_flat])
def test_benchmark_cross_validation_deflat_hyperparams(
    hyperparams_flat: Dict[str, Union[float, int, str]]
) -> None:
    """Check that flat "<param>/<column>" keys are rebuilt into nested dicts.

    Parameters
    ----------
    hyperparams_flat : Dict[str, Union[float, int, str]]
        Flat hyper-parameter mapping to de-flatten.
    """
    result_deflat = cross_validation.deflat_hyperparams(hyperparams_flat=hyperparams_flat)
    result_expected = {
        "lam": {"col1": 4.7, "col2": 1.5},
        "tol": 0.07,
        "max_iter": 100,
        "norm": "L1",
    }
    assert result_deflat == result_expected


7084@pytest .mark .parametrize ("df1" , [df_origin ])
@@ -78,23 +92,27 @@ def test_benchmark_cross_validation_loss_function(
7892 np .testing .assert_raises (ValueError , cv .loss_function , df1 , df2 , df_mask )
7993 cv .loss_norm = 2
8094 result_cv2 = cv .loss_function (df_origin = df1 , df_imputed = df2 , df_mask = df_mask )
81- np .testing .assert_allclose (result_cv2 , 1.58113 , atol = 1e-5 )
95+ np .testing .assert_allclose (result_cv2 , 1.5 , atol = 1e-5 )
8296 cv .loss_norm = 1
8397 result_cv1 = cv .loss_function (df_origin = df1 , df_imputed = df2 , df_mask = df_mask )
84- np .testing .assert_allclose (result_cv1 , 3 , atol = 1e-5 )
98+ np .testing .assert_allclose (result_cv1 , 2.5 , atol = 1e-5 )
8599
86100
@pytest.mark.parametrize("df", [df_corrupted])
def test_benchmark_cross_validation_optimize_hyperparams(df: pd.DataFrame) -> None:
    """Check the hyper-parameters returned by ``cv.optimize_hyperparams``.

    Parameters
    ----------
    df : pd.DataFrame
        Corrupted frame on which the hyper-parameters are optimised.
    """
    result_hp = cv.optimize_hyperparams(df)
    # Exact-value comparison: NOTE(review) this presumably relies on the
    # imputer's fixed random_state making the search reproducible - confirm.
    result_expected = {
        "lam/col1": 4.799603622475375,
        "lam/col2": 1.5503043695984915,
        "tol": 0.07796932033627668,
        "max_iter": 100,
        "norm": "L1",
    }
    assert result_hp == result_expected


@pytest.mark.parametrize("df", [df_corrupted])
def test_benchmark_cross_validation_fit_transform(df: pd.DataFrame) -> None:
    """Check the frame returned by ``cv.fit_transform`` on the corrupted data.

    Parameters
    ----------
    df : pd.DataFrame
        Corrupted frame to impute.
    """
    result_cv = cv.fit_transform(df)
    result_expected = pd.DataFrame(
        {"col1": [0, 2, 2, 4, 2], "col2": [1.5, 1.5, 1.5, 1.5, 1.5]}
    )
    # Tolerance-based comparison because the imputed values are floats.
    np.testing.assert_allclose(result_cv, result_expected, atol=1e-5)
0 commit comments