1818from sklearn .pipeline import Pipeline , make_pipeline
1919from sklearn .preprocessing import OneHotEncoder
2020from sklearn .utils .validation import check_is_fitted
21+ from scipy .stats import ttest_1samp
2122from typing_extensions import TypedDict
2223
2324from mapie ._typing import NDArray
100101 test_size = None ,
101102 random_state = random_state
102103 ),
104+ "cv_plus_median" : Params (
105+ method = "plus" ,
106+ agg_function = "median" ,
107+ cv = KFold (n_splits = 3 , shuffle = True , random_state = random_state ),
108+ test_size = None ,
109+ random_state = random_state
110+ ),
103111 "cv_minmax" : Params (
104112 method = "minmax" ,
105113 agg_function = "mean" ,
131139}
132140
# Reference mean prediction-interval widths per strategy; the regression
# tests below compare freshly computed widths against these values.
WIDTHS = dict(
    naive=3.80,
    split=3.89,
    jackknife=3.89,
    jackknife_plus=3.90,
    jackknife_minmax=3.96,
    cv=3.88,
    cv_plus=3.91,
    cv_minmax=4.07,
    prefit=3.89,
    cv_plus_median=3.91,
    jackknife_plus_ab=3.90,
    jackknife_minmax_ab=4.14,
    jackknife_plus_median_ab=3.88,
)
148156
# Reference empirical coverages per strategy; the regression tests below
# compare freshly computed coverage scores against these values.
COVERAGES = dict(
    naive=0.954,
    split=0.956,
    jackknife=0.956,
    jackknife_plus=0.952,
    jackknife_minmax=0.962,
    cv=0.954,
    cv_plus=0.954,
    cv_minmax=0.962,
    prefit=0.956,
    cv_plus_median=0.954,
    jackknife_plus_ab=0.952,
    jackknife_minmax_ab=0.968,
    jackknife_plus_median_ab=0.952,
)
164172
165173
@@ -212,15 +220,15 @@ def test_valid_agg_function(agg_function: str) -> None:
212220
@pytest.mark.parametrize(
    "cv", [None, -1, 2, KFold(), LeaveOneOut(),
           ShuffleSplit(n_splits=1, test_size=0.5),
           PredefinedSplit(test_fold=[-1] * 3 + [0] * 3),
           "prefit", "split"]
)
def test_valid_cv(cv: Any) -> None:
    """Test that valid cv raise no errors."""
    model = LinearRegression()
    model.fit(X_toy, y_toy)
    # test_size=0.5 — presumably only consumed by the split-based cv
    # options and ignored by the others; confirm against MapieRegressor API.
    mapie_reg = MapieRegressor(estimator=model, cv=cv, test_size=0.5)
    mapie_reg.fit(X_toy, y_toy)
    mapie_reg.predict(X_toy, alpha=0.5)
226234
@@ -237,7 +245,7 @@ def test_too_large_cv(cv: Any) -> None:
237245
238246
239247@pytest .mark .parametrize ("strategy" , [* STRATEGIES ])
240- @pytest .mark .parametrize ("dataset" , [(X , y ), ( X_toy , y_toy ) ])
248+ @pytest .mark .parametrize ("dataset" , [(X , y )])
241249@pytest .mark .parametrize ("alpha" , [0.2 , [0.2 , 0.4 ], (0.2 , 0.4 )])
242250def test_predict_output_shape (
243251 strategy : str , alpha : Any , dataset : Tuple [NDArray , NDArray ]
@@ -252,6 +260,46 @@ def test_predict_output_shape(
252260 assert y_pis .shape == (X .shape [0 ], 2 , n_alpha )
253261
254262
@pytest.mark.parametrize("delta", [0.6, 0.8])
@pytest.mark.parametrize("n_calib", [10 + i for i in range(13)] + [50, 100])
def test_coverage_validity(delta: float, n_calib: int) -> None:
    """
    Test that the prefit method provides valid coverage
    for different calibration data sizes and coverage targets.

    The model is trained once; the remaining data is then repeatedly
    re-split into calibration and test sets, and the empirical coverage
    of the resulting prediction intervals is recorded.  One-sided
    t-tests check that the mean coverage is neither statistically below
    the target ``delta`` nor statistically above the finite-sample
    upper bound ``delta + 1 / (n_calib + 1)``.
    """
    n_split, n_train, n_test = 100, 100, 1000
    n_all = n_train + n_calib + n_test
    X, y = make_regression(n_all, random_state=random_state)
    Xtr, Xct, ytr, yct = train_test_split(
        X, y, train_size=n_train, random_state=random_state
    )

    model = LinearRegression()
    model.fit(Xtr, ytr)

    cov_list = []
    for _ in range(n_split):
        mapie_reg = MapieRegressor(estimator=model, method="base", cv="prefit")
        # No fixed random_state here: each iteration draws a fresh
        # calibration/test split so the coverages form an i.i.d. sample.
        Xc, Xt, yc, yt = train_test_split(Xct, yct, test_size=n_test)
        mapie_reg.fit(Xc, yc)
        _, y_pis = mapie_reg.predict(Xt, alpha=1 - delta)
        y_low, y_up = y_pis[:, 0, 0], y_pis[:, 1, 0]
        coverage = regression_coverage_score(yt, y_low, y_up)
        cov_list.append(coverage)

    # Here we are testing whether the average coverage is statistically
    # less than the target coverage.
    mean_low, mean_up = delta, delta + 1 / (n_calib + 1)
    _, pval_low = ttest_1samp(cov_list, popmean=mean_low, alternative='less')
    _, pval_up = ttest_1samp(cov_list, popmean=mean_up, alternative='greater')

    # We perform a FWER controlling procedure (Bonferroni)
    p_fwer = 0.01  # probability of making one or more false discoveries: 1%
    p_bonf = p_fwer / 30  # because a total of 30 test_coverage_validity
    np.testing.assert_array_less(p_bonf, pval_low)
    np.testing.assert_array_less(p_bonf, pval_up)
301+
302+
255303def test_same_results_prefit_split () -> None :
256304 """
257305 Test checking that if split and prefit method have exactly
@@ -265,12 +313,12 @@ def test_same_results_prefit_split() -> None:
265313 X_train , X_calib = X [train_index ], X [val_index ]
266314 y_train , y_calib = y [train_index ], y [val_index ]
267315
268- mapie_reg = MapieRegressor (cv = cv )
316+ mapie_reg = MapieRegressor (method = 'base' , cv = cv )
269317 mapie_reg .fit (X , y )
270318 y_pred_1 , y_pis_1 = mapie_reg .predict (X , alpha = 0.1 )
271319
272320 model = LinearRegression ().fit (X_train , y_train )
273- mapie_reg = MapieRegressor (estimator = model , cv = "prefit" )
321+ mapie_reg = MapieRegressor (estimator = model , method = 'base' , cv = "prefit" )
274322 mapie_reg .fit (X_calib , y_calib )
275323 y_pred_2 , y_pis_2 = mapie_reg .predict (X , alpha = 0.1 )
276324
@@ -334,8 +382,8 @@ def test_results_single_and_multi_jobs(strategy: str) -> None:
334382 mapie_multi = MapieRegressor (n_jobs = - 1 , ** STRATEGIES [strategy ])
335383 mapie_single .fit (X_toy , y_toy )
336384 mapie_multi .fit (X_toy , y_toy )
337- y_pred_single , y_pis_single = mapie_single .predict (X_toy , alpha = 0.2 )
338- y_pred_multi , y_pis_multi = mapie_multi .predict (X_toy , alpha = 0.2 )
385+ y_pred_single , y_pis_single = mapie_single .predict (X_toy , alpha = 0.5 )
386+ y_pred_multi , y_pis_multi = mapie_multi .predict (X_toy , alpha = 0.5 )
339387 np .testing .assert_allclose (y_pred_single , y_pred_multi )
340388 np .testing .assert_allclose (y_pis_single , y_pis_multi )
341389
@@ -463,7 +511,7 @@ def test_linear_data_confidence_interval(strategy: str) -> None:
463511 """
464512 mapie = MapieRegressor (** STRATEGIES [strategy ])
465513 mapie .fit (X_toy , y_toy )
466- y_pred , y_pis = mapie .predict (X_toy , alpha = 0.2 )
514+ y_pred , y_pis = mapie .predict (X_toy , alpha = 0.5 )
467515 np .testing .assert_allclose (y_pis [:, 0 , 0 ], y_pis [:, 1 , 0 ])
468516 np .testing .assert_allclose (y_pred , y_pis [:, 0 , 0 ])
469517
@@ -506,7 +554,7 @@ def test_results_prefit_naive() -> None:
506554 is equivalent to the "naive" method.
507555 """
508556 estimator = LinearRegression ().fit (X , y )
509- mapie_reg = MapieRegressor (estimator = estimator , cv = "prefit" )
557+ mapie_reg = MapieRegressor (estimator = estimator , method = "base" , cv = "prefit" )
510558 mapie_reg .fit (X , y )
511559 _ , y_pis = mapie_reg .predict (X , alpha = 0.05 )
512560 width_mean = (y_pis [:, 1 , 0 ] - y_pis [:, 0 , 0 ]).mean ()
@@ -516,20 +564,17 @@ def test_results_prefit_naive() -> None:
516564
517565
def test_results_prefit() -> None:
    """Test prefit results on a standard train/calibration split."""
    X_train, X_calib, y_train, y_calib = train_test_split(
        X, y, test_size=1 / 2, random_state=1
    )
    estimator = LinearRegression().fit(X_train, y_train)
    mapie_reg = MapieRegressor(estimator=estimator, method="base", cv="prefit")
    mapie_reg.fit(X_calib, y_calib)
    # NOTE(review): intervals are evaluated on the calibration set itself
    # rather than on a held-out test set — confirm this is intended.
    _, y_pis = mapie_reg.predict(X_calib, alpha=0.05)
    width_mean = (y_pis[:, 1, 0] - y_pis[:, 0, 0]).mean()
    coverage = regression_coverage_score(
        y_calib, y_pis[:, 0, 0], y_pis[:, 1, 0]
    )
    # Regression check against the recorded reference values.
    np.testing.assert_allclose(width_mean, WIDTHS["prefit"], rtol=1e-2)
    np.testing.assert_allclose(coverage, COVERAGES["prefit"], rtol=1e-2)
0 commit comments