@@ -260,13 +260,13 @@ def test_predict_output_shape(
260260
261261
262262@pytest .mark .parametrize ("delta" , [0.5 , 0.6 , 0.7 , 0.8 ])
263- @pytest .mark .parametrize ("n_calib" , [10 , 20 , 50 , 100 ])
263+ @pytest .mark .parametrize ("n_calib" , [10 , 15 , 20 , 25 , 50 , 100 , 1000 ])
264264def test_coverage_validity (delta : float , n_calib : int ) -> None :
265265 """
266266 Test that the prefit method provides valid coverage
267267 for different calibration data sizes and coverage targets.
268268 """
269- n_split , n_train , n_test = 1000 , 100 , 100
269+ n_split , n_train , n_test = 1000 , 100 , 1000
270270 n_all = n_train + n_calib + n_test
271271 X , y = make_regression (n_all , random_state = random_state )
272272
@@ -287,8 +287,12 @@ def test_coverage_validity(delta: float, n_calib: int) -> None:
287287 regression_coverage_score (y_test , y_pis [:, 0 , 0 ], y_pis [:, 1 , 0 ])
288288 coverage_list .append (coverage )
289289
290- mean_coverage = np .mean (coverage_list )
291- np .testing .assert_array_less (delta , mean_coverage )
290+ # One-sided t-test of whether the average coverage is statistically less
291+ # than the target coverage; the assertion requires p-value > 0.05 (it is not).
292+ from scipy .stats import ttest_1samp
293+ _ , pval = ttest_1samp (coverage_list , popmean = delta , alternative = 'less' )
294+
295+ np .testing .assert_array_less (0.05 , pval )
292296
293297
294298def test_same_results_prefit_split () -> None :
0 commit comments