Skip to content

Commit cda1250

Browse files
author
Thibault Cordier
committed
UPD: add comments and robust test (FWER procedure)
1 parent e747bf5 commit cda1250

File tree

4 files changed

+30
-26
lines changed

4 files changed

+30
-26
lines changed

HISTORY.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22
History
33
=======
44

5-
0.8.3 (2024-**-**)
5+
0.8.4 (2024-**-**)
66
------------------
77

8+
* Fix the quantile formula to ensure valid coverage for any number of calibration data in `ConformityScore`.
89
* Fix conda versioning.
910
* Reduce precision for test in `MapieCalibrator`.
1011
* Fix invalid certificate when downloading data.

mapie/conformity_scores/conformity_scores.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -247,15 +247,18 @@ def get_quantile(
247247
n_ref = conformity_scores.shape[1-axis]
248248
n_calib = np.min(np.sum(~np.isnan(conformity_scores), axis=axis))
249249
signed = 1-2*reversed
250+
251+
# Adapt alpha w.r.t upper/lower : alpha vs. 1-alpha
250252
alpha_ref = (1-2*alpha_np)*reversed + alpha_np
251253

254+
# Adjust alpha w.r.t quantile correction
255+
alpha_ref = np.ceil(alpha_ref*(n_calib+1))/n_calib
256+
257+
# Compute the target quantiles
252258
quantile = signed * np.column_stack([
253259
np_nanquantile(
254-
signed * conformity_scores.astype(float),
255-
np.ceil(_alpha*(n_calib+1))/n_calib,
256-
axis=axis,
257-
method="lower"
258-
) if 0 < np.ceil(_alpha*(n_calib+1))/n_calib < 1
260+
signed * conformity_scores, _alpha, axis=axis, method="lower"
261+
) if 0 < _alpha < 1
259262
else np.inf * np.ones(n_ref)
260263
for _alpha in alpha_ref
261264
])

mapie/tests/test_conformity_scores.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
from typing import Any
2-
31
import numpy as np
42
import pytest
53
from sklearn.linear_model import LinearRegression
@@ -385,7 +383,7 @@ def test_residual_normalised_prefit_get_estimation_distribution() -> None:
385383
@pytest.mark.parametrize("alpha", [[0.5], [0.5, 0.6]])
386384
def test_intervals_shape_with_every_score(
387385
score: ConformityScore,
388-
alpha: Any
386+
alpha: NDArray
389387
) -> None:
390388
estim = LinearRegression().fit(X_toy, y_toy)
391389
mapie_reg = MapieRegressor(

mapie/tests/test_regression.py

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from sklearn.pipeline import Pipeline, make_pipeline
1919
from sklearn.preprocessing import OneHotEncoder
2020
from sklearn.utils.validation import check_is_fitted
21+
from scipy.stats import ttest_1samp
2122
from typing_extensions import TypedDict
2223

2324
from mapie._typing import NDArray
@@ -155,14 +156,14 @@
155156

156157
COVERAGES = {
157158
"naive": 0.954,
158-
"split": 0.962,
159+
"split": 0.956,
159160
"jackknife": 0.956,
160161
"jackknife_plus": 0.952,
161162
"jackknife_minmax": 0.962,
162163
"cv": 0.954,
163164
"cv_plus": 0.954,
164165
"cv_minmax": 0.962,
165-
"prefit": 0.960,
166+
"prefit": 0.956,
166167
"cv_plus_median": 0.954,
167168
"jackknife_plus_ab": 0.952,
168169
"jackknife_minmax_ab": 0.968,
@@ -260,7 +261,7 @@ def test_predict_output_shape(
260261

261262

262263
@pytest.mark.parametrize("delta", [0.6, 0.8])
263-
@pytest.mark.parametrize("n_calib", [10 + i for i in range(11)] + [50, 100])
264+
@pytest.mark.parametrize("n_calib", [10 + i for i in range(13)] + [50, 100])
264265
def test_coverage_validity(delta: float, n_calib: int) -> None:
265266
"""
266267
Test that the prefit method provides valid coverage
@@ -269,33 +270,34 @@ def test_coverage_validity(delta: float, n_calib: int) -> None:
269270
n_split, n_train, n_test = 100, 100, 1000
270271
n_all = n_train + n_calib + n_test
271272
X, y = make_regression(n_all, random_state=random_state)
272-
273-
X_train, X_cal_test, y_train, y_cal_test = \
274-
train_test_split(X, y, train_size=n_train, random_state=random_state)
273+
Xtr, Xct, ytr, yct = train_test_split(
274+
X, y, train_size=n_train, random_state=random_state
275+
)
275276

276277
model = LinearRegression()
277-
model.fit(X_train, y_train)
278+
model.fit(Xtr, ytr)
278279

279280
cov_list = []
280281
for _ in range(n_split):
281282
mapie_reg = MapieRegressor(estimator=model, method="base", cv="prefit")
282-
X_cal, X_test, y_cal, y_test = \
283-
train_test_split(X_cal_test, y_cal_test, test_size=n_test)
284-
mapie_reg.fit(X_cal, y_cal)
285-
_, y_pis = mapie_reg.predict(X_test, alpha=1-delta)
286-
coverage = \
287-
regression_coverage_score(y_test, y_pis[:, 0, 0], y_pis[:, 1, 0])
283+
Xc, Xt, yc, yt = train_test_split(Xct, yct, test_size=n_test)
284+
mapie_reg.fit(Xc, yc)
285+
_, y_pis = mapie_reg.predict(Xt, alpha=1-delta)
286+
y_low, y_up = y_pis[:, 0, 0], y_pis[:, 1, 0]
287+
coverage = regression_coverage_score(yt, y_low, y_up)
288288
cov_list.append(coverage)
289289

290290
# Here we are testing whether the average coverage is statistically
291291
# less than the target coverage.
292-
from scipy.stats import ttest_1samp
293292
mean_low, mean_up = delta, delta + 1/(n_calib+1)
294293
_, pval_low = ttest_1samp(cov_list, popmean=mean_low, alternative='less')
295294
_, pval_up = ttest_1samp(cov_list, popmean=mean_up, alternative='greater')
296295

297-
np.testing.assert_array_less(0.01, pval_low)
298-
np.testing.assert_array_less(0.01, pval_up)
296+
# We perform a FWER controlling procedure (Bonferroni)
297+
p_fwer = 0.01 # probability of making one or more false discoveries: 1%
298+
p_bonf = p_fwer / 30 # because a total of 30 test_coverage_validity
299+
np.testing.assert_array_less(p_bonf, pval_low)
300+
np.testing.assert_array_less(p_bonf, pval_up)
299301

300302

301303
def test_same_results_prefit_split() -> None:
@@ -562,7 +564,7 @@ def test_results_prefit_naive() -> None:
562564

563565

564566
def test_results_prefit() -> None:
565-
"""Test prefit results on a standard train/validation/test split."""
567+
"""Test prefit results on a standard train/calibration split."""
566568
X_train, X_calib, y_train, y_calib = train_test_split(
567569
X, y, test_size=1/2, random_state=1
568570
)

0 commit comments

Comments (0)