Commit 1eeba9a (parent: 06e778e)

take into account VTA comments

6 files changed: +186, -81 lines

environment.doc.yml

Lines changed: 1 addition & 1 deletion

@@ -3,6 +3,7 @@ channels:
   - defaults
   - conda-forge
 dependencies:
+  - lightgbm=3.1.1
   - numpydoc=1.1.0
   - pandas=1.3.5
   - python=3.8
@@ -11,4 +12,3 @@ dependencies:
   - sphinx-gallery=0.10.1
   - sphinx_rtd_theme=1.0.0
   - typing_extensions=4.0.1
-  - lightgbm=3.1.1

Lines changed: 125 additions & 68 deletions

@@ -1,17 +1,19 @@
 """
-========================================================
-Example use of the prefit parameter with neural networks
-========================================================
+===========================================================================
+Example use of the prefit parameter with neural networks and LGBM Regressor
+===========================================================================

 :class:`mapie.regression.MapieRegressor` and
-:class:`mapie.quantile_regression.MapieQuantileRegressor``
+:class:`mapie.quantile_regression.MapieQuantileRegressor`
 are used to calibrate uncertainties for large models for
 which the cost of cross-validation is too high. Typically,
 neural networks rely on a single validation set.

 In this example, we first fit a neural network on the training set. We
 then compute residuals on a validation set with the `cv="prefit"` parameter.
 Finally, we evaluate the model with prediction intervals on a testing set.
+We will also show how to use the prefit method in the conformalized quantile
+regressor.
 """


@@ -20,11 +22,24 @@
 from matplotlib import pyplot as plt
 import scipy
 from sklearn.model_selection import train_test_split
+from sklearn.neural_network import MLPRegressor

 from mapie.regression import MapieRegressor
 from mapie.quantile_regression import MapieQuantileRegressor
 from mapie.metrics import regression_coverage_score
 from mapie._typing import NDArray
+import warnings
+warnings.filterwarnings("ignore")
+
+alpha = 0.1
+
+##############################################################################
+# 1. Generate dataset
+# -----------------------------------------------------------------------------
+#
+# We start by defining a function that we will use to generate data. We then
+# add random noise to the y values. Then we split the dataset into a training,
+# calibration and test set.


 def f(x: NDArray) -> NDArray:
@@ -39,67 +54,123 @@ def f(x: NDArray) -> NDArray:
 y = f(X) + np.random.normal(0, sigma, n_samples)

 # Train/validation/test split
-X_train_val, X_test, y_train_val, y_test = train_test_split(
+X_train_cal, X_test, y_train_cal, y_test = train_test_split(
     X, y, test_size=1 / 10
 )
-X_train, X_val, y_train, y_val = train_test_split(
-    X_train_val, y_train_val, test_size=1 / 9
+X_train, X_cal, y_train, y_cal = train_test_split(
+    X_train_cal, y_train_cal, test_size=1 / 9
 )

-# Train model on training set for MapieRegressor
-model = estimator = LGBMRegressor(
-    objective='quantile',
-    alpha=0.5,
-)
-model.fit(X_train.reshape(-1, 1), y_train)

-# Calibrate uncertainties on validation set
-mapie = MapieRegressor(model, cv="prefit")
-mapie.fit(X_val.reshape(-1, 1), y_val)
+##############################################################################
+# 2. Pre-train models
+# -----------------------------------------------------------------------------
+#
+# For this example, we will train an MLPRegressor for
+# :class:`mapie.regression.MapieRegressor` and multiple LGBMRegressor models
+# with a quantile objective, as this is a requirement to perform conformalized
+# quantile regression using
+# :class:`mapie.quantile_regression.MapieQuantileRegressor`. Note that the
+# three estimators need to be trained at quantile values of
+# $(\alpha/2, 1-(\alpha/2), 0.5)$.
+
+
+# Train an MLPRegressor for MapieRegressor
+est_mlp = MLPRegressor(activation="relu", random_state=1)
+est_mlp.fit(X_train.reshape(-1, 1), y_train)
+
+# Train LGBMRegressor models for MapieQuantileRegressor
+list_estimators_cqr = []
+for alpha_ in [alpha/2, (1-(alpha/2)), 0.5]:
+    estimator_ = LGBMRegressor(
+        objective='quantile',
+        alpha=alpha_,
+    )
+    estimator_.fit(X_train.reshape(-1, 1), y_train)
+    list_estimators_cqr.append(estimator_)
+
+
+##############################################################################
+# 3. Using MAPIE to calibrate the models
+# -----------------------------------------------------------------------------
+#
+# We will now proceed to calibrate the models using MAPIE. This means using
+# the `cv="prefit"` parameter so that we use the models we already trained.
+# We then predict on the test set and evaluate the coverage.
+
+
+# Calibrate uncertainties on calibration set
+mapie = MapieRegressor(est_mlp, cv="prefit")
+mapie.fit(X_cal.reshape(-1, 1), y_cal)

 # Evaluate prediction and coverage level on testing set
-alpha = 0.1
 y_pred, y_pis = mapie.predict(X_test.reshape(-1, 1), alpha=alpha)
-y_pred_low, y_pred_up = y_pis[:, 0, 0], y_pis[:, 1, 0]
-coverage = regression_coverage_score(y_test, y_pred_low, y_pred_up)
-
-# Train models for MapieQuantileRegressor
-list_estimators = []
-estimator_low = LGBMRegressor(
-    objective='quantile',
-    alpha=(alpha/2),
-)
-estimator_low.fit(X_train.reshape(-1, 1), y_train)
-list_estimators.append(estimator_low)
+coverage = regression_coverage_score(y_test, y_pis[:, 0, 0], y_pis[:, 1, 0])

-estimator_high = LGBMRegressor(
-    objective='quantile',
-    alpha=(1-(alpha/2)),
-)
-estimator_high.fit(X_train.reshape(-1, 1), y_train)
-list_estimators.append(estimator_high)
+# Calibrate uncertainties on calibration set
+mapie_cqr = MapieQuantileRegressor(list_estimators_cqr, cv="prefit")
+mapie_cqr.fit(X_cal.reshape(-1, 1), y_cal)

+# Evaluate prediction and coverage level on testing set
+y_pred_cqr, y_pis_cqr = mapie_cqr.predict(X_test.reshape(-1, 1))
+coverage_cqr = regression_coverage_score(
+    y_test,
+    y_pis_cqr[:, 0, 0],
+    y_pis_cqr[:, 1, 0]
+)

-estimator = LGBMRegressor(
-    objective='quantile',
-    alpha=0.5,
-) # Note that this is the same model as used for QR
-estimator.fit(X_train.reshape(-1, 1), y_train)
-list_estimators.append(estimator)

-# Calibrate uncertainties on validation set
-mapie_cqr = MapieQuantileRegressor(list_estimators, cv="prefit")
-mapie_cqr.fit(X_val.reshape(-1, 1), y_val)
-y_pred_cqr, y_pis_cqr = mapie_cqr.predict(X_test.reshape(-1, 1))
-y_pred_low_cqr, y_pred_up_cqr = y_pis_cqr[:, 0, 0], y_pis_cqr[:, 1, 0]
-coverage_cqr = regression_coverage_score(y_test, y_pred_low_cqr, y_pred_up_cqr)
+##############################################################################
+# 4. Plots
+# -----------------------------------------------------------------------------
+#
+# In order to view the results shown above, we plot each of the predictions
+# with their prediction intervals: the multi-layer perceptron (MLP) with
+# :class:`mapie.regression.MapieRegressor` and LGBMRegressor with
+# :class:`mapie.quantile_regression.MapieQuantileRegressor`.

 # Plot obtained prediction intervals on testing set
 theoretical_semi_width = scipy.stats.norm.ppf(1 - alpha) * sigma
 y_test_theoretical = f(X_test)
 order = np.argsort(X_test)

-plt.scatter(X_test, y_test, color="red", alpha=0.3, label="testing", s=2)
+plt.figure(figsize=(8, 8))
+plt.plot(
+    X_test[order],
+    y_pred[order],
+    label="Predictions MLP",
+    color="green"
+)
+plt.fill_between(
+    X_test[order],
+    y_pis[:, 0, 0][order],
+    y_pis[:, 1, 0][order],
+    alpha=0.4,
+    label="prediction intervals MP",
+    color="green"
+)
+plt.plot(
+    X_test[order],
+    y_pred_cqr[order],
+    label="Predictions LGBM",
+    color="blue"
+)
+plt.fill_between(
+    X_test[order],
+    y_pis_cqr[:, 0, 0][order],
+    y_pis_cqr[:, 1, 0][order],
+    alpha=0.4,
+    label="prediction intervals MQP",
+    color="blue"
+)
+plt.title(
+    f"Target and effective coverages for:\n "
+    f"MLP with MapieRegressor alpha={alpha}: "
+    + f"({1 - alpha:.3f}, {coverage:.3f})\n"
+    f"LGBM with MapieQuantileRegressor alpha={alpha}: "
+    + f"({1 - alpha:.3f}, {coverage_cqr:.3f})"
+)
+plt.scatter(X_test, y_test, color="red", alpha=0.7, label="testing", s=2)
 plt.plot(
     X_test[order],
     y_test_theoretical[order],
@@ -118,27 +189,13 @@ def f(x: NDArray) -> NDArray:
     color="gray",
     ls="--",
 )
-plt.plot(X_test[order], y_pred[order], label="Predictions")
-plt.fill_between(
-    X_test[order],
-    y_pred_low[order],
-    y_pred_up[order],
-    alpha=0.4,
-    label="prediction intervals QR"
-)
-plt.fill_between(
-    X_test[order],
-    y_pred_low_cqr[order],
-    y_pred_up_cqr[order],
-    alpha=0.4,
-    label="prediction intervals CQR"
-)
-plt.title(
-    f"Target and effective coverages for:\n "
-    f"QR alpha={alpha}: ({1 - alpha:.3f}, {coverage:.3f})\n"
-    f"CQR alpha={alpha}: ({1 - alpha:.3f}, {coverage_cqr:.3f})"
-)
 plt.xlabel("x")
 plt.ylabel("y")
-plt.legend()
+plt.legend(
+    loc='upper center',
+    bbox_to_anchor=(0.5, -0.05),
+    fancybox=True,
+    shadow=True,
+    ncol=3
+)
 plt.show()
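
For readers who want the new workflow without diff markers, here is a condensed, standalone sketch of the prefit pattern this example introduces: pre-train a point model and three quantile models, hand them to MAPIE with cv="prefit", calibrate on a held-out set, and evaluate coverage. The MAPIE and scikit-learn calls mirror the added lines above; the toy data (a sine signal with Gaussian noise and a fixed seed) is an assumption made here only so the snippet runs on its own.

import numpy as np
from lightgbm import LGBMRegressor
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor

from mapie.metrics import regression_coverage_score
from mapie.quantile_regression import MapieQuantileRegressor
from mapie.regression import MapieRegressor

alpha = 0.1

# Assumed toy data: sine signal with Gaussian noise (not the example's f).
rng = np.random.default_rng(0)
X = rng.uniform(-5, 5, 1000)
y = np.sin(X) + rng.normal(0, 0.3, X.shape[0])

# Train / calibration / test split, as in the example above.
X_train_cal, X_test, y_train_cal, y_test = train_test_split(
    X, y, test_size=1 / 10
)
X_train, X_cal, y_train, y_cal = train_test_split(
    X_train_cal, y_train_cal, test_size=1 / 9
)

# Pre-trained point model, calibrated with cv="prefit".
est_mlp = MLPRegressor(activation="relu", random_state=1)
est_mlp.fit(X_train.reshape(-1, 1), y_train)
mapie = MapieRegressor(est_mlp, cv="prefit")
mapie.fit(X_cal.reshape(-1, 1), y_cal)
y_pred, y_pis = mapie.predict(X_test.reshape(-1, 1), alpha=alpha)

# Three pre-trained quantile models (alpha/2, 1 - alpha/2, 0.5) for CQR.
list_estimators_cqr = []
for alpha_ in [alpha / 2, 1 - alpha / 2, 0.5]:
    est = LGBMRegressor(objective="quantile", alpha=alpha_)
    est.fit(X_train.reshape(-1, 1), y_train)
    list_estimators_cqr.append(est)
mapie_cqr = MapieQuantileRegressor(list_estimators_cqr, cv="prefit")
mapie_cqr.fit(X_cal.reshape(-1, 1), y_cal)
y_pred_cqr, y_pis_cqr = mapie_cqr.predict(X_test.reshape(-1, 1))

print(regression_coverage_score(y_test, y_pis[:, 0, 0], y_pis[:, 1, 0]))
print(regression_coverage_score(y_test, y_pis_cqr[:, 0, 0], y_pis_cqr[:, 1, 0]))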

mapie/quantile_regression.py

Lines changed: 1 addition & 1 deletion

@@ -444,7 +444,7 @@ def _check_prefit_params(
         If the alpha is defined, warns the user that it must be set
         accordingly with the prefit estimators.
         """
-        if hasattr(estimator, '__iter__') is False:
+        if isinstance(estimator, Iterable) is False:
             raise ValueError(
                 "Estimator for prefit must be an iterable object."
             )
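
The one-line change swaps a duck-typed hasattr check for the collections.abc.Iterable ABC, which states the intent more explicitly and also accepts classes registered as Iterable. A minimal standalone illustration of the check (not MAPIE code; require_iterable is a stand-in written for this note):

from collections.abc import Iterable

# Lists or tuples of prefit estimators pass; scalars and CV splitters do not.
print(isinstance([1, 2, 3], Iterable))  # True
print(isinstance((1, 2, 3), Iterable))  # True
print(isinstance(10, Iterable))         # False


def require_iterable(estimator):
    # Same guard as _check_prefit_params uses after this commit.
    if not isinstance(estimator, Iterable):
        raise ValueError("Estimator for prefit must be an iterable object.")


require_iterable([0.1, 0.5, 0.9])  # passes silently

Note that isinstance(x, Iterable) does not detect objects that are iterable only through __getitem__, but that corner case does not apply to lists of prefit estimators.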

mapie/regression.py

Lines changed: 2 additions & 5 deletions

@@ -31,6 +31,7 @@
     check_null_weight,
     check_verbose,
     fit_estimator,
+    check_estimator_fit_predict,
 )


@@ -320,11 +321,7 @@ def _check_estimator(
         """
         if estimator is None:
             return LinearRegression()
-        if not (hasattr(estimator, "fit") and hasattr(estimator, "predict")):
-            raise ValueError(
-                "Invalid estimator."
-                "Please provide a regressor with fit and predict methods."
-            )
+        check_estimator_fit_predict(estimator)
         if self.cv == "prefit":
             if isinstance(self.estimator, Pipeline):
                 check_is_fitted(self.estimator[-1])
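
check_estimator_fit_predict is imported from the shared utilities, but its body is not part of this commit; it presumably centralizes the inline validation deleted above. The sketch below is a hypothetical reconstruction based on the removed lines, not the actual mapie.utils implementation:

from typing import Any


def check_estimator_fit_predict(estimator: Any) -> None:
    """Hypothetical sketch mirroring the inline check removed above.

    The real mapie.utils implementation is not shown in this diff.
    """
    # Reject anything that cannot be fitted and used for prediction.
    if not (hasattr(estimator, "fit") and hasattr(estimator, "predict")):
        raise ValueError(
            "Invalid estimator. "
            "Please provide a regressor with fit and predict methods."
        )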

mapie/tests/test_quantile_regression.py

Lines changed: 37 additions & 6 deletions

@@ -519,7 +519,8 @@ def test_linear_regression_results(strategy: str) -> None:
     np.testing.assert_allclose(coverage, COVERAGES[strategy], rtol=1e-2)


-def test_quantile_prefit_non_list() -> None:
+@pytest.mark.parametrize("estimator", [-1, 3, KFold(), LeaveOneOut()])
+def test_quantile_prefit_non_list(estimator: Any) -> None:
     """
     Test that there is a list of estimators provided when cv='prefit'
     is called for MapieQuantileRegressor.
@@ -528,9 +529,8 @@ def test_quantile_prefit_non_list() -> None:
         ValueError,
         match=r".*Estimator for prefit must be an iterable object.*",
     ):
-        not_an_iterable = 10
         mapie_reg = MapieQuantileRegressor(
-            estimator=not_an_iterable,
+            estimator=estimator,
             cv="prefit"
         )
         mapie_reg.fit(
@@ -541,7 +541,7 @@ def test_quantile_prefit_non_list() -> None:

 def test_quantile_prefit_three_estimators() -> None:
     """
-    Test that there is a list of estimators three estimators provided for
+    Test that there is a list with three estimators provided for
     cv="prefit".
     """
     with pytest.raises(
@@ -564,7 +564,7 @@ def test_quantile_prefit_three_estimators() -> None:

 def test_prefit_no_fit_predict() -> None:
     """
-    Check that the user is warned that the alphas need to be correctly set.
+    Check that the estimators given have fit and predict methods.
     """
     with pytest.raises(
         ValueError,
@@ -588,7 +588,7 @@ def test_prefit_no_fit_predict() -> None:

 def test_non_trained_estimator() -> None:
     """
-    Check that the user is warned that the alphas need to be correctly set.
+    Check that the estimators are all already trained when used in prefit.
     """
     with pytest.raises(
         ValueError,
@@ -633,6 +633,37 @@ def test_warning_alpha_prefit() -> None:
     )


+def test_prefit_and_non_prefit_equal() -> None:
+    """
+    Check that prefit and non-prefit calibration give identical results.
+    """
+    list_estimators = []
+    alphas_ = [0.15, 0.85, 0.5]
+    for alpha_ in alphas_:
+        est = clone(qt)
+        params = {"quantile": alpha_}
+        est.set_params(**params)
+        est.fit(X_train, y_train)
+        list_estimators.append(est)
+    mapie_reg_prefit = MapieQuantileRegressor(
+        estimator=list_estimators,
+        cv="prefit",
+        alpha=0.3
+    )
+    mapie_reg_prefit.fit(X_calib, y_calib)
+    y_pred_prefit, y_pis_prefit = mapie_reg_prefit.predict(X)
+
+    mapie_reg = MapieQuantileRegressor(
+        estimator=qt,
+        alpha=0.3
+    )
+    mapie_reg.fit(X_train, y_train, X_calib=X_calib, y_calib=y_calib)
+    y_pred, y_pis = mapie_reg.predict(X)
+
+    np.testing.assert_allclose(y_pred_prefit, y_pred)
+    np.testing.assert_allclose(y_pis_prefit, y_pis)
+
+
 @pytest.mark.parametrize("estimator", ESTIMATOR)
 def test_pipeline_compatibility(estimator: RegressorMixin) -> None:
     """Check that MAPIE works on pipeline based on pandas dataframes"""
