Skip to content

Commit 69904ca

Browse files
authored
Merge pull request #214 from scikit-learn-contrib/add_prefit_cqr
prefit for CQR
2 parents 04a4555 + fb88e2a commit 69904ca

File tree

8 files changed

+560
-159
lines changed

8 files changed

+560
-159
lines changed

environment.doc.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ channels:
33
- defaults
44
- conda-forge
55
dependencies:
6+
- lightgbm=3.1.1
67
- numpydoc=1.1.0
78
- pandas=1.3.5
89
- python=3.8
Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
"""
2+
===========================================================================
3+
Example use of the prefit parameter with neural networks and LGBM Regressor
4+
===========================================================================
5+
6+
:class:`mapie.regression.MapieRegressor` and
7+
:class:`mapie.quantile_regression.MapieQuantileRegressor`
8+
are used to calibrate uncertainties for large models for
9+
which the cost of cross-validation is too high. Typically,
10+
neural networks rely on a single validation set.
11+
12+
In this example, we first fit a neural network on the training set. We
13+
then compute residuals on a validation set with the `cv="prefit"` parameter.
14+
Finally, we evaluate the model with prediction intervals on a testing set.
15+
We will also show how to use the prefit method in the conformalized quantile
16+
regressor.
17+
"""
18+
19+
20+
import numpy as np
21+
from lightgbm import LGBMRegressor
22+
from matplotlib import pyplot as plt
23+
import scipy
24+
from sklearn.model_selection import train_test_split
25+
from sklearn.neural_network import MLPRegressor
26+
import warnings
27+
28+
from mapie.regression import MapieRegressor
29+
from mapie.quantile_regression import MapieQuantileRegressor
30+
from mapie.metrics import regression_coverage_score
31+
from mapie._typing import NDArray
32+
33+
warnings.filterwarnings("ignore")
34+
35+
alpha = 0.1
36+
37+
##############################################################################
38+
# 1. Generate dataset
39+
# -----------------------------------------------------------------------------
40+
#
41+
# We start by defining a function that we will use to generate data. We then
42+
# add random noise to the y values. Then we split the dataset to have
43+
# a training, calibration and test set.
44+
45+
46+
def f(x: NDArray) -> NDArray:
    """
    Polynomial function used to generate one-dimensional data.

    Parameters
    ----------
    x : NDArray
        Points at which to evaluate the polynomial. Any array-like input
        (list, tuple, scalar) is converted with ``np.asarray``.

    Returns
    -------
    NDArray
        Values of ``5 * x + 5 * x**4 - 9 * x**2``, evaluated element-wise.
    """
    # Coerce first so that lists/tuples get element-wise arithmetic rather
    # than Python sequence semantics (e.g. ``5 * [1, 2]`` repeats the list).
    x = np.asarray(x)
    return np.array(5 * x + 5 * x**4 - 9 * x**2)
49+
50+
51+
# Generate data
52+
sigma = 0.1
53+
n_samples = 10000
54+
X = np.linspace(0, 1, n_samples)
55+
y = f(X) + np.random.normal(0, sigma, n_samples)
56+
57+
# Train/calibration/test split
58+
X_train_cal, X_test, y_train_cal, y_test = train_test_split(
59+
X, y, test_size=1 / 10
60+
)
61+
X_train, X_cal, y_train, y_cal = train_test_split(
62+
X_train_cal, y_train_cal, test_size=1 / 9
63+
)
64+
65+
66+
##############################################################################
67+
# 2. Pre-train models
68+
# -----------------------------------------------------------------------------
69+
#
70+
# For this example, we will train a MLPRegressor for
71+
# :class:`mapie.regression.MapieRegressor` and multiple LGBMRegressor with a
72+
# quantile objective as this is a requirement to perform conformalized
73+
# quantile regression using
74+
# :class:`mapie.quantile_regression.MapieQuantileRegressor`. Note that the
75+
# three estimators need to be trained at quantile values of
76+
# :math:`(\alpha/2, 1-(\alpha/2), 0.5)`.
77+
78+
79+
# Train a MLPRegressor for MapieRegressor
80+
est_mlp = MLPRegressor(activation="relu", random_state=1)
81+
est_mlp.fit(X_train.reshape(-1, 1), y_train)
82+
83+
# Train LGBMRegressor models for MapieQuantileRegressor
84+
list_estimators_cqr = []
85+
for alpha_ in [alpha / 2, (1 - (alpha / 2)), 0.5]:
86+
estimator_ = LGBMRegressor(
87+
objective='quantile',
88+
alpha=alpha_,
89+
)
90+
estimator_.fit(X_train.reshape(-1, 1), y_train)
91+
list_estimators_cqr.append(estimator_)
92+
93+
94+
##############################################################################
95+
# 3. Using MAPIE to calibrate the models
96+
# -----------------------------------------------------------------------------
97+
#
98+
# We will now proceed to calibrate the models using MAPIE. To this aim, we set
99+
# `cv="prefit"` so that we use the models that we already trained prior.
100+
# We then predict on the test set and evaluate the coverage.
101+
102+
103+
# Calibrate uncertainties on calibration set
104+
mapie = MapieRegressor(est_mlp, cv="prefit")
105+
mapie.fit(X_cal.reshape(-1, 1), y_cal)
106+
107+
# Evaluate prediction and coverage level on testing set
108+
y_pred, y_pis = mapie.predict(X_test.reshape(-1, 1), alpha=alpha)
109+
coverage = regression_coverage_score(y_test, y_pis[:, 0, 0], y_pis[:, 1, 0])
110+
111+
# Calibrate uncertainties on calibration set
112+
mapie_cqr = MapieQuantileRegressor(list_estimators_cqr, cv="prefit")
113+
mapie_cqr.fit(X_cal.reshape(-1, 1), y_cal)
114+
115+
# Evaluate prediction and coverage level on testing set
116+
y_pred_cqr, y_pis_cqr = mapie_cqr.predict(X_test.reshape(-1, 1))
117+
coverage_cqr = regression_coverage_score(
118+
y_test,
119+
y_pis_cqr[:, 0, 0],
120+
y_pis_cqr[:, 1, 0]
121+
)
122+
123+
124+
##############################################################################
125+
# 4. Plots
126+
# -----------------------------------------------------------------------------
127+
#
128+
# In order to view the results shown above, we will plot each model's predictions
129+
# with their prediction intervals: the multi-layer perceptron (MLP) with
130+
# :class:`mapie.regression.MapieRegressor` and LGBMRegressor with
131+
# :class:`mapie.quantile_regression.MapieQuantileRegressor`.
132+
133+
# Plot obtained prediction intervals on testing set
134+
# Two-sided interval at level 1 - alpha: each tail holds alpha / 2, so the
# half-width of the true noise band is the (1 - alpha / 2) normal quantile.
theoretical_semi_width = scipy.stats.norm.ppf(1 - alpha / 2) * sigma
135+
y_test_theoretical = f(X_test)
136+
order = np.argsort(X_test)
137+
138+
plt.figure(figsize=(8, 8))
139+
plt.plot(
140+
X_test[order],
141+
y_pred[order],
142+
label="Predictions MLP",
143+
color="green"
144+
)
145+
plt.fill_between(
146+
X_test[order],
147+
y_pis[:, 0, 0][order],
148+
y_pis[:, 1, 0][order],
149+
alpha=0.4,
150+
label="prediction intervals MP",
151+
color="green"
152+
)
153+
plt.plot(
154+
X_test[order],
155+
y_pred_cqr[order],
156+
label="Predictions LGBM",
157+
color="blue"
158+
)
159+
plt.fill_between(
160+
X_test[order],
161+
y_pis_cqr[:, 0, 0][order],
162+
y_pis_cqr[:, 1, 0][order],
163+
alpha=0.4,
164+
label="prediction intervals MQP",
165+
color="blue"
166+
)
167+
plt.title(
168+
f"Target and effective coverages for:\n "
169+
f"MLP with MapieRegressor alpha={alpha}: "
170+
+ f"({1 - alpha:.3f}, {coverage:.3f})\n"
171+
f"LGBM with MapieQuantileRegressor alpha={alpha}: "
172+
+ f"({1 - alpha:.3f}, {coverage_cqr:.3f})"
173+
)
174+
plt.scatter(X_test, y_test, color="red", alpha=0.7, label="testing", s=2)
175+
plt.plot(
176+
X_test[order],
177+
y_test_theoretical[order],
178+
color="gray",
179+
label="True confidence intervals",
180+
)
181+
plt.plot(
182+
X_test[order],
183+
y_test_theoretical[order] - theoretical_semi_width,
184+
color="gray",
185+
ls="--",
186+
)
187+
plt.plot(
188+
X_test[order],
189+
y_test_theoretical[order] + theoretical_semi_width,
190+
color="gray",
191+
ls="--",
192+
)
193+
plt.xlabel("x")
194+
plt.ylabel("y")
195+
plt.legend(
196+
loc='upper center',
197+
bbox_to_anchor=(0.5, -0.05),
198+
fancybox=True,
199+
shadow=True,
200+
ncol=3
201+
)
202+
plt.show()

examples/regression/1-quickstart/plot_prefit_nn.py

Lines changed: 0 additions & 91 deletions
This file was deleted.

0 commit comments

Comments
 (0)