11"""
2- ========================================================
3- Example use of the prefit parameter with neural networks
4- ========================================================
2+ ===========================================================================
3+ Example use of the prefit parameter with neural networks and LGBM Regressor
4+ ===========================================================================
55
66:class:`mapie.regression.MapieRegressor` and
7- :class:`mapie.quantile_regression.MapieQuantileRegressor``
7+ :class:`mapie.quantile_regression.MapieQuantileRegressor`
88are used to calibrate uncertainties for large models for
99which the cost of cross-validation is too high. Typically,
1010neural networks rely on a single validation set.
1111
1212In this example, we first fit a neural network on the training set. We
1313then compute residuals on a validation set with the `cv="prefit"` parameter.
1414Finally, we evaluate the model with prediction intervals on a testing set.
15+ We will also show how to use the prefit method in the conformalized quantile
16+ regressor.
1517"""
1618
1719
2022from matplotlib import pyplot as plt
2123import scipy
2224from sklearn .model_selection import train_test_split
25+ from sklearn .neural_network import MLPRegressor
2326
2427from mapie .regression import MapieRegressor
2528from mapie .quantile_regression import MapieQuantileRegressor
2629from mapie .metrics import regression_coverage_score
2730from mapie ._typing import NDArray
31+ import warnings
32+ warnings .filterwarnings ("ignore" )
33+
34+ alpha = 0.1
35+
36+ ##############################################################################
37+ # 1. Generate dataset
38+ # -----------------------------------------------------------------------------
39+ #
40+ # We start by defining a function that we will use to generate data. We then
41+ # add random noise y values. Then we split the dataset to have a training,
42+ # calibration and test set.
2843
2944
3045def f (x : NDArray ) -> NDArray :
@@ -39,67 +54,123 @@ def f(x: NDArray) -> NDArray:
3954y = f (X ) + np .random .normal (0 , sigma , n_samples )
4055
4156# Train/validation/test split
42- X_train_val , X_test , y_train_val , y_test = train_test_split (
57+ X_train_cal , X_test , y_train_cal , y_test = train_test_split (
4358 X , y , test_size = 1 / 10
4459)
45- X_train , X_val , y_train , y_val = train_test_split (
46- X_train_val , y_train_val , test_size = 1 / 9
60+ X_train , X_cal , y_train , y_cal = train_test_split (
61+ X_train_cal , y_train_cal , test_size = 1 / 9
4762)
4863
49- # Train model on training set for MapieRegressor
50- model = estimator = LGBMRegressor (
51- objective = 'quantile' ,
52- alpha = 0.5 ,
53- )
54- model .fit (X_train .reshape (- 1 , 1 ), y_train )
5564
56- # Calibrate uncertainties on validation set
57- mapie = MapieRegressor (model , cv = "prefit" )
58- mapie .fit (X_val .reshape (- 1 , 1 ), y_val )
65+ ##############################################################################
66+ # 2. Pre-train models
67+ # -----------------------------------------------------------------------------
68+ #
69+ # For this example, we will train a MLPRegressor for
70+ # :class:`mapie.regression.MapieRegressor` and multiple LGBMRegressor models
71+ # with a quantile objective as this is a requirement to perform conformalized
72+ # quantile regression using
73+ # :class:`mapie.quantile_regression.MapieQuantileRegressor`. Note that the
74+ # three estimators need to be trained at quantile values of
75+ # $(\alpha/2, 1-(\alpha/2), 0.5)$.
76+
77+
78+ # Train a MLPRegressor for MapieRegressor
79+ est_mlp = MLPRegressor (activation = "relu" , random_state = 1 )
80+ est_mlp .fit (X_train .reshape (- 1 , 1 ), y_train )
81+
82+ # Train LGBMRegressor models for MapieQuantileRegressor
83+ list_estimators_cqr = []
84+ for alpha_ in [alpha / 2 , (1 - (alpha / 2 )), 0.5 ]:
85+ estimator_ = LGBMRegressor (
86+ objective = 'quantile' ,
87+ alpha = alpha_ ,
88+ )
89+ estimator_ .fit (X_train .reshape (- 1 , 1 ), y_train )
90+ list_estimators_cqr .append (estimator_ )
91+
92+
93+ ##############################################################################
94+ # 3. Using MAPIE to calibrate the models
95+ # -----------------------------------------------------------------------------
96+ #
97+ # We will now proceed to calibrate the models using MAPIE. This means using
98+ # the `cv="prefit"` so that we use the models that we already trained prior.
99+ # We then predict using the test set and evaluate its coverage.
100+
101+
102+ # Calibrate uncertainties on calibration set
103+ mapie = MapieRegressor (est_mlp , cv = "prefit" )
104+ mapie .fit (X_cal .reshape (- 1 , 1 ), y_cal )
59105
60106# Evaluate prediction and coverage level on testing set
61- alpha = 0.1
62107y_pred , y_pis = mapie .predict (X_test .reshape (- 1 , 1 ), alpha = alpha )
63- y_pred_low , y_pred_up = y_pis [:, 0 , 0 ], y_pis [:, 1 , 0 ]
64- coverage = regression_coverage_score (y_test , y_pred_low , y_pred_up )
65-
66- # Train models for MapieQuantileRegressor
67- list_estimators = []
68- estimator_low = LGBMRegressor (
69- objective = 'quantile' ,
70- alpha = (alpha / 2 ),
71- )
72- estimator_low .fit (X_train .reshape (- 1 , 1 ), y_train )
73- list_estimators .append (estimator_low )
108+ coverage = regression_coverage_score (y_test , y_pis [:, 0 , 0 ], y_pis [:, 1 , 0 ])
74109
75- estimator_high = LGBMRegressor (
76- objective = 'quantile' ,
77- alpha = (1 - (alpha / 2 )),
78- )
79- estimator_high .fit (X_train .reshape (- 1 , 1 ), y_train )
80- list_estimators .append (estimator_high )
110+ # Calibrate uncertainties on calibration set
111+ mapie_cqr = MapieQuantileRegressor (list_estimators_cqr , cv = "prefit" )
112+ mapie_cqr .fit (X_cal .reshape (- 1 , 1 ), y_cal )
81113
114+ # Evaluate prediction and coverage level on testing set
115+ y_pred_cqr , y_pis_cqr = mapie_cqr .predict (X_test .reshape (- 1 , 1 ))
116+ coverage_cqr = regression_coverage_score (
117+ y_test ,
118+ y_pis_cqr [:, 0 , 0 ],
119+ y_pis_cqr [:, 1 , 0 ]
120+ )
82121
83- estimator = LGBMRegressor (
84- objective = 'quantile' ,
85- alpha = 0.5 ,
86- ) # Note that this is the same model as used for QR
87- estimator .fit (X_train .reshape (- 1 , 1 ), y_train )
88- list_estimators .append (estimator )
89122
90- # Calibrate uncertainties on validation set
91- mapie_cqr = MapieQuantileRegressor (list_estimators , cv = "prefit" )
92- mapie_cqr .fit (X_val .reshape (- 1 , 1 ), y_val )
93- y_pred_cqr , y_pis_cqr = mapie_cqr .predict (X_test .reshape (- 1 , 1 ))
94- y_pred_low_cqr , y_pred_up_cqr = y_pis_cqr [:, 0 , 0 ], y_pis_cqr [:, 1 , 0 ]
95- coverage_cqr = regression_coverage_score (y_test , y_pred_low_cqr , y_pred_up_cqr )
123+ ##############################################################################
124+ # 4. Plots
125+ # -----------------------------------------------------------------------------
126+ #
127+ # In order to view the results shown above, we will plot each of the predictions
128+ # with their prediction interval. The multi-layer perceptron (MLP) with
129+ # :class:`mapie.regression.MapieRegressor` and LGBMRegressor with
130+ # :class:`mapie.quantile_regression.MapieQuantileRegressor`.
96131
97132# Plot obtained prediction intervals on testing set
98133theoretical_semi_width = scipy .stats .norm .ppf (1 - alpha ) * sigma
99134y_test_theoretical = f (X_test )
100135order = np .argsort (X_test )
101136
102- plt .scatter (X_test , y_test , color = "red" , alpha = 0.3 , label = "testing" , s = 2 )
137+ plt .figure (figsize = (8 , 8 ))
138+ plt .plot (
139+ X_test [order ],
140+ y_pred [order ],
141+ label = "Predictions MLP" ,
142+ color = "green"
143+ )
144+ plt .fill_between (
145+ X_test [order ],
146+ y_pis [:, 0 , 0 ][order ],
147+ y_pis [:, 1 , 0 ][order ],
148+ alpha = 0.4 ,
149+ label = "prediction intervals MP" ,
150+ color = "green"
151+ )
152+ plt .plot (
153+ X_test [order ],
154+ y_pred_cqr [order ],
155+ label = "Predictions LGBM" ,
156+ color = "blue"
157+ )
158+ plt .fill_between (
159+ X_test [order ],
160+ y_pis_cqr [:, 0 , 0 ][order ],
161+ y_pis_cqr [:, 1 , 0 ][order ],
162+ alpha = 0.4 ,
163+ label = "prediction intervals MQP" ,
164+ color = "blue"
165+ )
166+ plt .title (
167+ f"Target and effective coverages for:\n "
168+ f"MLP with MapieRegressor alpha={ alpha } : "
169+ + f"({ 1 - alpha :.3f} , { coverage :.3f} )\n "
170+ f"LGBM with MapieQuantileRegressor alpha={ alpha } : "
171+ + f"({ 1 - alpha :.3f} , { coverage_cqr :.3f} )"
172+ )
173+ plt .scatter (X_test , y_test , color = "red" , alpha = 0.7 , label = "testing" , s = 2 )
103174plt .plot (
104175 X_test [order ],
105176 y_test_theoretical [order ],
@@ -118,27 +189,13 @@ def f(x: NDArray) -> NDArray:
118189 color = "gray" ,
119190 ls = "--" ,
120191)
121- plt .plot (X_test [order ], y_pred [order ], label = "Predictions" )
122- plt .fill_between (
123- X_test [order ],
124- y_pred_low [order ],
125- y_pred_up [order ],
126- alpha = 0.4 ,
127- label = "prediction intervals QR"
128- )
129- plt .fill_between (
130- X_test [order ],
131- y_pred_low_cqr [order ],
132- y_pred_up_cqr [order ],
133- alpha = 0.4 ,
134- label = "prediction intervals CQR"
135- )
136- plt .title (
137- f"Target and effective coverages for:\n "
138- f"QR alpha={ alpha } : ({ 1 - alpha :.3f} , { coverage :.3f} )\n "
139- f"CQR alpha={ alpha } : ({ 1 - alpha :.3f} , { coverage_cqr :.3f} )"
140- )
141192plt .xlabel ("x" )
142193plt .ylabel ("y" )
143- plt .legend ()
194+ plt .legend (
195+ loc = 'upper center' ,
196+ bbox_to_anchor = (0.5 , - 0.05 ),
197+ fancybox = True ,
198+ shadow = True ,
199+ ncol = 3
200+ )
144201plt .show ()
0 commit comments