Throughout this tutorial, we will answer the following questions:

- How does the number of classes in the prediction sets vary according to
-   the significance level ?
+   the confidence level?

- - Is the chosen conformal method well calibrated ?
+ - Is the chosen conformal method well calibrated?

- - What are the pros and cons of the conformal methods included in MAPIE ?
+ - What are the pros and cons of the conformal methods included in MAPIE?
"""

import matplotlib.pyplot as plt
import numpy as np
- from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

- from mapie.classification import _MapieClassifier
- from mapie.conformity_scores import APSConformityScore
+ from mapie.classification import SplitConformalClassifier
+ from mapie.utils import train_conformalize_test_split

from mapie.metrics.classification import (
-     classification_coverage_score,
+     classification_coverage_score_v2,
    classification_mean_width_score,
)

@@ -36,17 +35,17 @@
# We will use MAPIE to estimate a prediction set of several classes such
# that the probability that the true label of a new test point is included
# in the prediction set is always higher than the target confidence level:
- # ``P(Yₙ₊₁ ∈ Ĉₙ,α(Xₙ₊₁)) ≥ 1 - α``
+ # ``P(Yₙ₊₁ ∈ Ĉₙ,α(Xₙ₊₁)) ≥ 1 - α``.
# We start by using the softmax score output by the base classifier as the
# conformity score on a toy two-dimensional dataset.
#
# We estimate the prediction sets as follows:
#
- # * Generate a dataset with train, calibration and test, the model is
+ # * Generate a dataset with train, conformalization and test sets; the model is
#   fitted on the training set.
#
# * Set the conformal score ``Sᵢ = 𝑓̂(Xᵢ)ᵧᵢ``, the softmax
- #   output of the true class for each sample in the calibration set.
+ #   output of the true class for each sample in the conformity set.
#
# * Define ``q̂`` as being the ``(n + 1)(α) / n``
#   previous quantile of ``S₁, ..., Sₙ``
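#
# As a rough, self-contained illustration of the recipe above (only a sketch
# with made-up arrays and hypothetical variable names, not the MAPIE
# implementation), the LAC threshold and a prediction set could be computed
# by hand as follows:

import numpy as np

rng = np.random.default_rng(0)
proba_conf = rng.dirichlet(np.ones(3), size=10)  # hypothetical softmax outputs
y_conf_toy = rng.integers(0, 3, size=10)         # hypothetical true labels

# Conformity score: softmax output of the true class for each sample
s_toy = proba_conf[np.arange(len(y_conf_toy)), y_conf_toy]

# q̂: lower quantile of the scores with the (n + 1)(α) / n correction above
n_toy, alpha_toy = len(s_toy), 0.1
q_hat = np.quantile(s_toy, (n_toy + 1) * alpha_toy / n_toy, method="lower")

# Prediction set for a new point: every class whose softmax score reaches q̂
proba_new = rng.dirichlet(np.ones(3))
prediction_set_toy = np.where(proba_new >= q_hat)[0]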
@@ -73,11 +72,9 @@
    for center, cov in zip(centers, covs)
])
y = np.hstack([np.full(n_samples, i) for i in range(n_classes)])
- X_train_cal, X_test, y_train_cal, y_test = train_test_split(
-     X, y, test_size=0.2
- )
- X_train, X_cal, y_train, y_cal = train_test_split(
-     X_train_cal, y_train_cal, test_size=0.25
+ (X_train, X_conf, X_test,
+  y_train, y_conf, y_test) = train_conformalize_test_split(
+     X, y, train_size=0.6, conformalize_size=0.2, test_size=0.2
)

xx, yy = np.meshgrid(
@@ -105,28 +102,33 @@

##############################################################################
# We fit our training data with a Gaussian Naive Bayes estimator. Then we
- # apply MAPIE in the calibration data with the LAC conformity score to the
- # estimator indicating that it has already been fitted with `cv="prefit"`.
- # We then estimate the prediction sets with differents alpha values with a
- # ``fit`` and ``predict`` process.
+ # apply MAPIE on the conformity data with the LAC conformity score to the
+ # estimator, indicating that it has already been fitted with `prefit=True`.
+ # We then estimate the prediction sets with different confidence level values
+ # with a ``conformalize`` and ``predict_set`` process.

- clf = GaussianNB().fit(X_train, y_train)
+ clf = GaussianNB()
+ clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
y_pred_proba = clf.predict_proba(X_test)
y_pred_proba_max = np.max(y_pred_proba, axis=1)
- mapie_score = _MapieClassifier(estimator=clf, cv="prefit")
- mapie_score.fit(X_cal, y_cal)
- alpha = [0.2, 0.1, 0.05]
- y_pred_score, y_ps_score = mapie_score.predict(X_test_mesh, alpha=alpha)
+ confidence_level = [0.8, 0.9, 0.95]
+ mapie_score = SplitConformalClassifier(
+     estimator=clf,
+     confidence_level=confidence_level,
+     prefit=True
+ )
+ mapie_score.conformalize(X_conf, y_conf)
+ y_pred_score, y_ps_score = mapie_score.predict_set(X_test_mesh)

##############################################################################
# * ``y_pred_score``: represents the point predictions of the base
#   estimator.
- # * ``y_ps_score``: reprensents the prediction sets estimated by MAPIE with
+ # * ``y_ps_score``: represents the prediction sets estimated by MAPIE with
#   the "lac" conformity score.
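
# ``y_ps_score`` is expected to be a boolean array of shape
# ``(n_samples, n_classes, n_confidence_levels)`` (this is how it is indexed
# in the plotting code below). For example, the size of each prediction set at
# the first confidence level can be inspected with:

set_sizes = y_ps_score[:, :, 0].sum(axis=1)
print(set_sizes[:10])  # number of classes kept for the first ten grid points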


- def plot_scores(n, alphas, scores, quantiles):
+ def plot_scores(n, confidence_levels, scores, quantiles):
    colors = {0: "#1f77b4", 1: "#ff7f0e", 2: "#2ca02c"}
    plt.figure(figsize=(7, 5))
    plt.hist(scores, bins="auto")
@@ -137,7 +139,7 @@ def plot_scores(n, alphas, scores, quantiles):
            ymax=400,
            color=colors[i],
            ls="dashed",
-             label=f"alpha = {alphas[i]}"
+             label=f"confidence_level = {confidence_levels[i]}"
        )
    plt.title("Distribution of scores")
    plt.legend()
@@ -149,22 +151,22 @@ def plot_scores(n, alphas, scores, quantiles):
##############################################################################
# Let’s see the distribution of the scores with the calculated quantiles.

- scores = mapie_score.conformity_scores_
- n = len(mapie_score.conformity_scores_)
- quantiles = mapie_score.conformity_score_function_.quantiles_
- plot_scores(n, alpha, scores, quantiles)
+ scores = mapie_score._mapie_classifier.conformity_scores_
+ n = len(mapie_score._mapie_classifier.conformity_scores_)
+ quantiles = mapie_score._mapie_classifier.conformity_score_function_.quantiles_
+ plot_scores(n, confidence_level, scores, quantiles)

##############################################################################
- # The estimated quantile increases with alpha.
- # A high value of alpha can potentially lead to a high quantile which would
- # not necessarily be reached by any class in uncertain areas, resulting in
- # null regions.
+ # The estimated quantile increases with the confidence level.
+ # A low confidence level can potentially lead to a low quantile ``q``; the
+ # associated ``1 - q`` threshold would therefore not necessarily be reached
+ # by any class in uncertain areas, resulting in null regions.
#
# We will now visualize the differences between the prediction sets of the
- # different values of alpha.
+ # different values of the confidence level.
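
# Before plotting, a quick way to see those null regions numerically (a small
# illustrative check, relying on the shape assumption stated earlier) is to
# count the empty prediction sets at each confidence level:

for i, cl in enumerate(confidence_level):
    empty_rate = (y_ps_score[:, :, i].sum(axis=1) == 0).mean()
    print(f"confidence_level={cl}: {empty_rate:.1%} empty prediction sets")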


- def plot_results(alphas, X, y_pred, y_ps):
+ def plot_results(confidence_levels, X, y_pred, y_ps):
    tab10 = plt.cm.get_cmap('Purples', 4)
    colors = {0: "#1f77b4", 1: "#ff7f0e", 2: "#2ca02c", 3: "#d62728"}
    y_pred_col = list(map(colors.get, y_pred))
@@ -179,7 +181,7 @@ def plot_results(alphas, X, y_pred, y_ps):
        alpha=0.4
    )
    axs[0].set_title("Predicted labels")
-     for i, alpha in enumerate(alphas):
+     for i, confidence_level in enumerate(confidence_levels):
        y_pi_sums = y_ps[:, :, i].sum(axis=1)
        num_labels = axs[i + 1].scatter(
            X[:, 0],
@@ -193,11 +195,11 @@ def plot_results(alphas, X, y_pred, y_ps):
            vmax=3
        )
        plt.colorbar(num_labels, ax=axs[i + 1])
-         axs[i + 1].set_title(f"Number of labels for alpha={alpha}")
+         axs[i + 1].set_title(f"Number of labels for confidence_level={confidence_level}")
    plt.show()


- plot_results(alpha, X_test_mesh, y_pred_score, y_ps_score)
+ plot_results(confidence_level, X_test_mesh, y_pred_score, y_ps_score)

##############################################################################
# When the class coverage is not large enough, the prediction sets can be
@@ -208,80 +210,94 @@ def plot_results(alphas, X, y_pred, y_ps):
# classifier.
#
# Let’s now study the effective coverage and the mean prediction set widths
- # as function of the ``1 - α`` target coverage. To this aim, we use once
+ # as a function of the ``confidence_level`` target coverage. To this aim, we use once
# again the ``predict_set`` method of MAPIE to estimate prediction sets on a
- # large number of ``α`` values.
-
- alpha2 = np.arange(0.02, 0.98, 0.02)
- _, y_ps_score2 = mapie_score.predict(X_test, alpha=alpha2)
- coverages_score = [
-     classification_coverage_score(y_test, y_ps_score2[:, :, i])
-     for i, _ in enumerate(alpha2)
- ]
+ # large number of ``confidence_level`` values.
+
+ confidence_level2 = np.arange(0.02, 0.98, 0.02)
+ mapie_score2 = SplitConformalClassifier(
+     estimator=clf,
+     confidence_level=confidence_level2,
+     prefit=True
+ )
+ mapie_score2.conformalize(X_conf, y_conf)
+ _, y_ps_score2 = mapie_score2.predict_set(X_test)
+ coverages_score = classification_coverage_score_v2(y_test, y_ps_score2)
widths_score = classification_mean_width_score(y_ps_score2)
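
# For intuition, the effective coverage computed above is simply the fraction
# of test points whose true label falls inside the prediction set. A
# hand-rolled equivalent (a sketch under the same shape assumption as before,
# expected to closely match ``coverages_score``) would be:

coverage_manual = np.array([
    y_ps_score2[np.arange(len(y_test)), y_test, i].mean()
    for i in range(y_ps_score2.shape[2])
])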


- def plot_coverages_widths(alpha, coverage, width, method):
+ def plot_coverages_widths(confidence_level, coverage, width, conformity_score):
    fig, axs = plt.subplots(1, 2, figsize=(12, 5))
-     axs[0].scatter(1 - alpha, coverage, label=method)
-     axs[0].set_xlabel("1 - alpha")
+     axs[0].scatter(confidence_level, coverage, label=conformity_score)
+     axs[0].set_xlabel("Confidence level")
    axs[0].set_ylabel("Coverage score")
    axs[0].plot([0, 1], [0, 1], label="x=y", color="black")
    axs[0].legend()
-     axs[1].scatter(1 - alpha, width, label=method)
-     axs[1].set_xlabel("1 - alpha")
+     axs[1].scatter(confidence_level, width, label=conformity_score)
+     axs[1].set_xlabel("Confidence level")
    axs[1].set_ylabel("Average size of prediction sets")
    axs[1].legend()
    plt.show()


- plot_coverages_widths(alpha2, coverages_score, widths_score, "lac")
+ plot_coverages_widths(
+     confidence_level2, coverages_score, widths_score, "lac"
+ )


##############################################################################
# 2. Conformal Prediction method using the cumulative softmax score
# -----------------------------------------------------------------
#
# We saw in the previous section that the "lac" conformity score is well calibrated by
# providing accurate coverage levels. However, it tends to give null
- # prediction sets for uncertain regions, especially when the ``α``
- # value is high.
+ # prediction sets for uncertain regions, especially when the ``confidence_level``
+ # value is low.
# MAPIE includes another method, called Adaptive Prediction Set (APS),
# whose conformity score is the cumulated score of the softmax output until
# the true label is reached (see the theoretical description for more details).
- # We will see in this Section that this method no longer estimates null
- # prediction sets but by giving slightly bigger prediction sets.
+ # We will see in this section that this method no longer estimates null
+ # prediction sets, at the cost of slightly bigger prediction sets.
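
# To make the APS conformity score more concrete, here is a minimal sketch of
# "cumulated softmax mass until the true label is reached" on one made-up
# sample (an illustration only, with hypothetical names; MAPIE's
# implementation also handles ties and randomization):

proba_toy = np.array([[0.5, 0.3, 0.2]])  # softmax output for a single sample
y_true_toy = np.array([2])               # its true label

order = np.argsort(-proba_toy, axis=1)                    # classes by decreasing score
sorted_proba = np.take_along_axis(proba_toy, order, axis=1)
cum_mass = np.cumsum(sorted_proba, axis=1)                # cumulative softmax mass
rank_of_true = np.where(order == y_true_toy[:, None])[1]  # position of the true label
aps_score = cum_mass[np.arange(len(y_true_toy)), rank_of_true]  # 0.5 + 0.3 + 0.2 = 1.0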
#
# Let's visualize the prediction sets obtained with the APS method on the test
- # set after fitting MAPIE on the calibration set.
-
- mapie_aps = _MapieClassifier(
-     estimator=clf, cv="prefit", conformity_score=APSConformityScore()
+ # set after fitting MAPIE on the conformity set.
+
+ confidence_level = [0.8, 0.9, 0.95]
+ mapie_aps = SplitConformalClassifier(
+     estimator=clf,
+     confidence_level=confidence_level,
+     conformity_score="aps",
+     prefit=True
)
- mapie_aps.fit(X_cal, y_cal)
- alpha = [0.2, 0.1, 0.05]
- y_pred_aps, y_ps_aps = mapie_aps.predict(
-     X_test_mesh, alpha=alpha, include_last_label=True
+ mapie_aps.conformalize(X_conf, y_conf)
+ y_pred_aps, y_ps_aps = mapie_aps.predict_set(
+     X_test_mesh, conformity_score_params={"include_last_label": True}
)

- plot_results(alpha, X_test_mesh, y_pred_aps, y_ps_aps)
+ plot_results(confidence_level, X_test_mesh, y_pred_aps, y_ps_aps)

##############################################################################
# One can notice that the uncertain regions are emphasized by wider
# boundaries, but without null prediction sets, in contrast to the first
# "lac" method.

- _, y_ps_aps2 = mapie_aps.predict(
-     X_test, alpha=alpha2, include_last_label="randomized"
+ mapie_aps2 = SplitConformalClassifier(
+     estimator=clf,
+     confidence_level=confidence_level2,
+     conformity_score="aps",
+     prefit=True
)
- coverages_aps = [
-     classification_coverage_score(y_test, y_ps_aps2[:, :, i])
-     for i, _ in enumerate(alpha2)
- ]
+ mapie_aps2.conformalize(X_conf, y_conf)
+ _, y_ps_aps2 = mapie_aps2.predict_set(
+     X_test, conformity_score_params={"include_last_label": "randomized"}
+ )
+ coverages_aps = classification_coverage_score_v2(y_test, y_ps_aps2)
widths_aps = classification_mean_width_score(y_ps_aps2)

- plot_coverages_widths(alpha2, coverages_aps, widths_aps, "lac")
+ plot_coverages_widths(
+     confidence_level2, coverages_aps, widths_aps, "aps"
+ )

##############################################################################
- # This method also gives accurate calibration plots, meaning that the
+ # This method also gives accurate conformalization plots, meaning that the
# effective coverage level is always very close to the target coverage,
# sometimes at the expense of slightly bigger prediction sets.
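
# A quick numerical check of that trade-off (optional, relying on the arrays
# computed above): compare the average prediction set sizes obtained with the
# two conformity scores across the confidence levels.

print("average set size (lac):", np.round(np.mean(widths_score), 2))
print("average set size (aps):", np.round(np.mean(widths_aps), 2))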