
Commit cdb77bd

MTN: Remove deprecated parameter "method" of _MapieClassifier (#669)
1 parent 8536951 commit cdb77bd

10 files changed (+139, -337 lines)
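Summary of the change: the deprecated `method` string argument of `_MapieClassifier` is removed; callers now pass a conformity score object through `conformity_score`. Below is a minimal before/after migration sketch based on the diffs in this commit; the toy data and estimator are illustrative only and are not part of the commit.

from sklearn.datasets import make_classification
from sklearn.naive_bayes import GaussianNB

from mapie.classification import _MapieClassifier
from mapie.conformity_scores import LACConformityScore

# Illustrative data and prefit estimator (not from this commit)
X, y = make_classification(n_samples=200, n_classes=3, n_informative=4, random_state=42)
X_fit, X_calib, y_fit, y_calib = X[:100], X[100:], y[:100], y[100:]
clf = GaussianNB().fit(X_fit, y_fit)

# Before this commit (parameter now removed):
# mapie = _MapieClassifier(estimator=clf, cv="prefit", method="lac")

# After this commit:
mapie = _MapieClassifier(estimator=clf, cv="prefit", conformity_score=LACConformityScore())
mapie.fit(X_calib, y_calib)
y_pred, y_ps = mapie.predict(X_calib, alpha=0.1)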

examples/classification/3-scientific-articles/plot_sadinle2019_example.py

Lines changed: 6 additions & 1 deletion
@@ -26,6 +26,7 @@
 from sklearn.naive_bayes import GaussianNB
 
 from mapie.classification import _MapieClassifier
+from mapie.conformity_scores import LACConformityScore
 
 # Create training set from multivariate normal distribution
 centers = [(0, 3.5), (-2, 0), (2, 0)]
@@ -56,7 +57,11 @@
 y_pred = clf.predict(X_test)
 y_pred_proba = clf.predict_proba(X_test)
 y_pred_proba_max = np.max(y_pred_proba, axis=1)
-mapie = _MapieClassifier(estimator=clf, cv="prefit", method="lac")
+mapie = _MapieClassifier(
+    estimator=clf,
+    cv="prefit",
+    conformity_score=LACConformityScore()
+)
 mapie.fit(X_train, y_train)
 y_pred_mapie, y_ps_mapie = mapie.predict(X_test, alpha=alpha)

examples/classification/4-tutorials/plot_crossconformal.py

Lines changed: 24 additions & 14 deletions
@@ -37,6 +37,7 @@
 
 from numpy.typing import NDArray
 from mapie.classification import _MapieClassifier
+from mapie.conformity_scores import LACConformityScore, APSConformityScore
 from mapie.metrics.classification import (
     classification_coverage_score,
     classification_mean_width_score,
@@ -110,21 +111,30 @@
 
 kf = KFold(n_splits=5, shuffle=True)
 clfs, mapies, y_preds, y_ps_mapies = {}, {}, {}, {}
-methods = ["lac", "aps"]
+conformity_scores = [LACConformityScore(), APSConformityScore()]
 alpha = np.arange(0.01, 1, 0.01)
-for method in methods:
+for conformity_score in conformity_scores:
     clfs_, mapies_, y_preds_, y_ps_mapies_ = {}, {}, {}, {}
     for fold, (train_index, calib_index) in enumerate(kf.split(X_train)):
         clf = GaussianNB().fit(X_train[train_index], y_train[train_index])
         clfs_[fold] = clf
-        mapie = _MapieClassifier(estimator=clf, cv="prefit", method=method)
+        mapie = _MapieClassifier(
+            estimator=clf,
+            cv="prefit",
+            conformity_score=conformity_score
+        )
         mapie.fit(X_train[calib_index], y_train[calib_index])
         mapies_[fold] = mapie
         y_pred_mapie, y_ps_mapie = mapie.predict(
             X_test_distrib, alpha=alpha, include_last_label="randomized"
         )
         y_preds_[fold], y_ps_mapies_[fold] = y_pred_mapie, y_ps_mapie
-    clfs[method], mapies[method], y_preds[method], y_ps_mapies[method] = (
+    (
+        clfs[conformity_score],
+        mapies[conformity_score],
+        y_preds[conformity_score],
+        y_ps_mapies[conformity_score]
+    ) = (
         clfs_, mapies_, y_preds_, y_ps_mapies_
     )
 
@@ -134,23 +144,23 @@
 # set and the estimated quantile for ``alpha`` = 0.1.
 
 
-fig, axs = plt.subplots(1, len(mapies["lac"]), figsize=(20, 4))
-for i, (key, mapie) in enumerate(mapies["lac"].items()):
+fig, axs = plt.subplots(1, len(mapies[conformity_scores[0]]), figsize=(20, 4))
+for i, (key, mapie) in enumerate(mapies[conformity_scores[0]].items()):
     quantiles = mapie.conformity_score_function_.quantiles_[9]
     axs[i].set_xlabel("Conformity scores")
     axs[i].hist(mapie.conformity_scores_)
     axs[i].axvline(quantiles, ls="--", color="k")
     axs[i].set_title(f"split={key}\nquantile={quantiles:.3f}")
 plt.suptitle(
     "Distribution of scores on each calibration fold for the "
-    f"{methods[0]} method"
+    f"{conformity_scores[0]} method"
 )
 plt.show()
 
 
 ##############################################################################
 # We notice that the estimated quantile slightly varies among the calibration
-# sets for the two methods explored here, suggesting that the
+# sets for the two conformity scores explored here, suggesting that the
 # train/calibration splitting can slightly impact our results.
 #
 # Let's now visualize this impact on the number of labels included in each
@@ -202,7 +212,7 @@ def plot_results(
 
 
 plot_results(
-    mapies["lac"],
+    mapies[conformity_scores[0]],
     X_test,
     X_test_distrib,
     y_test_distrib,
@@ -211,7 +221,7 @@ def plot_results(
 )
 
 plot_results(
-    mapies["aps"],
+    mapies[conformity_scores[1]],
     X_test,
     X_test_distrib,
     y_test_distrib,
@@ -338,19 +348,19 @@ def plot_coverage_width(
 
 STRATEGIES = {
     "score_cv_mean": (
-        Params(method="lac", cv=kf, random_state=42),
+        Params(conformity_score=LACConformityScore(), cv=kf, random_state=42),
         ParamsPredict(include_last_label=False, agg_scores="mean")
     ),
     "score_cv_crossval": (
-        Params(method="lac", cv=kf, random_state=42),
+        Params(conformity_score=LACConformityScore(), cv=kf, random_state=42),
         ParamsPredict(include_last_label=False, agg_scores="crossval")
     ),
     "cum_score_cv_mean": (
-        Params(method="aps", cv=kf, random_state=42),
+        Params(conformity_score=APSConformityScore(), cv=kf, random_state=42),
         ParamsPredict(include_last_label="randomized", agg_scores="mean")
     ),
     "cum_score_cv_crossval": (
-        Params(method="aps", cv=kf, random_state=42),
+        Params(conformity_score=APSConformityScore(), cv=kf, random_state=42),
         ParamsPredict(include_last_label='randomized', agg_scores="crossval")
     )
 }

examples/classification/4-tutorials/plot_main-tutorial-binary-classification.py

Lines changed: 7 additions & 6 deletions
@@ -35,6 +35,7 @@
 
 from numpy.typing import NDArray
 from mapie.classification import _MapieClassifier
+from mapie.conformity_scores import LACConformityScore
 from mapie.metrics.classification import (
     classification_coverage_score,
     classification_mean_width_score,
@@ -122,7 +123,7 @@
 # so that scores can be interpreted as probabilities
 # (see documentation for more information).
 # Then we apply :class:`~mapie.classification._MapieClassifier` in the
-# calibration data with the methods ``score``
+# calibration data with the LAC conformity_score
 # to the estimator indicating that it has already been fitted with
 # `cv="prefit"`.
 # We then estimate the prediction sets with differents alpha values with a
@@ -139,7 +140,7 @@
 calib.fit(X_c1, y_c1)
 
 mapie_clf = _MapieClassifier(
-    estimator=calib, method='lac', cv='prefit', random_state=42
+    estimator=calib, cv='prefit', random_state=42
 )
 mapie_clf.fit(X_c2, y_c2)
 
@@ -156,7 +157,7 @@
 # base estimator.
 #
 # - ``y_ps_mapie``: the prediction sets estimated by MAPIE using the "lac"
-#   method.
+#   conformity score.
 #
 # Let's now visualize the distribution of the conformity scores with the two
 # methods with the calculated quantiles for the three alpha values.
@@ -261,7 +262,7 @@ def plot_results(
 
 
 ##############################################################################
-# For the "lac" method, when the class coverage is not large enough, the
+# For the "lac" conformity score, when the class coverage is not large enough, the
 # prediction sets can be empty when the model is uncertain at the border
 # between two classes. These null regions disappear for larger class coverages
 # but ambiguous classification regions arise with both classes included in
@@ -293,7 +294,7 @@ def plot_results(
 calib.fit(X_c1, y_c1)
 
 mapie_clf = _MapieClassifier(
-    estimator=calib, method='lac', cv='prefit', random_state=42
+    estimator=calib, conformity_score=LACConformityScore(), cv='prefit', random_state=42
 )
 mapie_clf.fit(X_c2, y_c2)
 _, y_ps_mapie = mapie_clf.predict(
@@ -341,7 +342,7 @@ def plot_coverages_widths(alpha, coverage, width, method):
 calib.fit(X_c1, y_c1)
 
 mapie_clf = _MapieClassifier(
-    estimator=calib, method='lac', cv='prefit', random_state=42
+    estimator=calib, cv='prefit', random_state=42
 )
 mapie_clf.fit(X_c2, y_c2)
 _, y_ps_mapie = mapie_clf.predict(

examples/classification/4-tutorials/plot_main-tutorial-classification.py

Lines changed: 6 additions & 5 deletions
@@ -23,6 +23,7 @@
 from sklearn.naive_bayes import GaussianNB
 
 from mapie.classification import _MapieClassifier
+from mapie.conformity_scores import APSConformityScore
 from mapie.metrics.classification import (
     classification_coverage_score,
     classification_mean_width_score,
@@ -104,7 +105,7 @@
 
 ##############################################################################
 # We fit our training data with a Gaussian Naive Base estimator. And then we
-# apply MAPIE in the calibration data with the method ``score`` to the
+# apply MAPIE in the calibration data with the LAC conformity score to the
 # estimator indicating that it has already been fitted with `cv="prefit"`.
 # We then estimate the prediction sets with differents alpha values with a
 # ``fit`` and ``predict`` process.
@@ -113,7 +114,7 @@
 y_pred = clf.predict(X_test)
 y_pred_proba = clf.predict_proba(X_test)
 y_pred_proba_max = np.max(y_pred_proba, axis=1)
-mapie_score = _MapieClassifier(estimator=clf, cv="prefit", method="lac")
+mapie_score = _MapieClassifier(estimator=clf, cv="prefit")
 mapie_score.fit(X_cal, y_cal)
 alpha = [0.2, 0.1, 0.05]
 y_pred_score, y_ps_score = mapie_score.predict(X_test_mesh, alpha=alpha)
@@ -122,7 +123,7 @@
 # * ``y_pred_score``: represents the prediction in the test set by the base
 #   estimator.
 # * ``y_ps_score``: reprensents the prediction sets estimated by MAPIE with
-#   the "lac" method.
+#   the "lac" conformity score.
 
 
 def plot_scores(n, alphas, scores, quantiles):
@@ -240,7 +241,7 @@ def plot_coverages_widths(alpha, coverage, width, method):
 # 2. Conformal Prediction method using the cumulative softmax score
 # -----------------------------------------------------------------
 #
-# We saw in the previous section that the "lac" method is well calibrated by
+# We saw in the previous section that the "lac" conformity score is well calibrated by
 # providing accurate coverage levels. However, it tends to give null
 # prediction sets for uncertain regions, especially when the ``α``
 # value is high.
@@ -254,7 +255,7 @@ def plot_coverages_widths(alpha, coverage, width, method):
 # set after fitting MAPIE on the calibration set.
 
 mapie_aps = _MapieClassifier(
-    estimator=clf, cv="prefit", method="aps"
+    estimator=clf, cv="prefit", conformity_score=APSConformityScore()
 )
 mapie_aps.fit(X_cal, y_cal)
 alpha = [0.2, 0.1, 0.05]

mapie/classification.py

Lines changed: 2 additions & 80 deletions
@@ -582,42 +582,6 @@ class _MapieClassifier(ClassifierMixin, BaseEstimator):
         (i.e. with fit, predict, and predict_proba methods), by default None.
         If ``None``, estimator defaults to a ``LogisticRegression`` instance.
 
-    method: Optional[str]
-        [DEPRECIATED see instead conformity_score]
-        Method to choose for prediction interval estimates.
-        Choose among:
-
-        - ``"naive"``, sum of the probabilities until the 1-alpha threshold.
-
-        - ``"lac"``, Least Ambiguous set-valued
-          Classifier. It is based on the scores
-          (i.e. 1 minus the softmax score of the true label)
-          on the calibration set. See [1] for more details.
-
-        - ``"aps"``, Adaptive Prediction
-          Sets method. It is based on the sum of the softmax outputs of the
-          labels until the true label is reached, on the calibration set.
-          See [2] for more details.
-
-        - ``"raps"``, Regularized Adaptive Prediction Sets method. It uses the
-          same technique as ``"aps"`` method but with a penalty term
-          to reduce the size of prediction sets. See [3] for more
-          details. For now, this method only works with ``"prefit"`` and
-          ``"split"`` strategies.
-
-        - ``"top_k"``, based on the sorted index of the probability of the true
-          label in the softmax outputs, on the calibration set. In case two
-          probabilities are equal, both are taken, thus, the size of some
-          prediction sets may be different from the others. See [3] for
-          more details.
-
-        - ``None``, that does not specify the method used.
-
-        In any case, the `method` parameter does not take precedence over the
-        `conformity_score` parameter to define the method used.
-
-        By default ``None``.
-
     cv: Optional[Union[int, str, BaseCrossValidator]]
         The cross-validation strategy for computing scores.
         It directly drives the distinction between jackknife and cv variants.
@@ -667,9 +631,6 @@ class _MapieClassifier(ClassifierMixin, BaseEstimator):
     conformity_score: BaseClassificationScore
         Score function that handle all that is related to conformity scores.
 
-        In any case, the `conformity_score` parameter takes precedence over the
-        `method` parameter to define the method used.
-
         By default ``None``.
 
     random_state: Optional[Union[int, RandomState]]
@@ -757,7 +718,6 @@ class _MapieClassifier(ClassifierMixin, BaseEstimator):
     def __init__(
         self,
        estimator: Optional[ClassifierMixin] = None,
-        method: Optional[str] = None,
        cv: Optional[Union[int, str, BaseCrossValidator]] = None,
        test_size: Optional[Union[int, float]] = None,
        n_jobs: Optional[int] = None,
@@ -766,7 +726,6 @@ def __init__(
         verbose: int = 0
     ) -> None:
         self.estimator = estimator
-        self.method = method
         self.cv = cv
         self.test_size = test_size
         self.n_jobs = n_jobs
@@ -862,40 +821,6 @@ def _check_fit_parameter(
     ):
         """
         Perform several checks on class parameters.
-
-        Parameters
-        ----------
-        X: ArrayLike
-            Observed values.
-
-        y: ArrayLike
-            Target values.
-
-        sample_weight: Optional[ArrayLike] of shape (n_samples,)
-            Non-null sample weights.
-
-        groups: Optional[ArrayLike] of shape (n_samples,)
-            Group labels for the samples used while splitting the dataset into
-            train/test set.
-            By default ``None``.
-
-        Returns
-        -------
-        Tuple[Optional[ClassifierMixin],
-        Optional[Union[int, str, BaseCrossValidator]],
-        ArrayLike, NDArray, NDArray, Optional[NDArray],
-        Optional[NDArray], ArrayLike]
-            Parameters checked
-
-        Raises
-        ------
-        ValueError
-            If conformity score is FittedResidualNormalizing score and method
-            is neither ``"prefit"`` or ``"split"``.
-
-        ValueError
-            If ``cv`` is `"prefit"`` or ``"split"`` and ``method`` is not
-            ``"base"``.
         """
         self._check_parameters()
         cv = _check_cv(
@@ -917,10 +842,7 @@ def _check_fit_parameter(
         self.label_encoder_ = self._get_label_encoder()
         y_enc = self.label_encoder_.transform(y)
 
-        cs_estimator = check_classification_conformity_score(
-            conformity_score=self.conformity_score,
-            method=self.method,
-        )
+        cs_estimator = check_classification_conformity_score(self.conformity_score)
         check_depreciated_size_raps(size_raps)
         cs_estimator.set_external_attributes(
             classes=self.classes_,
@@ -936,7 +858,7 @@ def _check_fit_parameter(
             )
         ):
             raise ValueError(
-                "RAPS method can only be used "
+                "RAPS conformity score can only be used "
                 "with SplitConformalClassifier."
            )
 
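For users migrating scripts by hand, here is a rough sketch of how the removed ``method`` strings (listed in the deleted docstring above) correspond to conformity score objects. Only LACConformityScore and APSConformityScore are confirmed by the imports in this commit; the remaining class names are assumptions and should be checked against the installed package.

from mapie.conformity_scores import LACConformityScore, APSConformityScore

# Mapping confirmed by this commit's example imports
METHOD_TO_SCORE = {
    "lac": LACConformityScore,  # Least Ambiguous set-valued Classifier
    "aps": APSConformityScore,  # Adaptive Prediction Sets
}
# "naive", "raps" and "top_k" have analogous conformity score classes;
# their exact names (e.g. NaiveConformityScore, RAPSConformityScore,
# TopKConformityScore) are assumptions, not confirmed by this commit.

def conformity_score_from_method(method):
    # Return a fresh conformity score instance for an old ``method`` string.
    return METHOD_TO_SCORE[method]()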