
Commit 919ca41

ENH - Rename MapieCalibrator to TopLabelCalibrator and remove unused method parameter (#687)
1 parent ec04387 commit 919ca41
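
For users upgrading across this commit, here is a minimal before/after sketch of the public API change (the toy data mirrors the class docstring example shown in the diff below; everything else is illustrative only):

import numpy as np
from mapie.calibration import TopLabelCalibrator  # formerly MapieCalibrator

X_toy = np.arange(9).reshape(-1, 1)
y_toy = np.array([0, 0, 1, 0, 1, 2, 1, 2, 2])

# Before: MapieCalibrator(method="top_label").fit(X_toy, y_toy, random_state=20)
# After: the class is renamed and the unused "method" parameter is removed.
mapie = TopLabelCalibrator().fit(X_toy, y_toy, random_state=20)
y_calib = mapie.predict_proba(X_toy)  # top-label calibrated scores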

File tree: 6 files changed (+42, -84 lines)

doc/api.rst

Lines changed: 1 addition & 1 deletion

@@ -41,7 +41,7 @@ Calibration
     :toctree: generated/
     :template: class.rst
 
-    mapie.calibration.MapieCalibrator
+    mapie.calibration.TopLabelCalibrator
 
 Calibration Metrics
 ======================================

doc/theoretical_description_binary_classification.rst

Lines changed: 1 addition & 1 deletion

@@ -82,7 +82,7 @@ Definition 3 (Approximate calibration) [1].
 .. math::
     |\mathbb{E}[Y|\hat{\mu}(X)] - \hat{\mu}(X)| \leq \epsilon
 
-See :class:`~sklearn.calibration.CalibratedClassifierCV` or :class:`~mapie.calibration.MapieCalibrator`
+See :class:`~sklearn.calibration.CalibratedClassifierCV` or :class:`~mapie.calibration.TopLabelCalibrator`
 to use a calibrator.
 
 In the CP framework, it is worth noting that Venn predictors produce probability-type predictions
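
As a companion to the hunk above, a short sketch of the two calibration routes it points to, on synthetic binary data (the dataset and hyperparameters are invented; the comments follow the defaults visible in the calibration.py diff below):

import numpy as np
from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from mapie.calibration import TopLabelCalibrator

X, y = make_classification(n_samples=500, n_informative=4, random_state=0)

# scikit-learn route: cross-validated sigmoid (Platt) calibration.
sk_cal = CalibratedClassifierCV(LogisticRegression(), method="sigmoid", cv=5)
p_sklearn = sk_cal.fit(X, y).predict_proba(X)

# MAPIE route: the class renamed in this commit; with the default cv="split",
# fit holds out part of (X, y) internally as the calibration set.
mapie_cal = TopLabelCalibrator(estimator=LogisticRegression())
mapie_cal.fit(X, y, random_state=0)
p_mapie = mapie_cal.predict_proba(X)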

examples/classification/4-tutorials/plot_main-tutorial-binary-classification.py

Lines changed: 1 addition & 1 deletion

@@ -22,7 +22,7 @@
 Instead, we recommend the use of calibration (see more details in the
 Calibration section of the documentation or by using the
 :class:`~sklearn.calibration.CalibratedClassifierCV` proposed by sklearn
-or :class:`~mapie.calibration.MapieCalibrator` proposed in MAPIE).
+or :class:`~mapie.calibration.TopLabelCalibrator` proposed in MAPIE).
 """
 
 from typing import List

mapie/calibration.py

Lines changed: 8 additions & 39 deletions

@@ -18,12 +18,11 @@
     _check_null_weight, _fit_estimator, _get_calib_set)
 
 
-class MapieCalibrator(BaseEstimator, ClassifierMixin):
+class TopLabelCalibrator(BaseEstimator, ClassifierMixin):
     """
-    Calibration for multi-class problems.
-
-    This class performs calibration for various methods, currently only
-    top-label calibration [1].
+    Top-label calibration for multi-class problems.
+    Performs a calibration on the class with the highest score
+    given both score and class, see section 2 of [1].
 
     Parameters
     ----------
@@ -33,13 +32,6 @@ class MapieCalibrator(BaseEstimator, ClassifierMixin):
         ``None``.
         If ``None``, estimator defaults to a ``LogisticRegression`` instance.
 
-    method: Optional[str]
-        The only valid method is "top_label".
-        Performs a calibration on the class with highest score
-        given both score and class, see section 2 of [1].
-
-        By default "top_label".
-
     calibrator : Optional[Union[str, RegressorMixin]]
         Any calibrator with scikit-learn API
         (i.e. with fit, predict, and predict_proba methods), by default
@@ -62,9 +54,6 @@ class MapieCalibrator(BaseEstimator, ClassifierMixin):
 
     Attributes
     ----------
-    valid_methods: List[str]
-        List of all valid methods.
-
     classes_: NDArray
         Array with the name of each class.
 
@@ -90,10 +79,10 @@ class MapieCalibrator(BaseEstimator, ClassifierMixin):
     Examples
     --------
     >>> import numpy as np
-    >>> from mapie.calibration import MapieCalibrator
+    >>> from mapie.calibration import TopLabelCalibrator
    >>> X_toy = np.arange(9).reshape(-1, 1)
    >>> y_toy = np.stack([0, 0, 1, 0, 1, 2, 1, 2, 2])
-    >>> mapie = MapieCalibrator().fit(X_toy, y_toy, random_state=20)
+    >>> mapie = TopLabelCalibrator().fit(X_toy, y_toy, random_state=20)
    >>> y_calib = mapie.predict_proba(X_toy)
    >>> print(y_calib)
    [[0.84...... nan nan]
@@ -117,21 +106,17 @@ class MapieCalibrator(BaseEstimator, ClassifierMixin):
         "isotonic": IsotonicRegression(out_of_bounds="clip")
     }
 
-    valid_methods = ["top_label"]
-
     valid_cv = ["prefit", "split"]
 
     valid_inputs = ["multiclass", "binary"]
 
     def __init__(
         self,
         estimator: Optional[ClassifierMixin] = None,
-        method: str = "top_label",
         calibrator: Optional[Union[str, RegressorMixin]] = None,
         cv: Optional[str] = "split",
     ) -> None:
         self.estimator = estimator
-        self.method = method
         self.calibrator = calibrator
         self.cv = cv
 
@@ -238,21 +223,6 @@ def _get_labels(
         y_pred = self.classes_[np.argmax(pred, axis=1)]
         return max_class_prob, y_pred
 
-    def _check_method(self) -> None:
-        """
-        Check that the method is valid.
-
-        Raises
-        ------
-        ValueError
-            If the method does not belong to the valid methods.
-        """
-        if self.method not in self.valid_methods:
-            raise ValueError(
-                "Invalid method, allowed method are: "
-                + (", ").join(self.valid_methods) + "."
-            )
-
     def _check_type_of_target(self, y: ArrayLike):
         """
         Check type of target for calibration class.
@@ -431,7 +401,7 @@ def fit(
         shuffle: Optional[bool] = True,
         stratify: Optional[ArrayLike] = None,
         **fit_params,
-    ) -> MapieCalibrator:
+    ) -> TopLabelCalibrator:
         """
         Calibrate the estimator on given datasets, according to the chosen
         method.
@@ -471,10 +441,9 @@ def fit(
 
         Returns
         -------
-        MapieCalibrator
+        TopLabelCalibrator
             The model itself.
         """
-        self._check_method()
         cv = self._check_cv(self.cv)
         X, y = indexable(X, y)
         y = _check_y(y)
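
The remaining constructor arguments are estimator, calibrator and cv. A hedged sketch of how they compose after this change (the data, split sizes and the choice of "isotonic" are illustrative; "isotonic" and cv="prefit" both appear in the class attributes above, but check the released docs for the exact supported strings):

import numpy as np
from sklearn.linear_model import LogisticRegression
from mapie.calibration import TopLabelCalibrator

# Hypothetical toy data: three mildly separated classes.
rng = np.random.default_rng(0)
y = rng.integers(0, 3, size=200)
X = y[:, None] + rng.normal(scale=0.3, size=(200, 2))

# A classifier fitted elsewhere can be wrapped with cv="prefit";
# the string "isotonic" maps to the IsotonicRegression calibrator listed above.
clf = LogisticRegression().fit(X[:100], y[:100])
cal = TopLabelCalibrator(estimator=clf, calibrator="isotonic", cv="prefit")
cal.fit(X[100:], y[100:])           # used purely as the calibration set
proba = cal.predict_proba(X[100:])  # calibrated score for the top label, nan elsewhere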

mapie/tests/test_calibration.py

Lines changed: 28 additions & 39 deletions

@@ -15,7 +15,7 @@
 from sklearn.pipeline import Pipeline, make_pipeline
 from sklearn.preprocessing import OneHotEncoder
 
-from mapie.calibration import MapieCalibrator
+from mapie.calibration import TopLabelCalibrator
 from mapie.metrics.calibration import top_label_ece
 from mapie.metrics.calibration import expected_calibration_error
 
@@ -110,20 +110,19 @@
 
 def test_initialized() -> None:
     """Test that initialization does not crash."""
-    MapieCalibrator()
+    TopLabelCalibrator()
 
 
 def test_default_parameters() -> None:
     """Test default values of input parameters."""
-    mapie_cal = MapieCalibrator()
-    assert mapie_cal.method == "top_label"
+    mapie_cal = TopLabelCalibrator()
     assert mapie_cal.calibrator is None
     assert mapie_cal.cv == "split"
 
 
 def test_default_fit_params() -> None:
     """Test default sample weights and other parameters."""
-    mapie_cal = MapieCalibrator()
+    mapie_cal = TopLabelCalibrator()
     assert (
         signature(mapie_cal.fit).parameters["sample_weight"].default
         is None
@@ -152,15 +151,15 @@ def test_false_str_estimator() -> None:
         ValueError,
         match=r".*Please provide a string in*",
     ):
-        mapie_cal = MapieCalibrator(
+        mapie_cal = TopLabelCalibrator(
             calibrator="not_estimator"
         )
         mapie_cal.fit(X, y)
 
 
 def test_estimator_none() -> None:
     """Test that no input for calibrator will return a sigmoid"""
-    mapie_cal = MapieCalibrator()
+    mapie_cal = TopLabelCalibrator()
     mapie_cal.fit(X, y)
     assert isinstance(
         mapie_cal.calibrators[list(mapie_cal.calibrators.keys())[0]],
@@ -172,34 +171,24 @@ def test_check_type_of_target() -> None:
     """Test the type of target."""
     X = [0.5, 0.2, 0.4, 0.8, 3.8]
     y = [0.4, 0.2, 3.6, 3, 0.2]
-    mapie_cal = MapieCalibrator()
+    mapie_cal = TopLabelCalibrator()
     with pytest.raises(
         ValueError,
         match=r".*Make sure to have one of the allowed targets:*"
     ):
         mapie_cal.fit(X, y)
 
 
-def test_other_methods() -> None:
-    """Test that invalid string for method returns error"""
-    with pytest.raises(
-        ValueError,
-        match=r".*Invalid method, allowed method are*",
-    ):
-        mapie_cal = MapieCalibrator(method="no_method")
-        mapie_cal.fit(X, y)
-
-
 def test_prefit_cv_argument() -> None:
     """Test that prefit method works"""
     est = RandomForestClassifier().fit(X, y)
-    mapie_cal = MapieCalibrator(estimator=est, cv="prefit")
+    mapie_cal = TopLabelCalibrator(estimator=est, cv="prefit")
     mapie_cal.fit(X, y)
 
 
 def test_split_cv_argument() -> None:
     """Test that split method works"""
-    mapie_cal = MapieCalibrator(cv="split")
+    mapie_cal = TopLabelCalibrator(cv="split")
     mapie_cal.fit(X, y)
 
 
@@ -210,7 +199,7 @@ def test_invalid_cv_argument(cv: str) -> None:
         ValueError,
         match=r".*Invalid cv argument*",
    ):
-        mapie_cal = MapieCalibrator(cv=cv)
+        mapie_cal = TopLabelCalibrator(cv=cv)
         mapie_cal.fit(X, y)
 
 
@@ -219,10 +208,10 @@ def test_prefit_split_same_results() -> None:
     est = RandomForestClassifier(
         random_state=random_state
     ).fit(X_train, y_train)
-    mapie_cal_prefit = MapieCalibrator(estimator=est, cv="prefit")
+    mapie_cal_prefit = TopLabelCalibrator(estimator=est, cv="prefit")
     mapie_cal_prefit.fit(X_calib, y_calib)
 
-    mapie_cal_split = MapieCalibrator(
+    mapie_cal_split = TopLabelCalibrator(
         estimator=RandomForestClassifier(random_state=random_state)
     )
     mapie_cal_split.fit(
@@ -242,7 +231,7 @@ def test_not_seen_calibrator() -> None:
         UserWarning,
         match=r".*WARNING: This predicted label*"
     ):
-        mapie_cal = MapieCalibrator()
+        mapie_cal = TopLabelCalibrator()
         mapie_cal.fit(X, y)
         mapie_cal.calibrators.clear()
         mapie_cal.predict_proba(X)
@@ -255,7 +244,7 @@ def test_shape_of_output(
     estimator: ClassifierMixin
 ) -> None:
     """Test that the size of the outputs are coherent."""
-    mapie_cal = MapieCalibrator(
+    mapie_cal = TopLabelCalibrator(
         estimator=estimator,
         calibrator=calibrator,
     )
@@ -269,7 +258,7 @@ def test_number_of_classes_equal_calibrators() -> None:
     Test that the number of calibrators is the same as the number
     of classes in the calibration step.
     """
-    mapie_cal = MapieCalibrator()
+    mapie_cal = TopLabelCalibrator()
     mapie_cal.fit(
         X=X_,
         y=y_,
@@ -281,7 +270,7 @@ def test_number_of_classes_equal_calibrators() -> None:
 
 def test_same_predict() -> None:
     """Test that the same prediction is made regardless of the calibration."""
-    mapie_cal = MapieCalibrator(method="top_label")
+    mapie_cal = TopLabelCalibrator()
     mapie_cal.fit(
         X=X_,
         y=y_,
@@ -300,13 +289,13 @@ def test_same_predict() -> None:
     )
 
 
-@pytest.mark.parametrize("cv", MapieCalibrator.valid_cv)
+@pytest.mark.parametrize("cv", TopLabelCalibrator.valid_cv)
 def test_correct_results(cv: str) -> None:
     """
     Test that the y_score and top label score from the test dataset result
     in the correct scores (in a multi-class setting).
     """
-    mapie_cal = MapieCalibrator(cv=cv)
+    mapie_cal = TopLabelCalibrator(cv=cv)
     mapie_cal.fit(
         X=X_,
         y=y_,
@@ -326,7 +315,7 @@ def test_correct_results(cv: str) -> None:
     )
 
 
-@pytest.mark.parametrize("cv", MapieCalibrator.valid_cv)
+@pytest.mark.parametrize("cv", TopLabelCalibrator.valid_cv)
 def test_correct_results_binary(cv: str) -> None:
     """
     Test that the y_score and top label score from the test dataset result
@@ -338,7 +327,7 @@ def test_correct_results_binary(cv: str) -> None:
         n_informative=4,
         random_state=random_state
     )
-    mapie_cal = MapieCalibrator(cv=cv)
+    mapie_cal = TopLabelCalibrator(cv=cv)
     mapie_cal.fit(
         X=X_binary,
         y=y_binary,
@@ -375,17 +364,17 @@ def test_different_binary_y_combinations() -> None:
         n_informative=4,
         random_state=random_state
     )
-    mapie_cal = MapieCalibrator()
+    mapie_cal = TopLabelCalibrator()
     mapie_cal.fit(X_comb, y_comb, random_state=random_state)
     y_score = mapie_cal.predict_proba(X_comb)
 
     y_comb1 = np.where(y_comb == 2, 3, y_comb)
-    mapie_cal1 = MapieCalibrator()
+    mapie_cal1 = TopLabelCalibrator()
     mapie_cal1.fit(X_comb, y_comb1, random_state=random_state)
     y_score1 = mapie_cal1.predict_proba(X_comb)
 
     y_comb2 = np.where(y_comb == 2, 40, y_comb)
-    mapie_cal2 = MapieCalibrator()
+    mapie_cal2 = TopLabelCalibrator()
     mapie_cal2.fit(X_comb, y_comb2, random_state=random_state)
     y_score2 = mapie_cal2.predict_proba(X_comb)
     np.testing.assert_array_almost_equal(y_score, y_score1)
@@ -417,9 +406,9 @@ def test_results_with_constant_sample_weights(
     """
     n_samples = len(X)
     estimator = RandomForestClassifier(random_state=random_state)
-    mapie_clf0 = MapieCalibrator(estimator=estimator, calibrator=calibrator)
-    mapie_clf1 = MapieCalibrator(estimator=estimator, calibrator=calibrator)
-    mapie_clf2 = MapieCalibrator(estimator=estimator, calibrator=calibrator)
+    mapie_clf0 = TopLabelCalibrator(estimator=estimator, calibrator=calibrator)
+    mapie_clf1 = TopLabelCalibrator(estimator=estimator, calibrator=calibrator)
+    mapie_clf2 = TopLabelCalibrator(estimator=estimator, calibrator=calibrator)
     mapie_clf0.fit(X, y, sample_weight=None, random_state=random_state)
     mapie_clf1.fit(
         X, y, sample_weight=np.ones(shape=n_samples),
@@ -463,7 +452,7 @@ def test_pipeline_compatibility() -> None:
     )
     pipe = make_pipeline(preprocessor, LogisticRegression())
     pipe.fit(X, y)
-    mapie = MapieCalibrator(estimator=pipe)
+    mapie = TopLabelCalibrator(estimator=pipe)
     mapie.fit(X, y)
     mapie.predict(X)
 
@@ -476,7 +465,7 @@ def test_fit_parameters_passing() -> None:
     """
     gb = GradientBoostingClassifier(random_state=random_state)
 
-    mapie = MapieCalibrator(estimator=gb)
+    mapie = TopLabelCalibrator(estimator=gb)
 
     def early_stopping_monitor(i, est, locals):
        """Returns True on the 3rd iteration."""

0 commit comments
