Tutorial for set prediction
===========================

In this example, we propose set prediction for binary classification
estimated by :class:`~mapie.classification.SplitConformalClassifier` with the
"lac" method on a two-dimensional dataset.

Throughout this tutorial, we will answer the following questions:

- How does the number of classes in the prediction sets vary according to
  the confidence level?

- Is the conformal method well calibrated?

- What are the pros and cons of set prediction for binary classification
  in MAPIE?

PLEASE NOTE: we don't recommend using set prediction in binary classification
settings, even though we offer this tutorial for those who might be
interested.
Instead, we recommend the use of calibration (see more details in the
Calibration section of the documentation or by using the
:class:`~sklearn.calibration.CalibratedClassifierCV` proposed by sklearn
from sklearn.naive_bayes import GaussianNB

from numpy.typing import NDArray
from mapie.classification import SplitConformalClassifier
from mapie.utils import train_conformalize_test_split
from mapie.metrics.classification import (
    classification_coverage_score_v2,
    classification_mean_width_score,
)

#
# We will use MAPIE to estimate a prediction set such that
# the probability that the true label of a new test point is included in the
# prediction set is always higher than the target confidence level.
# We start by using the softmax score output by the base
# classifier as the conformity score on a toy two-dimensional dataset.
# We estimate the prediction sets as follows:
#
# * First we generate a dataset with train, conformalization and test sets;
#   the model is fitted on the training set.
#
# * We set the conformal score ``Sᵢ = 𝑓̂(Xᵢ)ᵧᵢ``
#   from the softmax output of the true class for each sample
#   in the conformity set.
#
# * Then we define ``q̂`` as being the
#   ``(n + 1) (1 - α) / n``
#   quantile of the conformity scores (a minimal NumPy sketch of this rule
#   is given after the data-splitting code below).
    ]
)
y = np.hstack([np.full(n_samples, i) for i in range(n_classes)])
(X_train, X_conf, X_val,
 y_train, y_conf, y_val) = train_conformalize_test_split(
    X, y, train_size=0.35, conformalize_size=0.15, test_size=0.5
)
X_c1, X_c2, y_c1, y_c2 = train_test_split(X_conf, y_conf, test_size=0.5)
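
##############################################################################
# Before delegating everything to MAPIE, here is a minimal NumPy sketch of
# the LAC rule described above (an illustration only, with hypothetical
# helper names; MAPIE performs the equivalent computation internally). We
# use the equivalent scores ``1 - 𝑓̂(Xᵢ)ᵧᵢ``, for which ``q̂`` is the
# ``(n + 1)(1 - α)/n`` empirical quantile; ``_alpha = 0.1`` corresponds to
# a confidence level of 0.9.

_clf_sketch = GaussianNB().fit(X_train, y_train)
_alpha = 0.1
# Conformity scores on the conformalization samples: one minus the
# predicted probability of the true class.
_probas_conf = _clf_sketch.predict_proba(X_conf)
_scores = 1 - _probas_conf[np.arange(len(y_conf)), y_conf]
_n = len(_scores)
# Finite-sample-corrected quantile level, clipped to 1.
_level = min(1.0, (_n + 1) * (1 - _alpha) / _n)
_q_hat = np.quantile(_scores, _level, method="higher")
# A label enters the prediction set when its score is below the threshold.
_pred_sets_sketch = (1 - _clf_sketch.predict_proba(X_val)) <= _q_hat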

xx, yy = np.meshgrid(
    np.arange(x_min, x_max, step), np.arange(x_min, x_max, step)
# :class:`~sklearn.calibration.CalibratedClassifierCV` proposed by sklearn
# so that scores can be interpreted as probabilities
# (see documentation for more information).
# Then we apply :class:`~mapie.classification.SplitConformalClassifier` on the
# conformity data with the LAC conformity score, indicating with
# ``prefit=True`` that the estimator has already been fitted.
# We then estimate the prediction sets for different confidence levels with a
# ``conformalize`` and ``predict_set`` process.

clf = GaussianNB()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
y_pred_proba = clf.predict_proba(X_test)
y_pred_proba_max = np.max(y_pred_proba, axis=1)

confidence_level = [0.8, 0.9, 0.95]

calib = CalibratedClassifierCV(
    estimator=clf, method='sigmoid', cv='prefit'
)
calib.fit(X_c1, y_c1)

mapie_clf = SplitConformalClassifier(
    estimator=calib, confidence_level=confidence_level, prefit=True,
    random_state=42
)
mapie_clf.conformalize(X_c2, y_c2)

y_pred_mapie, y_ps_mapie = mapie_clf.predict_set(X_test)
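
##############################################################################
# Note: with several confidence levels, ``y_ps_mapie`` stacks one boolean
# membership mask per level along its last axis, i.e. its shape is
# ``(n_samples, n_classes, len(confidence_level))``; for instance,
# ``y_ps_mapie[:, :, 1]`` is the mask for ``confidence_level=0.9``.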


##############################################################################
# conformity score.
#
# Let's now visualize the distribution of the conformity scores, with the
# calculated quantiles for the three confidence levels.


def plot_scores(
    confidence_levels: List[float],
    scores: NDArray,
    quantiles: NDArray,
    conformity_score: str,
    ax: plt.Axes,
) -> None:
    colors = {0: "#1f77b4", 1: "#ff7f0e", 2: "#2ca02c"}
            ymax=100,
            color=colors[i],
            linestyles="dashed",
            label=f"confidence_level = {confidence_levels[i]}",
        )
        i = i + 1
    ax.set_title(
        f"Distribution of scores for '{conformity_score}' conformity score"
    )
    ax.legend()
    ax.set_xlabel("scores")
    ax.set_ylabel("count")


fig, axs = plt.subplots(1, 1, figsize=(10, 5))
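# Note: ``conformity_scores_`` and ``quantiles_`` are reached here through
# the private ``_mapie_classifier`` attribute of the v1 wrapper; this is an
# internal detail that may change between MAPIE versions.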
conformity_scores = mapie_clf._mapie_classifier.conformity_scores_
quantiles = mapie_clf._mapie_classifier.conformity_score_function_.quantiles_
plot_scores(confidence_level, conformity_scores, quantiles, 'lac', axs)
plt.show()


##############################################################################
# We will now compare the differences between the prediction sets for the
# different confidence levels.

def plot_prediction_decision(y_pred_mapie: NDArray, ax) -> None:
    y_pred_col = list(map(colors.get, y_pred_mapie))
    ax.set_title("Predicted labels")


def plot_prediction_set(y_ps: NDArray, confidence_level_: float, ax) -> None:
    tab10 = plt.cm.get_cmap("Purples", 4)
    y_pi_sums = y_ps.sum(axis=1)
    num_labels = ax.scatter(
        s=10,
        edgecolor="k",
    )
    ax.set_title(f"Number of labels for confidence_level = {confidence_level_}")
    plt.colorbar(num_labels, ax=ax)


def plot_results(
    confidence_levels: List[float], y_pred_mapie: NDArray, y_ps_mapie: NDArray
) -> None:
    _, [[ax1, ax2], [ax3, ax4]] = plt.subplots(2, 2, figsize=(10, 10))
    axs = {0: ax1, 1: ax2, 2: ax3, 3: ax4}
    plot_prediction_decision(y_pred_mapie, axs[0])
    for i, confidence_level_ in enumerate(confidence_levels):
        plot_prediction_set(y_ps_mapie[:, :, i], confidence_level_, axs[i + 1])
    plt.show()


plot_results(confidence_level, y_pred_mapie, y_ps_mapie)


##############################################################################

##############################################################################
# Let's now compare the effective coverage and the average prediction set
# width as a function of the ``confidence_level`` target coverage.

confidence_level_ = np.arange(0.02, 0.98, 0.02)

calib = CalibratedClassifierCV(
    estimator=clf, method='sigmoid', cv='prefit'
)
calib.fit(X_c1, y_c1)

mapie_clf = SplitConformalClassifier(
    estimator=calib,
    confidence_level=confidence_level_,
    conformity_score="lac",
    prefit=True,
    random_state=42
)
mapie_clf.conformalize(X_c2, y_c2)
_, y_ps_mapie = mapie_clf.predict_set(X)

coverage = classification_coverage_score_v2(y, y_ps_mapie)
mean_width = classification_mean_width_score(y_ps_mapie)
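# Both metrics return one value per confidence level (aligned with
# ``confidence_level_``), so they can be plotted against it directly.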


def plot_coverages_widths(confidence_level, coverage, width, conformity_score):
    quantiles = mapie_clf._mapie_classifier.conformity_score_function_.quantiles_
    _, axs = plt.subplots(1, 3, figsize=(15, 5))
    axs[0].set_xlabel("Confidence level")
    axs[0].set_ylabel("Quantile")
    axs[0].scatter(confidence_level, quantiles, label=conformity_score)
    axs[0].legend()
    axs[1].scatter(confidence_level, coverage, label=conformity_score)
    axs[1].set_xlabel("Confidence level")
    axs[1].set_ylabel("Coverage score")
    axs[1].plot([0, 1], [0, 1], label="x=y", color="black")
    axs[1].legend()
    axs[2].scatter(confidence_level, width, label=conformity_score)
    axs[2].set_xlabel("Confidence level")
    axs[2].set_ylabel("Average size of prediction sets")
    axs[2].legend()
    plt.show()


plot_coverages_widths(confidence_level_, coverage, mean_width, 'lac')


##############################################################################
# It is seen that the method gives coverages close to the target coverages,
# regardless of the ``confidence_level`` value.

##############################################################################
# Lastly, let us explore how the prediction sets change as a function of the
# confidence level, to identify the range where prediction sets transition
# from containing at least one element to being potentially empty.

confidence_level_ = np.arange(0.99, 0.85, -0.01)

calib = CalibratedClassifierCV(
    estimator=clf, method='sigmoid', cv='prefit'
)
calib.fit(X_c1, y_c1)

mapie_clf = SplitConformalClassifier(
    estimator=calib, confidence_level=confidence_level_, prefit=True,
    random_state=42
)
mapie_clf.conformalize(X_c2, y_c2)
_, y_ps_mapie = mapie_clf.predict_set(X_test)

non_empty = np.mean(
    np.any(y_ps_mapie, axis=1), axis=0
)
idx = np.argwhere(non_empty < 1)[0, 0]

_, axs = plt.subplots(1, 3, figsize=(15, 5))
plot_prediction_decision(y_pred_mapie, axs[0])
_, y_ps = mapie_clf.predict_set(X_test)
plot_prediction_set(
    y_ps[:, :, idx - 1], np.round(confidence_level_[idx - 1], 3), axs[1]
)
plot_prediction_set(
    y_ps[:, :, idx + 1], np.round(confidence_level_[idx + 1], 3), axs[2]
)

plt.show()

##############################################################################
# In this section, we adjust the confidence level around the model's accuracy
# to observe the changes in the sizes of the prediction sets. When the
# confidence level matches the model's accuracy, we see a shift from
# potentially empty prediction sets to sets that always contain at least one
# element.
# The two plots on the right-hand side illustrate the size of the prediction
# sets for each test sample just before and after this transition point. In
# our example, the transition occurs at a confidence_level of 0.89 (i.e., the
# accuracy of the model). This means that for confidence levels above 0.89,
# all prediction sets contain at least one element. Conversely, for
# confidence levels below 0.89, some test samples may have empty prediction
# sets.
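
##############################################################################
# As a quick sanity check of this relationship (a sketch, not part of the
# original analysis), one can compare the calibrated model's accuracy on the
# conformalization fold with the transition level found above; the two values
# should roughly coincide.

print("Accuracy on the conformalization fold:", calib.score(X_c2, y_c2))
print("Transition confidence level:", np.round(confidence_level_[idx], 3))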