Fix RCPS reference URL in PrecisionRecallController docstring and reformat risk_control.py

gmartinonQM · FaustinPulveric · commit 7c90adb8ecad · 2025-08-19T12:11:39.000+02:00
diff --git a/mapie/risk_control.py b/mapie/risk_control.py
@@ -11,8 +11,12 @@
 from sklearn.multioutput import MultiOutputClassifier
 from sklearn.pipeline import Pipeline
 from sklearn.utils import check_random_state
-from sklearn.utils.validation import (_check_y, _num_samples, check_is_fitted,
-                                      indexable)
+from sklearn.utils.validation import (
+    _check_y,
+    _num_samples,
+    check_is_fitted,
+    indexable,
+)
 
 from numpy.typing import ArrayLike, NDArray
 from .control_risk.crc_rcps import find_lambda_star, get_r_hat_plus
@@ -127,7 +131,7 @@ class PrecisionRecallController(BaseEstimator, ClassifierMixin):
     [1] Lihua Lei Jitendra Malik Stephen Bates, Anastasios Angelopoulos
     and Michael I. Jordan. Distribution-free, risk-controlling prediction
     sets. CoRR, abs/2101.02703, 2021.
-    URL https://arxiv.org/abs/2101.02703.39
+    URL https://arxiv.org/abs/2101.02703
 
     [2] Angelopoulos, Anastasios N., Stephen, Bates, Adam, Fisch, Lihua,
     Lei, and Tal, Schuster. "Conformal Risk Control." (2022).
@@ -153,30 +157,25 @@ class PrecisionRecallController(BaseEstimator, ClassifierMixin):
      [False  True False]
      [False  True False]]
     """
-    valid_methods_by_metric_ = {
-        "precision": ["ltt"],
-        "recall": ["rcps", "crc"]
-    }
+
+    valid_methods_by_metric_ = {"precision": ["ltt"], "recall": ["rcps", "crc"]}
     valid_methods = list(chain(*valid_methods_by_metric_.values()))
     valid_metric_ = list(valid_methods_by_metric_.keys())
     valid_bounds_ = ["hoeffding", "bernstein", "wsr", None]
     lambdas = np.arange(0, 1, 0.01)
     n_lambdas = len(lambdas)
-    fit_attributes = [
-        "single_estimator_",
-        "risks"
-    ]
+    fit_attributes = ["single_estimator_", "risks"]
     sigma_init = 0.25  # Value given in the paper [1]
-    cal_size = .3
+    cal_size = 0.3
 
     def __init__(
         self,
         estimator: Optional[ClassifierMixin] = None,
-        metric_control: Optional[str] = 'recall',
+        metric_control: Optional[str] = "recall",
         method: Optional[str] = None,
         n_jobs: Optional[int] = None,
         random_state: Optional[Union[int, np.random.RandomState]] = None,
-        verbose: int = 0
+        verbose: int = 0,
     ) -> None:
         self.estimator = estimator
         self.metric_control = metric_control
@@ -211,16 +210,18 @@ def _check_method(self) -> None:
         self.method = cast(str, self.method)
         self.metric_control = cast(str, self.metric_control)
 
-        if self.method not in self.valid_methods_by_metric_[
-            self.metric_control
-        ]:
+        if (
+            self.method
+            not in self.valid_methods_by_metric_[self.metric_control]
+        ):
             raise ValueError(
                 "Invalid method for metric: "
-                + "You are controlling " + self.metric_control
-                + " and you are using invalid method: " + self.method
-                + ". Use instead: " + "".join(self.valid_methods_by_metric_[
-                    self.metric_control]
-                )
+                + "You are controlling "
+                + self.metric_control
+                + " and you are using invalid method: "
+                + self.method
+                + ". Use instead: "
+                + "".join(self.valid_methods_by_metric_[self.metric_control])
             )
 
     def _check_all_labelled(self, y: NDArray) -> None:
@@ -241,9 +242,7 @@ def _check_all_labelled(self, y: NDArray) -> None:
         """
         if not (y.sum(axis=1) > 0).all():
             raise ValueError(
-                "Invalid y. "
-                "All observations should contain at "
-                "least one label."
+                "Invalid y. All observations should contain at least one label."
             )
 
     def _check_delta(self, delta: Optional[float]):
@@ -268,8 +267,7 @@ def _check_delta(self, delta: Optional[float]):
         """
         if (not isinstance(delta, float)) and (delta is not None):
             raise ValueError(
-                "Invalid delta. "
-                f"delta must be a float, not a {type(delta)}"
+                f"Invalid delta. delta must be a float, not a {type(delta)}"
             )
         if (self.method == "rcps") or (self.method == "ltt"):
             if delta is None:
@@ -278,11 +276,8 @@ def _check_delta(self, delta: Optional[float]):
                     "delta cannot be ``None`` when controlling "
                     "Recall with RCPS or Precision with LTT"
                 )
-            elif ((delta <= 0) or (delta >= 1)):
-                raise ValueError(
-                    "Invalid delta. "
-                    "delta must be in ]0, 1["
-                )
+            elif (delta <= 0) or (delta >= 1):
+                raise ValueError("Invalid delta. delta must be in ]0, 1[")
         if (self.method == "crc") and (delta is not None):
             warnings.warn(
                 "WARNING: you are using crc method, hence "
@@ -302,7 +297,8 @@ def _check_valid_index(self, alpha: NDArray):
             if self.valid_index[i] == []:
                 warnings.warn(
                     "Warning: LTT method has returned an empty sequence"
-                    + " for alpha=" + str(alpha[i])
+                    + " for alpha="
+                    + str(alpha[i])
                 )
 
     def _check_estimator(
@@ -361,14 +357,12 @@ def _check_estimator(
                 "use partial_fit."
             )
         if (estimator is None) and (_refit):
-            estimator = MultiOutputClassifier(
-                LogisticRegression()
-            )
+            estimator = MultiOutputClassifier(LogisticRegression())
             X_train, X_conf, y_train, y_conf = train_test_split(
-                    X,
-                    y,
-                    test_size=self.conformalize_size,
-                    random_state=self.random_state,
+                X,
+                y,
+                test_size=self.conformalize_size,
+                random_state=self.random_state,
             )
             estimator.fit(X_train, y_train)
             warnings.warn(
@@ -460,8 +454,7 @@ def _check_metric_control(self):
                 self.method = "ltt"
 
     def _transform_pred_proba(
-        self,
-        y_pred_proba: Union[Sequence[NDArray], NDArray]
+        self, y_pred_proba: Union[Sequence[NDArray], NDArray]
     ) -> NDArray:
         """If the output of the predict_proba is a list of arrays (output of
         the ``predict_proba`` of ``MultiOutputClassifier``) we transform it
@@ -483,7 +476,7 @@ def _transform_pred_proba(
         else:
             y_pred_proba_stacked = np.stack(
                 y_pred_proba,  # type: ignore
-                axis=0
+                axis=0,
             )[:, :, 1]
             y_pred_proba_array = np.moveaxis(y_pred_proba_stacked, 0, -1)
 
@@ -526,10 +519,7 @@ def partial_fit(
 
         X, y = indexable(X, y)
         _check_y(y, multi_output=True)
-        estimator, X, y = self._check_estimator(
-            X, y, self.estimator,
-            _refit
-        )
+        estimator, X, y = self._check_estimator(X, y, self.estimator, _refit)
 
         y = cast(NDArray, y)
         X = cast(NDArray, X)
@@ -561,15 +551,11 @@ def partial_fit(
             y_pred_proba_array = self._transform_pred_proba(y_pred_proba)
             if self.metric_control == "recall":
                 partial_risk = compute_risk_recall(
-                    self.lambdas,
-                    y_pred_proba_array,
-                    y
+                    self.lambdas, y_pred_proba_array, y
                 )
             else:  # self.metric_control == "precision"
                 partial_risk = compute_risk_precision(
-                    self.lambdas,
-                    y_pred_proba_array,
-                    y
+                    self.lambdas, y_pred_proba_array, y
                 )
             self.risks = np.concatenate([self.risks, partial_risk], axis=0)
 
@@ -579,7 +565,7 @@ def fit(
         self,
         X: ArrayLike,
         y: ArrayLike,
-        conformalize_size: Optional[float] = .3
+        conformalize_size: Optional[float] = 0.3,
     ) -> PrecisionRecallController:
         """
         Fit the base estimator or use the fitted base estimator.
@@ -611,7 +597,7 @@ def predict(
         X: ArrayLike,
         alpha: Optional[Union[float, Iterable[float]]] = None,
         delta: Optional[float] = None,
-        bound: Optional[Union[str, None]] = None
+        bound: Optional[Union[str, None]] = None,
     ) -> Union[NDArray, Tuple[NDArray, NDArray]]:
         """
         Prediction sets on new samples based on target confidence
@@ -674,35 +660,37 @@ def predict(
 
         y_pred_proba_array = self._transform_pred_proba(y_pred_proba)
         y_pred_proba_array = np.repeat(
-            y_pred_proba_array,
-            len(alpha_np),
-            axis=2
+            y_pred_proba_array, len(alpha_np), axis=2
         )
-        if self.metric_control == 'precision':
+        if self.metric_control == "precision":
             self.n_obs = len(self.risks)
             self.r_hat = self.risks.mean(axis=0)
             self.valid_index, self.p_values = ltt_procedure(
                 self.r_hat, alpha_np, delta, self.n_obs
             )
             self._check_valid_index(alpha_np)
             self.lambdas_star, self.r_star = find_lambda_control_star(
-               self.r_hat, self.valid_index, self.lambdas
+                self.r_hat, self.valid_index, self.lambdas
             )
             y_pred_proba_array = (
-                y_pred_proba_array >
-                np.array(self.lambdas_star)[np.newaxis, np.newaxis, :]
+                y_pred_proba_array
+                > np.array(self.lambdas_star)[np.newaxis, np.newaxis, :]
             )
 
         else:
             self.r_hat, self.r_hat_plus = get_r_hat_plus(
-                self.risks, self.lambdas, self.method,
-                bound, delta, self.sigma_init
+                self.risks,
+                self.lambdas,
+                self.method,
+                bound,
+                delta,
+                self.sigma_init,
             )
             self.lambdas_star = find_lambda_star(
                 self.lambdas, self.r_hat_plus, alpha_np
             )
             y_pred_proba_array = (
-                y_pred_proba_array >
-                self.lambdas_star[np.newaxis, np.newaxis, :]
+                y_pred_proba_array
+                > self.lambdas_star[np.newaxis, np.newaxis, :]
             )
         return y_pred, y_pred_proba_array