Commit 5e64851

Author: Thibault Cordier (committed)
UPD: adapt quantile formula and results
1 parent 81d1af0 commit 5e64851

File tree: 4 files changed, +58 -49 lines changed


mapie/conformity_scores/conformity_scores.py

Lines changed: 39 additions & 30 deletions
@@ -214,7 +214,7 @@ def get_quantile(
         conformity_scores: NDArray,
         alpha_np: NDArray,
         axis: int,
-        method: str
+        reversed: bool = False
     ) -> NDArray:
         """
         Compute the alpha quantile of the conformity scores or the conformity
@@ -235,28 +235,29 @@ def get_quantile(
         axis: int
             The axis from which to compute the quantile.

-        method: str
-            ``"higher"`` or ``"lower"`` the method to compute the quantile.
+        reversed: bool
+            Boolean specifying whether we take the upper or lower quantile,
+            if False, the alpha quantile, otherwise the (1-alpha) quantile.

         Returns
         -------
         NDArray of shape (1, n_alpha) or (n_samples, n_alpha)
             The quantile of the conformity scores.
         """
-        n_ref = conformity_scores.shape[-1]
-        # TODO: assume that each group has same n_calib when using plus method
-        n_calib = np.min(np.sum(~np.isnan(conformity_scores), axis=0))
-        quantile = np.column_stack([
+        n_ref = conformity_scores.shape[1-axis]
+        n_calib = np.min(np.sum(~np.isnan(conformity_scores), axis=axis))
+        signed = 1-2*reversed
+        alpha_ref = (1-2*alpha_np)*reversed + alpha_np
+
+        quantile = signed * np.column_stack([
             np_nanquantile(
-                conformity_scores.astype(float),
-                np.ceil(_alpha*(n_calib + 1))/n_calib,
+                signed * conformity_scores.astype(float),
+                np.ceil(_alpha*(n_calib+1))/n_calib,
                 axis=axis,
-                method=method
-            ) if n_calib and 0 < np.ceil(_alpha*(n_calib + 1))/n_calib < 1
-            else np.nan * np.ones(n_ref) if not n_calib
-            else np.inf * np.ones(n_ref) if method == "higher"
-            else - np.inf * np.ones(n_ref)
-            for _alpha in alpha_np
+                method="lower"
+            ) if 0 < np.ceil(_alpha*(n_calib+1))/n_calib < 1
+            else np.inf * np.ones(n_ref)
+            for _alpha in alpha_ref
         ])
         return quantile

@@ -284,7 +285,7 @@ def _beta_optimize(
         -------
         NDArray
             Array of betas minimizing the differences
-            ``(1-alpa+beta)-quantile - beta-quantile``.
+            ``(1-alpha+beta)-quantile - beta-quantile``.
         """
         beta_np = np.full(
             shape=(len(lower_bounds), len(alpha_np)),
@@ -408,26 +409,34 @@ def get_bounds(
                 X, y_pred_up, conformity_scores
             )
             bound_low = self.get_quantile(
-                conformity_scores_low, alpha_low, axis=1, method="lower"
+                conformity_scores_low, alpha_low, axis=1, reversed=True
             )
             bound_up = self.get_quantile(
-                conformity_scores_up, alpha_up, axis=1, method="higher"
+                conformity_scores_up, alpha_up, axis=1
             )
+
         else:
-            quantile_search = "higher" if self.sym else "lower"
-            alpha_low = 1 - alpha_np if self.sym else beta_np
-            alpha_up = 1 - alpha_np if self.sym else 1 - alpha_np + beta_np
+            if self.sym:
+                alpha_ref = 1-alpha_np
+                quantile_ref = self.get_quantile(
+                    conformity_scores[..., np.newaxis], alpha_ref, axis=0
+                )
+                quantile_low, quantile_up = -quantile_ref, quantile_ref
+
+            else:
+                alpha_low, alpha_up = beta_np, 1 - alpha_np + beta_np
+
+                quantile_low = self.get_quantile(
+                    conformity_scores[..., np.newaxis],
+                    alpha_low, axis=0, reversed=True
+                )
+                quantile_up = self.get_quantile(
+                    conformity_scores[..., np.newaxis],
+                    alpha_up, axis=0
+                )

-            quantile_low = self.get_quantile(
-                conformity_scores[..., np.newaxis],
-                alpha_low, axis=0, method=quantile_search
-            )
-            quantile_up = self.get_quantile(
-                conformity_scores[..., np.newaxis],
-                alpha_up, axis=0, method="higher"
-            )
             bound_low = self.get_estimation_distribution(
-                X, y_pred_low, signed * quantile_low
+                X, y_pred_low, quantile_low
            )
            bound_up = self.get_estimation_distribution(
                X, y_pred_up, quantile_up
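
For context: the reversed/signed path above relies on the identity that the conservative (1-alpha) quantile taken with method="higher" equals minus the alpha quantile of the negated scores taken with method="lower". A minimal sketch of that identity on toy data, using plain np.quantile (numpy >= 1.22 for the `method` keyword) in place of MAPIE's np_nanquantile wrapper, with the same ceil(alpha*(n+1))/n finite-sample correction as in the hunk:

import numpy as np

rng = np.random.default_rng(0)
scores = rng.normal(size=100)   # toy conformity scores, not MAPIE data
alpha = 0.1
n = scores.size

# Finite-sample corrected level, as in the diff: ceil(alpha * (n + 1)) / n
level = np.ceil(alpha * (n + 1)) / n

# Conservative upper quantile computed directly with method="higher" ...
q_up_higher = np.quantile(scores, 1 - level, method="higher")

# ... and via the sign trick of the new code path: negate the scores,
# take the "lower" quantile, then flip the sign of the result.
q_up_signed = -np.quantile(-scores, level, method="lower")

assert np.isclose(q_up_higher, q_up_signed)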

mapie/regression/regression.py

Lines changed: 6 additions & 6 deletions
@@ -192,12 +192,12 @@ class MapieRegressor(BaseEstimator, RegressorMixin):
     >>> mapie_reg = mapie_reg.fit(X_toy, y_toy)
     >>> y_pred, y_pis = mapie_reg.predict(X_toy, alpha=0.5)
     >>> print(y_pis[:, :, 0])
-    [[ 4.84285714  5.72857143]
-     [ 6.72857143  7.61428571]
-     [ 8.61428571  9.5       ]
-     [10.5        11.38571429]
-     [12.38571429 13.27142857]
-     [14.27142857 15.15714286]]
+    [[ 4.95714286  5.61428571]
+     [ 6.84285714  7.5       ]
+     [ 8.72857143  9.38571429]
+     [10.61428571 11.27142857]
+     [12.5        13.15714286]
+     [14.38571429 15.04285714]]
     >>> print(y_pred)
     [ 5.28571429  7.17142857  9.05714286 10.94285714 12.82857143 14.71428571]
     """

mapie/tests/test_regression.py

Lines changed: 5 additions & 5 deletions
@@ -138,15 +138,15 @@
 }

 WIDTHS = {
-    "naive": 3.87,
-    "split": 3.96,
-    "jackknife": 3.97,
+    "naive": 3.80,
+    "split": 3.89,
+    "jackknife": 3.89,
     "jackknife_plus": 3.90,
-    "jackknife_minmax": 4.03,
+    "jackknife_minmax": 3.96,
     "cv": 3.88,
     "cv_plus": 3.91,
     "cv_minmax": 4.07,
-    "prefit": 3.96,
+    "prefit": 3.89,
     "cv_plus_median": 3.91,
     "jackknife_plus_ab": 3.90,
     "jackknife_minmax_ab": 4.14,

mapie/tests/test_time_series_regression.py

Lines changed: 8 additions & 8 deletions
@@ -95,12 +95,12 @@

 WIDTHS = {
     "blockbootstrap_enbpi_mean_wopt": 3.86,
-    "blockbootstrap_enbpi_median_wopt": 3.76,
+    "blockbootstrap_enbpi_median_wopt": 3.85,
     "blockbootstrap_enbpi_mean": 3.86,
-    "blockbootstrap_enbpi_median": 3.76,
-    "blockbootstrap_aci_mean": 4.03,
-    "blockbootstrap_aci_median": 4.03,
-    "prefit": 4.79,
+    "blockbootstrap_enbpi_median": 3.85,
+    "blockbootstrap_aci_mean": 3.96,
+    "blockbootstrap_aci_median": 3.95,
+    "prefit": 4.86,
 }

 COVERAGES = {
@@ -110,7 +110,7 @@
     "blockbootstrap_enbpi_median": 0.946,
     "blockbootstrap_aci_mean": 0.96,
     "blockbootstrap_aci_median": 0.96,
-    "prefit": 0.96,
+    "prefit": 0.97,
 }


@@ -423,8 +423,8 @@ def test_interval_prediction_with_beta_optimize() -> None:
     coverage = regression_coverage_score(
         y_test, y_pis[:, 0, 0], y_pis[:, 1, 0]
     )
-    np.testing.assert_allclose(width_mean, 4.27, rtol=1e-2)
-    np.testing.assert_allclose(coverage, 0.93, rtol=1e-2)
+    np.testing.assert_allclose(width_mean, 3.67, rtol=1e-2)
+    np.testing.assert_allclose(coverage, 0.916, rtol=1e-2)


 def test_deprecated_path_warning() -> None:
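
To interpret the updated expectations here: width_mean is the average gap between the upper and lower bound of each prediction interval, and the coverage returned by regression_coverage_score is the fraction of test targets falling inside their interval. A small self-contained sketch with made-up intervals in MAPIE's y_pis layout:

import numpy as np

# Made-up intervals with MAPIE's y_pis layout: (n_samples, 2, n_alpha),
# index 0 = lower bound, index 1 = upper bound.
rng = np.random.default_rng(1)
y_test = rng.normal(size=50)
y_pis = np.stack([y_test[:, None] - 2.0, y_test[:, None] + 1.7], axis=1)

# Mean interval width (the kind of quantity asserted against 3.67 above).
width_mean = (y_pis[:, 1, 0] - y_pis[:, 0, 0]).mean()

# Empirical coverage: fraction of targets inside their interval, which is
# what regression_coverage_score measures.
coverage = np.mean((y_pis[:, 0, 0] <= y_test) & (y_test <= y_pis[:, 1, 0]))

print(width_mean, coverage)   # 3.7 and 1.0 for these toy intervals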
