Commit 588f71c

Merge pull request #43 from simai-ml/alpha-as-list
Accept alpha as a list or np.ndarray
2 parents: becd6a8 + 80c97be

File tree: 11 files changed, +168 −66 lines

HISTORY.rst

Lines changed: 2 additions & 0 deletions
@@ -11,6 +11,8 @@ History
 * Remove the `n_splits`, `shuffle` and `random_state` parameters
 * Simplify the `method` parameter
 * Fix typos in documentation and add methods descriptions in sphinx
+* Accept alpha parameter as a list or np.ndarray
+* If alpha is a list, `.predict()` returns a np.ndarray of shape (n_samples, 3, len(alpha))
 
 0.1.4 (2021-05-07)
 ------------------

README.rst

Lines changed: 8 additions & 5 deletions
@@ -83,6 +83,8 @@ Here, we generate one-dimensional noisy data that we fit with a linear model.
 
 Since MAPIE is compliant with the standard scikit-learn API, we follow the standard
 sequential ``fit`` and ``predict`` process like any scikit-learn regressor.
+We set two values for alpha to estimate prediction intervals at approximately one
+and two standard deviations from the mean.
 
 .. code:: python
 
@@ -92,8 +94,9 @@ sequential ``fit`` and ``predict`` process like any scikit-learn regressor.
     y_preds = mapie.predict(X)
 
 
-MAPIE returns a ``np.ndarray`` of shape (n_samples, 3) giving the predictions,
-as well as the lower and upper bounds of the prediction intervals for the target quantile.
+MAPIE returns a ``np.ndarray`` of shape (n_samples, 3, len(alpha)) giving the predictions,
+as well as the lower and upper bounds of the prediction intervals for the target quantile
+for each desired alpha value.
 The estimated prediction intervals can then be plotted as follows.
 
 .. code:: python
@@ -111,11 +114,11 @@ The estimated prediction intervals can then be plotted as follows.
     )
     plt.show()
 
-The title of the plot compares the target coverage with the effective coverage.
+The title of the plot compares the target coverages with the effective coverages.
 The target coverage, or the confidence interval, is the fraction of true labels lying in the
 prediction intervals that we aim to obtain for a given dataset.
-It is given by the alpha parameter defined in `MapieRegressor`, here equal to the default value of
-0.1 thus giving a target coverage of 0.9.
+It is given by the alpha parameter defined in ``MapieRegressor``, here equal to 0.05 and 0.32,
+thus giving target coverages of 0.95 and 0.68.
 The effective coverage is the actual fraction of true labels lying in the prediction intervals.
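A minimal sketch of how the new three-dimensional output can be indexed, following the README snippet above (the unpacked variable names are illustrative, not part of the API):

.. code:: python

    from sklearn.datasets import make_regression
    from sklearn.linear_model import LinearRegression
    from mapie.estimators import MapieRegressor

    X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=59)
    mapie = MapieRegressor(LinearRegression(), alpha=[0.05, 0.32], method="plus")
    mapie.fit(X, y)
    y_preds = mapie.predict(X)

    # One (prediction, lower bound, upper bound) triplet per sample and per alpha
    assert y_preds.shape == (500, 3, 2)
    low_95, up_95 = y_preds[:, 1, 0], y_preds[:, 2, 0]  # alpha = 0.05
    low_68, up_68 = y_preds[:, 1, 1], y_preds[:, 2, 1]  # alpha = 0.32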

doc/images/quickstart_1.png

16.2 KB

doc/quick_start.rst

Lines changed: 21 additions & 12 deletions
@@ -11,7 +11,7 @@ Estimate your prediction intervals
 1. Download and install the module
 ----------------------------------
 
-Install via `pip`:
+Install via ``pip``:
 
 .. code:: python
 
@@ -28,7 +28,8 @@ To install directly from the github repository :
 ---------------------
 
 Let us start with a basic regression problem.
-Here, we generate one-dimensional noisy data that we fit with a linear model.
+Here, we generate one-dimensional noisy data with normal distribution
+that we fit with a linear model.
 
 .. code:: python
 
@@ -40,21 +41,25 @@ Here, we generate one-dimensional noisy data that we fit with a linear model.
     X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=59)
 
 Since MAPIE is compliant with the standard scikit-learn API, we follow the standard
-sequential `fit` and `predict` process like any scikit-learn regressor.
+sequential ``fit`` and ``predict`` process like any scikit-learn regressor.
+We set two values for alpha to estimate prediction intervals at approximately one
+and two standard deviations from the mean.
 
 .. code:: python
 
     from mapie.estimators import MapieRegressor
-    mapie = MapieRegressor(regressor)
+    alpha = [0.05, 0.32]
+    mapie = MapieRegressor(regressor, alpha=alpha, method="plus")
     mapie.fit(X, y)
     y_preds = mapie.predict(X)
 
 
 3. Show the results
 -------------------
 
-MAPIE returns a `np.ndarray` of shape (n_samples, 3) giving the predictions,
-as well as the lower and upper bounds of the prediction intervals for the target quantile.
+MAPIE returns a ``np.ndarray`` of shape (n_samples, 3, len(alpha)) giving the predictions,
+as well as the lower and upper bounds of the prediction intervals for the target quantile
+for each desired alpha value.
 The estimated prediction intervals can then be plotted as follows.
 
 .. code:: python
@@ -64,11 +69,15 @@ The estimated prediction intervals can then be plotted as follows.
     plt.xlabel("x")
    plt.ylabel("y")
     plt.scatter(X, y, alpha=0.3)
-    plt.plot(X, y_preds[:, 0], color="C1")
+    plt.plot(X, y_preds[:, 0, 0], color="C1")
     order = np.argsort(X[:, 0])
-    plt.fill_between(X[order].ravel(), y_preds[:, 1][order], y_preds[:, 2][order], alpha=0.3)
+    plt.plot(X[order], y_preds[order][:, 1, 1], color="C1", ls="--")
+    plt.plot(X[order], y_preds[order][:, 2, 1], color="C1", ls="--")
+    plt.fill_between(X[order].ravel(), y_preds[:, 1, 0][order].ravel(), y_preds[:, 2, 0][order].ravel(), alpha=0.2)
+    coverage_scores = [coverage_score(y, y_preds[:, 1, i], y_preds[:, 2, i]) for i, _ in enumerate(alpha)]
     plt.title(
-        f"Target coverage = 0.9; Effective coverage = {coverage_score(y, y_preds[:, 1], y_preds[:, 2])}"
+        f"Target and effective coverages for alpha={alpha[0]:.2f}: ({1-alpha[0]:.3f}, {coverage_scores[0]:.3f})\n" +
+        f"Target and effective coverages for alpha={alpha[1]:.2f}: ({1-alpha[1]:.3f}, {coverage_scores[1]:.3f})"
     )
     plt.show()
 
@@ -77,9 +86,9 @@ The estimated prediction intervals can then be plotted as follows.
     :width: 400
     :align: center
 
-The title of the plot compares the target coverage with the effective coverage.
+The title of the plot compares the target coverages with the effective coverages.
 The target coverage, or the confidence interval, is the fraction of true labels lying in the
 prediction intervals that we aim to obtain for a given dataset.
-It is given by the alpha parameter defined in `MapieRegressor`, here equal to the default value of
-0.1 thus giving a target coverage of 0.9.
+It is given by the alpha parameter defined in ``MapieRegressor``, here equal to ``0.05`` and ``0.32``,
+thus giving target coverages of 0.95 and 0.68.
 The effective coverage is the actual fraction of true labels lying in the prediction intervals.
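As a quick sanity check on the coverages discussed above, the per-alpha target and effective coverages can be printed directly (a sketch reusing ``y_preds``, ``y`` and ``coverage_score`` from the quick-start snippet; it assumes all three are already in scope):

.. code:: python

    alpha = [0.05, 0.32]
    for i, a in enumerate(alpha):
        effective = coverage_score(y, y_preds[:, 1, i], y_preds[:, 2, i])
        print(f"alpha={a:.2f}: target={1 - a:.3f}, effective={effective:.3f}")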

doc/tutorial.rst

Lines changed: 3 additions & 3 deletions
@@ -115,7 +115,7 @@ in order to obtain a 95% confidence for our prediction intervals.
     for strategy, params in STRATEGIES:
         mapie = MapieRegressor(polyn_model, alpha=0.05, ensemble=False, **params)
         mapie.fit(X_train, y_train)
-        prediction_interval[method] = mapie.predict(X_test)
+        prediction_interval[method] = mapie.predict(X_test)[:, :, 0]
 
 Let’s now compare the confidence intervals with the predicted intervals with obtained
 by the Jackknife+, Jackknife-minmax, CV+, and CV-minmax strategies.
@@ -332,7 +332,7 @@ strategies.
     for strategy, params in STRATEGIES:
         mapie = MapieRegressor(polyn_model, alpha=0.05, ensemble=False, **params)
         mapie.fit(X_train, y_train)
-        prediction_interval[method] = mapie.predict(X_test)
+        prediction_interval[method] = mapie.predict(X_test)[:, :, 0]
 
 
 .. code:: python
@@ -546,7 +546,7 @@ and compare their prediction interval.
     for name, model in zip(model_names, models):
         mapie = MapieRegressor(model, alpha=0.05, method="plus", cv=5, ensemble=True)
         mapie.fit(X_train, y_train)
-        prediction_interval[name] = mapie.predict(X_test)
+        prediction_interval[name] = mapie.predict(X_test)[:, :, 0]
 
 .. code:: python
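The ``[:, :, 0]`` indexing added throughout the tutorial keeps downstream code unchanged: a single float alpha is promoted internally to a one-element array (see ``_check_alpha`` below), so the last axis of the output has length one and selecting index 0 recovers the previous (n_samples, 3) layout. A sketch, assuming a fitted ``mapie`` with a float ``alpha=0.05``:

.. code:: python

    y_preds = mapie.predict(X_test)  # shape (n_samples, 3, 1) with a float alpha
    y_preds_2d = y_preds[:, :, 0]    # shape (n_samples, 3), as in earlier releases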

examples/plot_barber2020_simulations.py

Lines changed: 1 addition & 1 deletion
@@ -103,7 +103,7 @@ def PIs_vs_dimensions(
                 **params
             )
             mapie.fit(X_train, y_train)
-            y_preds = mapie.predict(X_test)
+            y_preds = mapie.predict(X_test)[:, :, 0]
             results[strategy][dimension]["coverage"][trial] = coverage_score(
                 y_test, y_preds[:, 1], y_preds[:, 2]
             )

examples/plot_homoscedastic_1d_data.py

Lines changed: 1 addition & 1 deletion
@@ -149,7 +149,7 @@ def plot_1d_data(
         **params
     )
     mapie.fit(X_train.reshape(-1, 1), y_train)
-    y_preds = mapie.predict(X_test.reshape(-1, 1))
+    y_preds = mapie.predict(X_test.reshape(-1, 1))[:, :, 0]
     plot_1d_data(
         X_train,
         y_train,

examples/plot_nested-cv.py

Lines changed: 2 additions & 2 deletions
@@ -95,7 +95,7 @@
     n_jobs=-1
 )
 mapie_non_nested.fit(X_train, y_train)
-y_preds_non_nested = mapie_non_nested.predict(X_test)
+y_preds_non_nested = mapie_non_nested.predict(X_test)[:, :, 0]
 widths_non_nested = y_preds_non_nested[:, 2] - y_preds_non_nested[:, 1]
 coverage_non_nested = coverage_score(y_test, y_preds_non_nested[:, 1], y_preds_non_nested[:, 2])
 score_non_nested = mean_squared_error(y_test, y_preds_non_nested[:, 0], squared=False)
@@ -120,7 +120,7 @@
     ensemble=True
 )
 mapie_nested.fit(X_train, y_train)
-y_preds_nested = mapie_nested.predict(X_test)
+y_preds_nested = mapie_nested.predict(X_test)[:, :, 0]
 widths_nested = y_preds_nested[:, 2] - y_preds_nested[:, 1]
 coverage_nested = coverage_score(y_test, y_preds_nested[:, 1], y_preds_nested[:, 2])
 score_nested = mean_squared_error(y_test, y_preds_nested[:, 0], squared=False)

examples/plot_toy_model.py

Lines changed: 9 additions & 4 deletions
@@ -16,17 +16,22 @@
 regressor = LinearRegression()
 X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=59)
 
-mapie = MapieRegressor(regressor, n_jobs=-1)
+alpha = [0.05, 0.32]
+mapie = MapieRegressor(regressor, alpha=alpha, method="plus")
 mapie.fit(X, y)
 y_preds = mapie.predict(X)
 
 plt.xlabel("x")
 plt.ylabel("y")
 plt.scatter(X, y, alpha=0.3)
-plt.plot(X, y_preds[:, 0], color="C1")
+plt.plot(X, y_preds[:, 0, 0], color="C1")
 order = np.argsort(X[:, 0])
-plt.fill_between(X[order].ravel(), y_preds[:, 1][order], y_preds[:, 2][order], alpha=0.3)
+plt.plot(X[order], y_preds[order][:, 1, 1], color="C1", ls="--")
+plt.plot(X[order], y_preds[order][:, 2, 1], color="C1", ls="--")
+plt.fill_between(X[order].ravel(), y_preds[:, 1, 0][order].ravel(), y_preds[:, 2, 0][order].ravel(), alpha=0.2)
+coverage_scores = [coverage_score(y, y_preds[:, 1, i], y_preds[:, 2, i]) for i, _ in enumerate(alpha)]
 plt.title(
-    f"Target coverage = 0.9; Effective coverage = {coverage_score(y, y_preds[:, 1], y_preds[:, 2])}"
+    f"Target and effective coverages for alpha={alpha[0]:.2f}: ({1-alpha[0]:.3f}, {coverage_scores[0]:.3f})\n" +
+    f"Target and effective coverages for alpha={alpha[1]:.2f}: ({1-alpha[1]:.3f}, {coverage_scores[1]:.3f})"
 )
 plt.show()

mapie/estimators.py

Lines changed: 59 additions & 16 deletions
@@ -1,5 +1,5 @@
 from __future__ import annotations
-from typing import Optional, Union, Tuple, List
+from typing import Optional, Union, Iterable, Tuple, List
 
 import numpy as np
 from joblib import Parallel, delayed
@@ -29,7 +29,8 @@ class MapieRegressor(BaseEstimator, RegressorMixin):  # type: ignore
         Any regressor with scikit-learn API (i.e. with fit and predict methods), by default None.
         If ``None``, estimator defaults to a ``LinearRegression`` instance.
 
-    alpha: float, optional
+    alpha: Union[float, Iterable[float]], optional
+        Can be a float, a list of floats, or a np.ndarray of floats.
         Between 0 and 1, represent the uncertainty of the confidence interval.
         Lower alpha produce larger (more conservative) prediction intervals.
         alpha is the complement of the target coverage level.
@@ -118,7 +119,7 @@ class MapieRegressor(BaseEstimator, RegressorMixin):  # type: ignore
     >>> X_toy = np.array([0, 1, 2, 3, 4, 5]).reshape(-1, 1)
     >>> y_toy = np.array([5, 7.5, 9.5, 10.5, 12.5, 15])
     >>> pireg = MapieRegressor(LinearRegression())
-    >>> print(pireg.fit(X_toy, y_toy).predict(X_toy))
+    >>> print(pireg.fit(X_toy, y_toy).predict(X_toy)[:, :, 0])
     [[ 5.28571429  4.61627907  6.        ]
      [ 7.17142857  6.51744186  7.8       ]
      [ 9.05714286  8.4         9.68023256]
@@ -137,7 +138,7 @@ class MapieRegressor(BaseEstimator, RegressorMixin):  # type: ignore
     def __init__(
         self,
         estimator: Optional[RegressorMixin] = None,
-        alpha: float = 0.1,
+        alpha: Union[float, Iterable[float]] = 0.1,
         method: str = "plus",
         cv: Optional[Union[int, BaseCrossValidator]] = None,
         n_jobs: Optional[int] = None,
@@ -161,9 +162,6 @@ def _check_parameters(self) -> None:
         ValueError
             Is parameters are not valid.
         """
-        if not isinstance(self.alpha, float) or not 0 < self.alpha < 1:
-            raise ValueError("Invalid alpha. Allowed values are between 0 and 1.")
-
         if self.method not in self.valid_methods_:
             raise ValueError("Invalid method. Allowed values are 'naive', 'base', 'plus' and 'minmax'.")
 
@@ -241,6 +239,43 @@ def _check_cv(self, cv: Optional[Union[int, BaseCrossValidator]] = None) -> Base
             return cv
         raise ValueError("Invalid cv argument. Allowed values are None, -1, int >= 2, KFold or LeaveOneOut.")
 
+    def _check_alpha(self, alpha: Union[float, Iterable[float]]) -> np.ndarray:
+        """
+        Check alpha and prepare it as a np.ndarray
+
+        Parameters
+        ----------
+        alpha : Union[float, Iterable[float]]
+            Can be a float, a list of floats, or a np.ndarray of floats.
+            Between 0 and 1, represent the uncertainty of the confidence interval.
+            Lower alpha produce larger (more conservative) prediction intervals.
+            alpha is the complement of the target coverage level.
+            Only used at prediction time. By default 0.1.
+
+        Returns
+        -------
+        np.ndarray
+            Prepared alpha.
+
+        Raises
+        ------
+        ValueError
+            If alpha is not a float or an Iterable of floats between 0 and 1.
+        """
+        if isinstance(alpha, float):
+            alpha_np = np.array([alpha])
+        elif isinstance(alpha, Iterable):
+            alpha_np = np.array(alpha)
+        else:
+            raise ValueError("Invalid alpha. Allowed values are float or Iterable.")
+        if len(alpha_np.shape) != 1:
+            raise ValueError("Invalid alpha. Please provide a one-dimensional list of values.")
+        if alpha_np.dtype.type not in [np.float64, np.float32]:
+            raise ValueError("Invalid alpha. Allowed values are Iterable of floats.")
+        if np.any((alpha_np <= 0) | (alpha_np >= 1)):
+            raise ValueError("Invalid alpha. Allowed values are between 0 and 1.")
+        return alpha_np
+
     def _fit_and_predict_oof_model(
         self,
         estimator: RegressorMixin,
@@ -350,19 +385,21 @@ def predict(self, X: ArrayLike) -> np.ndarray:
 
         Returns
        -------
-        np.ndarray of shape (n_samples, 3)
+        np.ndarray of shape (n_samples, 3, len(alpha))
 
-        - [0]: Center of the prediction interval
-        - [1]: Lower bound of the prediction interval
-        - [2]: Upper bound of the prediction interval
+        - [:, 0, :]: Center of the prediction interval
+        - [:, 1, :]: Lower bound of the prediction interval
+        - [:, 2, :]: Upper bound of the prediction interval
        """
         check_is_fitted(self, ["single_estimator_", "estimators_", "k_", "residuals_"])
         X = check_array(X, force_all_finite=False, dtype=["float64", "object"])
         y_pred = self.single_estimator_.predict(X)
+        alpha = self._check_alpha(self.alpha)
         if self.method in ["naive", "base"]:
-            quantile = np.quantile(self.residuals_, 1 - self.alpha, interpolation="higher")
-            y_pred_low = y_pred - quantile
-            y_pred_up = y_pred + quantile
+            quantile = np.quantile(self.residuals_, 1 - alpha, interpolation="higher")
+            # broadcast y_pred to get y_pred_low/up of shape (n_samples_test, len(alpha))
+            y_pred_low = y_pred[:, np.newaxis] - quantile
+            y_pred_up = y_pred[:, np.newaxis] + quantile
         else:
             y_pred_multi = np.stack([e.predict(X) for e in self.estimators_], axis=1)
             if self.method == "plus":
@@ -373,8 +410,14 @@ def predict(self, X: ArrayLike) -> np.ndarray:
             if self.method == "minmax":
                 lower_bounds = np.min(y_pred_multi, axis=1, keepdims=True) - self.residuals_
                 upper_bounds = np.max(y_pred_multi, axis=1, keepdims=True) + self.residuals_
-            y_pred_low = np.quantile(lower_bounds, self.alpha, axis=1, interpolation="lower")
-            y_pred_up = np.quantile(upper_bounds, 1 - self.alpha, axis=1, interpolation="higher")
+            y_pred_low = np.stack([
+                np.quantile(lower_bounds, _alpha, axis=1, interpolation="lower") for _alpha in alpha
+            ], axis=1)
+            y_pred_up = np.stack([
+                np.quantile(upper_bounds, 1 - _alpha, axis=1, interpolation="higher") for _alpha in alpha
+            ], axis=1)
         if self.ensemble:
             y_pred = np.median(y_pred_multi, axis=1)
+        # tile y_pred to get same shape as y_pred_low/up
+        y_pred = np.tile(y_pred, (alpha.shape[0], 1)).T
         return np.stack([y_pred, y_pred_low, y_pred_up], axis=1)
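A standalone numpy sketch of the broadcasting used in the naive/base branch above, with made-up residuals (the variable names mirror ``predict`` but nothing here depends on MAPIE itself; ``interpolation=`` matches the numpy API of this era, later renamed ``method=``):

.. code:: python

    import numpy as np

    residuals = np.abs(np.random.default_rng(0).normal(size=100))  # stand-in conformity scores
    y_pred = np.linspace(0.0, 9.0, 10)                             # shape (n_samples,)
    alpha = np.array([0.05, 0.32])

    # np.quantile with an array of probabilities returns one quantile per alpha
    quantile = np.quantile(residuals, 1 - alpha, interpolation="higher")  # shape (2,)

    # (n_samples, 1) -/+ (len(alpha),) broadcasts to (n_samples, len(alpha))
    y_pred_low = y_pred[:, np.newaxis] - quantile
    y_pred_up = y_pred[:, np.newaxis] + quantile

    # tiling the point predictions and stacking on axis=1 yields the
    # documented (n_samples, 3, len(alpha)) output
    y_pred_tiled = np.tile(y_pred, (alpha.shape[0], 1)).T
    out = np.stack([y_pred_tiled, y_pred_low, y_pred_up], axis=1)
    assert out.shape == (10, 3, 2)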
