
Commit 9041d4d

Merge pull request #56 from simai-ml/move-alpha-to-predict:
Move alpha to predict and make corrections in tests, examples, and docs

2 parents: 56de273 + 5198334

File tree: 12 files changed (+257, -215 lines)


HISTORY.rst

Lines changed: 1 addition & 0 deletions
@@ -5,6 +5,7 @@ History
 0.2.2 (2021-06-XX)
 ------------------

+* Set alpha parameter as predict argument, with None as default value
 * Switch to github actions for continuous integration of the code
 * Add image explaining MAPIE internals on the README
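Taken together with the file diffs below, the change boils down to the following usage pattern. This is a minimal sketch with a placeholder estimator and toy data, not code from the commit itself; the alpha=None behavior follows the HISTORY entry above.

from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from mapie.estimators import MapieRegressor

X, y = make_regression(n_samples=200, n_features=1, noise=10, random_state=0)

# alpha is no longer a constructor argument...
mapie = MapieRegressor(LinearRegression())
mapie.fit(X, y)

# ...it is now passed to predict. With the default alpha=None, only point
# predictions come back (per the HISTORY entry above).
y_pred = mapie.predict(X)

# With alpha set, predict returns a (y_pred, y_pis) tuple, y_pis stacking
# the lower and upper bounds for each alpha level.
y_pred, y_pis = mapie.predict(X, alpha=[0.05, 0.32])
print(y_pis.shape)  # expected: (200, 2, 2)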

README.rst

Lines changed: 8 additions & 8 deletions
@@ -87,9 +87,9 @@ and two standard deviations from the mean.

 from mapie.estimators import MapieRegressor
 alpha = [0.05, 0.32]
-mapie = MapieRegressor(regressor, alpha=alpha)
+mapie = MapieRegressor(regressor)
 mapie.fit(X, y)
-y_preds = mapie.predict(X)
+y_pred, y_pis = mapie.predict(X, alpha=alpha)

@@ -105,18 +105,18 @@ The estimated prediction intervals can then be plotted as follows.
 plt.xlabel("x")
 plt.ylabel("y")
 plt.scatter(X, y, alpha=0.3)
-plt.plot(X, y_preds[:, 0, 0], color="C1")
+plt.plot(X, y_pred, color="C1")
 order = np.argsort(X[:, 0])
-plt.plot(X[order], y_preds[order][:, 1, 1], color="C1", ls="--")
-plt.plot(X[order], y_preds[order][:, 2, 1], color="C1", ls="--")
+plt.plot(X[order], y_pis[order][:, 0, 1], color="C1", ls="--")
+plt.plot(X[order], y_pis[order][:, 1, 1], color="C1", ls="--")
 plt.fill_between(
     X[order].ravel(),
-    y_preds[:, 1, 0][order].ravel(),
-    y_preds[:, 2, 0][order].ravel(),
+    y_pis[order][:, 0, 0].ravel(),
+    y_pis[order][:, 1, 0].ravel(),
     alpha=0.2
 )
 coverage_scores = [
-    coverage_score(y, y_preds[:, 1, i], y_preds[:, 2, i])
+    coverage_score(y, y_pis[:, 0, i], y_pis[:, 1, i])
     for i, _ in enumerate(alpha)
 ]
 plt.title(
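The indexing convention the updated snippets rely on: y_pis has shape (n_samples, 2, n_alpha), the middle axis holding the lower (0) and upper (1) bounds and the last axis following the order of the alpha list. A short sketch of that layout, with LinearRegression standing in for the README's regressor and toy data replacing its dataset:

from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from mapie.estimators import MapieRegressor
from mapie.metrics import coverage_score

X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=59)
alpha = [0.05, 0.32]
mapie = MapieRegressor(LinearRegression())
mapie.fit(X, y)
y_pred, y_pis = mapie.predict(X, alpha=alpha)

# Middle axis: 0 = lower bound, 1 = upper bound; last axis: alpha index.
y_low_05, y_up_05 = y_pis[:, 0, 0], y_pis[:, 1, 0]  # alpha = 0.05
y_low_32, y_up_32 = y_pis[:, 0, 1], y_pis[:, 1, 1]  # alpha = 0.32
print((y_up_05 - y_low_05).mean())  # 95% intervals are wider
print((y_up_32 - y_low_32).mean())  # than 68% intervals
print(coverage_score(y, y_low_05, y_up_05))  # should be close to 0.95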

doc/images/quickstart_1.png

Binary file (quick start figure) updated; image preview omitted.

doc/quick_start.rst

Lines changed: 7 additions & 7 deletions
@@ -49,9 +49,9 @@ and two standard deviations from the mean.

 from mapie.estimators import MapieRegressor
 alpha = [0.05, 0.32]
-mapie = MapieRegressor(regressor, alpha=alpha)
+mapie = MapieRegressor(regressor)
 mapie.fit(X, y)
-y_preds = mapie.predict(X)
+y_pred, y_pis = mapie.predict(X, alpha=alpha)


 3. Show the results

@@ -68,7 +68,7 @@ The estimated prediction intervals can then be plotted as follows.
 from mapie.metrics import coverage_score

 coverage_scores = [
-    coverage_score(y, y_preds[:, 1, i], y_preds[:, 2, i])
+    coverage_score(y, y_pis[:, 0, i], y_pis[:, 1, i])
     for i, _ in enumerate(alpha)
 ]

@@ -77,12 +77,12 @@ The estimated prediction intervals can then be plotted as follows.
 plt.scatter(X, y, alpha=0.3)
 plt.plot(X, y_preds[:, 0, 0], color="C1")
 order = np.argsort(X[:, 0])
-plt.plot(X[order], y_preds[order][:, 1, 1], color="C1", ls="--")
-plt.plot(X[order], y_preds[order][:, 2, 1], color="C1", ls="--")
+plt.plot(X[order], y_pis[order][:, 0, 1], color="C1", ls="--")
+plt.plot(X[order], y_pis[order][:, 1, 1], color="C1", ls="--")
 plt.fill_between(
     X[order].ravel(),
-    y_preds[:, 1, 0][order].ravel(),
-    y_preds[:, 2, 0][order].ravel(),
+    y_pis[order][:, 0, 0].ravel(),
+    y_pis[order][:, 1, 0].ravel(),
     alpha=0.2
 )
 plt.title(

doc/tutorial.rst

Lines changed: 19 additions & 19 deletions
@@ -112,11 +112,11 @@ in order to obtain a 95% confidence for our prediction intervals.
     "cv_plus": dict(method="plus", cv=10),
     "cv_minmax": dict(method="minmax", cv=10),
 }
-y_preds = {}
+y_pred, y_pis = {}, {}
 for strategy, params in STRATEGIES.items():
-    mapie = MapieRegressor(polyn_model, alpha=0.05, ensemble=False, **params)
+    mapie = MapieRegressor(polyn_model, ensemble=False, **params)
     mapie.fit(X_train, y_train)
-    y_preds[strategy] = mapie.predict(X_test)[:, :, 0]
+    y_pred[strategy], y_pis[strategy] = mapie.predict(X_test, alpha=0.05)

 Let’s now compare the confidence intervals with the prediction intervals obtained
 by the Jackknife+, Jackknife-minmax, CV+, and CV-minmax strategies.

@@ -159,9 +159,9 @@ by the Jackknife+, Jackknife-minmax, CV+, and CV-minmax strategies.
     X_test.ravel(),
     y_mesh.ravel(),
     1.96*noise,
-    y_preds[strategy][:, 0].ravel(),
-    y_preds[strategy][:, 1].ravel(),
-    y_preds[strategy][:, 2].ravel(),
+    y_pred[strategy].ravel(),
+    y_pis[strategy][:, 0, 0].ravel(),
+    y_pis[strategy][:, 1, 0].ravel(),
     ax=coord,
     title=strategy
 )

@@ -178,7 +178,7 @@ Let’s confirm this by comparing the prediction interval widths over

 fig, ax = plt.subplots(1, 1, figsize=(7, 5))
 for strategy in STRATEGIES:
-    ax.plot(X_test, y_preds[strategy][:, 2] - y_preds[strategy][:, 1])
+    ax.plot(X_test, y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0])
 ax.axhline(1.96*2*noise, ls="--", color="k")
 ax.set_xlabel("x")
 ax.set_ylabel("Prediction Interval Width")

@@ -311,11 +311,11 @@ strategies.
     "cv_plus": dict(method="plus", cv=10),
     "cv_minmax": dict(method="minmax", cv=10),
 }
-prediction_interval = {}
+y_pred, y_pis = {}, {}
 for strategy, params in STRATEGIES.items():
-    mapie = MapieRegressor(polyn_model, alpha=0.05, ensemble=False, **params)
+    mapie = MapieRegressor(polyn_model, ensemble=False, **params)
     mapie.fit(X_train, y_train)
-    y_preds[strategy] = mapie.predict(X_test)[:, :, 0]
+    y_pred[strategy], y_pis[strategy] = mapie.predict(X_test, alpha=0.05)


 .. code:: python

@@ -331,9 +331,9 @@ strategies.
     X_test.ravel(),
     y_mesh.ravel(),
     1.96*noise,
-    y_preds[strategy][:, 0].ravel(),
-    y_preds[strategy][:, 1].ravel(),
-    y_preds[strategy][:, 2].ravel(),
+    y_pred[strategy].ravel(),
+    y_pis[strategy][:, 0, :].ravel(),
+    y_pis[strategy][:, 1, :].ravel(),
     ax=coord,
     title=strategy
 )

@@ -354,7 +354,7 @@ Let's now compare the prediction interval widths between all strategies.
 fig, ax = plt.subplots(1, 1, figsize=(7, 5))
 ax.set_yscale("log")
 for strategy in STRATEGIES:
-    ax.plot(X_test, y_preds[strategy][:, 2] - y_preds[strategy][:, 1])
+    ax.plot(X_test, y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0])
 ax.axhline(1.96*2*noise, ls="--", color="k")
 ax.set_xlabel("x")
 ax.set_ylabel("Prediction Interval Width")

@@ -514,9 +514,9 @@ and compare their prediction interval.
 model_names = ["polyn", "xgb", "mlp"]
 prediction_interval = {}
 for name, model in zip(model_names, models):
-    mapie = MapieRegressor(model, alpha=0.05, method="plus", cv=5, ensemble=True)
+    mapie = MapieRegressor(model, method="plus", cv=5, ensemble=True)
     mapie.fit(X_train, y_train)
-    y_preds[name] = mapie.predict(X_test)[:, :, 0]
+    y_pred[name], y_pis[name] = mapie.predict(X_test, alpha=0.05)

 .. code:: python

@@ -528,9 +528,9 @@ and compare their prediction interval.
     X_test.ravel(),
     y_mesh.ravel(),
     1.96*noise,
-    y_preds[name][:, 0].ravel(),
-    y_preds[name][:, 1].ravel(),
-    y_preds[name][:, 2].ravel(),
+    y_pred[name].ravel(),
+    y_pis[name][:, 0, 0].ravel(),
+    y_pis[name][:, 1, 0].ravel(),
     ax=ax,
     title=name
 )
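For orientation, here is the loop the tutorial hunks converge on, written out as a self-contained sketch. The polynomial pipeline and the noisy one-dimensional data are stand-ins for the tutorial's polyn_model and dataset, and only the two CV strategies visible in the hunks are kept:

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from mapie.estimators import MapieRegressor

rng = np.random.default_rng(42)
X_train = rng.uniform(-1, 1, size=(100, 1))
y_train = X_train.ravel() ** 2 + rng.normal(0, 0.1, size=100)
X_test = np.linspace(-1, 1, 50).reshape(-1, 1)

polyn_model = make_pipeline(PolynomialFeatures(degree=2), LinearRegression())

STRATEGIES = {
    "cv_plus": dict(method="plus", cv=10),
    "cv_minmax": dict(method="minmax", cv=10),
}
y_pred, y_pis = {}, {}
for strategy, params in STRATEGIES.items():
    mapie = MapieRegressor(polyn_model, ensemble=False, **params)
    mapie.fit(X_train, y_train)
    y_pred[strategy], y_pis[strategy] = mapie.predict(X_test, alpha=0.05)
    # With a single alpha, widths are y_pis[:, 1, 0] - y_pis[:, 0, 0].
    width = (y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0]).mean()
    print(f"{strategy}: mean interval width {width:.3f}")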

examples/plot_barber2020_simulations.py

Lines changed: 4 additions & 4 deletions
@@ -108,18 +108,17 @@ def PIs_vs_dimensions(
 for strategy, params in strategies.items():
     mapie = MapieRegressor(
         LinearRegression(),
-        alpha=alpha,
         ensemble=True,
         n_jobs=-1,
         **params
     )
     mapie.fit(X_train, y_train)
-    y_preds = mapie.predict(X_test)[:, :, 0]
+    y_pred, y_pis = mapie.predict(X_test, alpha=alpha)
     results[strategy][dimension]["coverage"][trial] = (
-        coverage_score(y_test, y_preds[:, 1], y_preds[:, 2])
+        coverage_score(y_test, y_pis[:, 0, 0], y_pis[:, 1, 0])
     )
     results[strategy][dimension]["width_mean"][trial] = (
-        y_preds[:, 2] - y_preds[:, 1]
+        y_pis[:, 1, 0] - y_pis[:, 0, 0]
     ).mean()
 return results

@@ -198,3 +197,4 @@ def plot_simulation_results(
 dimensions = np.arange(10, 150, 10)
 results = PIs_vs_dimensions(STRATEGIES, alpha, ntrial, dimensions)
 plot_simulation_results(results, title="Coverages and interval widths")
+plt.show()

examples/plot_homoscedastic_1d_data.py

Lines changed: 5 additions & 5 deletions
@@ -139,22 +139,22 @@ def plot_1d_data(
 for i, (strategy, params) in enumerate(STRATEGIES.items()):
     mapie = MapieRegressor(
         polyn_model,
-        alpha=0.05,
         ensemble=True,
         n_jobs=-1,
         **params
     )
     mapie.fit(X_train.reshape(-1, 1), y_train)
-    y_preds = mapie.predict(X_test.reshape(-1, 1))[:, :, 0]
+    y_pred, y_pis = mapie.predict(X_test.reshape(-1, 1), alpha=0.05)
     plot_1d_data(
         X_train,
         y_train,
         X_test,
         y_test,
         y_test_sigma,
-        y_preds[:, 0],
-        y_preds[:, 1],
-        y_preds[:, 2],
+        y_pred,
+        y_pis[:, 0, 0],
+        y_pis[:, 1, 0],
         axs[i],
         strategy
     )
+plt.show()

examples/plot_nested-cv.py

Lines changed: 10 additions & 10 deletions
@@ -90,20 +90,21 @@
 best_est = cv_obj.best_estimator_
 mapie_non_nested = MapieRegressor(
     best_est,
-    alpha=alpha,
     method="plus",
     cv=cv,
     ensemble=True,
     n_jobs=-1
 )
 mapie_non_nested.fit(X_train, y_train)
-y_preds_non_nested = mapie_non_nested.predict(X_test)[:, :, 0]
-widths_non_nested = y_preds_non_nested[:, 2] - y_preds_non_nested[:, 1]
+y_pred_non_nested, y_pis_non_nested = mapie_non_nested.predict(
+    X_test, alpha=alpha
+)
+widths_non_nested = y_pis_non_nested[:, 1, 0] - y_pis_non_nested[:, 0, 0]
 coverage_non_nested = coverage_score(
-    y_test, y_preds_non_nested[:, 1], y_preds_non_nested[:, 2]
+    y_test, y_pis_non_nested[:, 0, 0], y_pis_non_nested[:, 1, 0]
 )
 score_non_nested = mean_squared_error(
-    y_test, y_preds_non_nested[:, 0], squared=False
+    y_test, y_pred_non_nested, squared=False
 )

 # Nested approach with the CV+ strategy using the Random Forest model.

@@ -120,18 +121,17 @@
 )
 mapie_nested = MapieRegressor(
     cv_obj,
-    alpha=alpha,
     method="plus",
     cv=cv,
     ensemble=True
 )
 mapie_nested.fit(X_train, y_train)
-y_preds_nested = mapie_nested.predict(X_test)[:, :, 0]
-widths_nested = y_preds_nested[:, 2] - y_preds_nested[:, 1]
+y_pred_nested, y_pis_nested = mapie_nested.predict(X_test, alpha=alpha)
+widths_nested = y_pis_nested[:, 1] - y_pis_nested[:, 0]
 coverage_nested = coverage_score(
-    y_test, y_preds_nested[:, 1], y_preds_nested[:, 2]
+    y_test, y_pis_nested[:, 0, 0], y_pis_nested[:, 1, 0]
 )
-score_nested = mean_squared_error(y_test, y_preds_nested[:, 0], squared=False)
+score_nested = mean_squared_error(y_test, y_pred_nested, squared=False)

 # Print scores and effective coverages.
 print(
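The nested versus non-nested distinction comes down to what MapieRegressor wraps. A hedged sketch of that difference, where the small random forest, the one-parameter search space, and the generated data are all placeholders rather than the example's real configuration:

from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from mapie.estimators import MapieRegressor

X_train, y_train = make_regression(
    n_samples=200, n_features=4, noise=10, random_state=0
)

cv_obj = RandomizedSearchCV(
    RandomForestRegressor(n_estimators=10, random_state=0),
    param_distributions={"max_depth": [2, 5, 10]},
    n_iter=3,
    cv=5,
    random_state=0,
)
cv_obj.fit(X_train, y_train)

# Non-nested: tune once, then wrap the single refitted best estimator.
mapie_non_nested = MapieRegressor(
    cv_obj.best_estimator_, method="plus", cv=5, ensemble=True
)

# Nested: wrap the search object itself, so the hyperparameter search is
# re-run inside each of MAPIE's cross-validation folds.
mapie_nested = MapieRegressor(cv_obj, method="plus", cv=5, ensemble=True)

for mapie in (mapie_non_nested, mapie_nested):
    mapie.fit(X_train, y_train)
    y_pred, y_pis = mapie.predict(X_train, alpha=0.2)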

examples/plot_prefit_nn.py

Lines changed: 4 additions & 3 deletions
@@ -45,12 +45,13 @@ def f(x: np.ndarray) -> np.ndarray:
 model.fit(X_train.reshape(-1, 1), y_train)

 # Calibrate uncertainties on validation set
-alpha = 0.1
-mapie = MapieRegressor(model, alpha=alpha, cv="prefit")
+mapie = MapieRegressor(model, cv="prefit")
 mapie.fit(X_val.reshape(-1, 1), y_val)

 # Evaluate prediction and coverage level on testing set
-y_pred, y_pred_low, y_pred_up = mapie.predict(X_test.reshape(-1, 1))[:, :, 0].T
+alpha = 0.1
+y_pred, y_pis = mapie.predict(X_test.reshape(-1, 1), alpha=alpha)
+y_pred_low, y_pred_up = y_pis[:, 0, 0], y_pis[:, 1, 0]
 coverage = coverage_score(y_test, y_pred_low, y_pred_up)

 # Plot obtained prediction intervals on testing set
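End to end, the cv="prefit" workflow now reads as in the sketch below; the linear model and synthetic splits are placeholders for the example's neural network and data:

from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from mapie.estimators import MapieRegressor
from mapie.metrics import coverage_score

X, y = make_regression(n_samples=600, n_features=1, noise=10, random_state=59)
X_train, X_rest, y_train, y_rest = train_test_split(X, y, test_size=0.5, random_state=0)
X_val, X_test, y_val, y_test = train_test_split(X_rest, y_rest, test_size=0.5, random_state=0)

# Train any regressor beforehand...
model = LinearRegression().fit(X_train, y_train)

# ...then calibrate uncertainties on a held-out validation set with cv="prefit".
mapie = MapieRegressor(model, cv="prefit")
mapie.fit(X_val, y_val)

# alpha is supplied only at prediction time.
alpha = 0.1
y_pred, y_pis = mapie.predict(X_test, alpha=alpha)
y_pred_low, y_pred_up = y_pis[:, 0, 0], y_pis[:, 1, 0]
print("coverage:", coverage_score(y_test, y_pred_low, y_pred_up))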

examples/plot_toy_model.py

Lines changed: 8 additions & 8 deletions
@@ -18,26 +18,26 @@
 X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=59)

 alpha = [0.05, 0.32]
-mapie = MapieRegressor(regressor, alpha=alpha, method="plus")
+mapie = MapieRegressor(regressor, method="plus")
 mapie.fit(X, y)
-y_preds = mapie.predict(X)
+y_pred, y_pis = mapie.predict(X, alpha=alpha)

 coverage_scores = [
-    coverage_score(y, y_preds[:, 1, i], y_preds[:, 2, i])
+    coverage_score(y, y_pis[:, 0, i], y_pis[:, 1, i])
     for i, _ in enumerate(alpha)
 ]

 plt.xlabel("x")
 plt.ylabel("y")
 plt.scatter(X, y, alpha=0.3)
-plt.plot(X, y_preds[:, 0, 0], color="C1")
+plt.plot(X, y_pred, color="C1")
 order = np.argsort(X[:, 0])
-plt.plot(X[order], y_preds[order][:, 1, 1], color="C1", ls="--")
-plt.plot(X[order], y_preds[order][:, 2, 1], color="C1", ls="--")
+plt.plot(X[order], y_pis[order][:, 0, 1], color="C1", ls="--")
+plt.plot(X[order], y_pis[order][:, 1, 1], color="C1", ls="--")
 plt.fill_between(
     X[order].ravel(),
-    y_preds[:, 1, 0][order].ravel(),
-    y_preds[:, 2, 0][order].ravel(),
+    y_pis[order][:, 0, 0].ravel(),
+    y_pis[order][:, 1, 0].ravel(),
     alpha=0.2
 )
 plt.title(
