The model with the set of parameters that gives the best score is then used in
MAPIE to estimate the prediction intervals associated with the predictions.
A limitation of this method is that the residuals used by MAPIE are computed
on the validation dataset, which can be subject to overfitting as far as
hyperparameter tuning is concerned.
This fools MAPIE into being slightly too optimistic with its confidence
intervals.

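As an illustration, the non-nested pattern can be sketched as follows. This
is a minimal sketch with made-up parameter values and toy data, not the exact
code of this example, and the MAPIE import path depends on the installed
version::

    from sklearn.datasets import make_regression
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.model_selection import RandomizedSearchCV
    from mapie.estimators import MapieRegressor

    X_train, y_train = make_regression(n_samples=200, random_state=59)
    params = {"max_depth": [2, 5, 10], "n_estimators": [10, 50, 100]}
    search = RandomizedSearchCV(RandomForestRegressor(), params, n_iter=5)
    search.fit(X_train, y_train)
    # The tuned model is frozen before MAPIE sees the data: the residuals
    # are computed on folds the hyperparameters were already optimized on.
    mapie_non_nested = MapieRegressor(search.best_estimator_)
    mapie_non_nested.fit(X_train, y_train)
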
To solve this problem, an alternative option is to perform a nested
cross-validation parameter search directly within the MAPIE estimator on each
*out-of-fold* dataset.
For each testing fold used by MAPIE to store residuals, an internal
cross-validation occurs on the training fold, optimizing hyperparameters.
This ensures that the residuals seen by MAPIE are never seen by the algorithm
beforehand. However, this method is computationally much heavier since
it results in :math:`N \times P` calculations, where *N* is the number of
*out-of-fold* models and *P* the number of parameter search iterations,
versus :math:`N + P` for the non-nested approach.

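With, for instance, :math:`N = 5` out-of-fold models and :math:`P = 10`
search iterations, the nested approach fits on the order of
:math:`5 \times 10 = 50` models, against :math:`5 + 10 = 15` for the
non-nested one. Reusing the toy names from the sketch above, the nested
pattern amounts to handing the *unfitted* search object to MAPIE, which
refits it on each out-of-fold training set::

    cv_obj = RandomizedSearchCV(RandomForestRegressor(), params, n_iter=5)
    # Each out-of-fold training set gets its own hyperparameter search, so
    # the residuals stored by MAPIE come from data the search never saw.
    mapie_nested = MapieRegressor(cv_obj)
    mapie_nested.fit(X_train, y_train)
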
Here, we compare the two strategies on the Boston dataset. We use the Random
Forest Regressor as a base regressor for the CV+ strategy. For the sake of
light computation, we adopt a RandomizedSearchCV parameter search strategy
with a low number of iterations and a reproducible random state.

The two approaches give slightly different predictions, with the nested CV
approach estimating slightly larger prediction interval widths, by a few
percent at most (apart from a handful of exceptions).

For this example, the two approaches result in identical scores and identical
effective coverages.

In the general case, the recommended approach is to use nested
cross-validation, since it does not underestimate the residuals, and hence
the prediction intervals. However, in this particular example, the effective
coverages of both nested and non-nested methods are the same.
"""

import matplotlib.pyplot as plt

mapie_non_nested.fit(X_train, y_train)
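# predict() stacks the point prediction and the interval bounds along the
# second axis: column 0 is the prediction, columns 1 and 2 are the lower
# and upper bounds; the trailing axis indexes the alpha values.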
y_preds_non_nested = mapie_non_nested.predict(X_test)[:, :, 0]
widths_non_nested = y_preds_non_nested[:, 2] - y_preds_non_nested[:, 1]
coverage_non_nested = coverage_score(
    y_test, y_preds_non_nested[:, 1], y_preds_non_nested[:, 2]
)
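# With squared=False, mean_squared_error returns the RMSE instead of the MSE.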
score_non_nested = mean_squared_error(
    y_test, y_preds_non_nested[:, 0], squared=False
)

# Nested approach with the CV+ strategy using the Random Forest model.
cv_obj = RandomizedSearchCV(

mapie_nested.fit(X_train, y_train)
y_preds_nested = mapie_nested.predict(X_test)[:, :, 0]
widths_nested = y_preds_nested[:, 2] - y_preds_nested[:, 1]
coverage_nested = coverage_score(
    y_test, y_preds_nested[:, 1], y_preds_nested[:, 2]
)
score_nested = mean_squared_error(y_test, y_preds_nested[:, 0], squared=False)

# Print scores and effective coverages.
print(
    "Scores and effective coverages for the CV+ strategy using the "
    "Random Forest model."
)
print(
    "Score on the test set for the non-nested and nested CV approaches: ",
    f"{score_non_nested: .3f}, {score_nested: .3f}"
)
print(
    "Effective coverage on the test set for the non-nested "
    "and nested CV approaches: ",
    f"{coverage_non_nested: .3f}, {coverage_nested: .3f}"
)

ax1.set_ylim([min_x, max_x])
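# Each point compares the interval widths of one test sample under the two
# approaches; the dashed diagonal marks equal widths.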
ax1.scatter(widths_nested, widths_non_nested)
ax1.plot([min_x, max_x], [min_x, max_x], ls="--", color="k")
ax2.set_xlabel(
    "[width(non-nested CV) - width(nested CV)] / width(non-nested CV)"
)
ax2.set_ylabel("Counts")
ax2.hist((widths_non_nested - widths_nested) / widths_non_nested, bins=15)
plt.show()