Commit 686c3ea

Merge pull request #50 from simai-ml/cv_prefit

Cv prefit

2 parents 17271bb + 572935c

14 files changed: +513 -171 lines changed

.appveyor.yml

Lines changed: 1 addition & 1 deletion

@@ -20,7 +20,7 @@ install:
   - conda activate test-env
 
 test_script:
-  - mypy mapie examples --strict --config-file mypy.ini
+  - mypy mapie examples --strict
   - pytest -vs --doctest-modules --cov-branch --cov=mapie --pyargs mapie
 
 after_test:

.github/PULL_REQUEST_TEMPLATE.md

Lines changed: 1 addition & 1 deletion

@@ -25,7 +25,7 @@ Please describe the tests that you ran to verify your changes. Provide instructi
 - [ ] I have read the [contributing guidelines](https://github.com/simai-ml/MAPIE/blob/master/CONTRIBUTING.rst)
 - [ ] I have updated the [HISTORY.rst](https://github.com/simai-ml/MAPIE/blob/master/HISTORY.rst) and [AUTHORS.rst](https://github.com/simai-ml/MAPIE/blob/master/AUTHORS.rst) files
 - [ ] Linting passes successfully : `flake8 . --exclude=doc`
-- [ ] Typing passes successfully : `mypy mapie examples --strict --config-file mypy.ini`
+- [ ] Typing passes successfully : `mypy mapie examples --strict`
 - [ ] Unit tests pass successfully : `pytest -vs --doctest-modules mapie`
 - [ ] Coverage is 100% : `pytest -vs --doctest-modules --cov-branch --cov=mapie --pyargs mapie`
 - [ ] Documentation builds successfully : `cd doc; make clean; make html`

.travis.yml

Lines changed: 1 addition & 1 deletion

@@ -22,7 +22,7 @@ install:
   - conda activate test-env
 
 script:
-  - mypy mapie examples --strict --config-file mypy.ini
+  - mypy mapie examples --strict
   - pytest -vs --doctest-modules --cov-branch --cov=mapie --pyargs mapie
 
 after_success:

HISTORY.rst

Lines changed: 3 additions & 3 deletions

@@ -2,9 +2,10 @@
 History
 =======
 
-0.2.1 (2020-XX-XX)
+0.2.1 (2021-XX-XX)
 ------------------
 
+* Add `cv="prefit"` option
 * Add sample_weight argument in fit method
 
 0.2.0 (2021-05-21)
@@ -16,8 +17,7 @@ History
 * Remove the `n_splits`, `shuffle` and `random_state` parameters
 * Simplify the `method` parameter
 * Fix typos in documentation and add methods descriptions in sphinx
-* Accept alpha parameter as a list or np.ndarray
-* If alpha is an Iterable, `.predict()` returns a np.ndarray of shape (n_samples, 3, len(alpha))
+* Accept alpha parameter as a list or np.ndarray. If alpha is an Iterable, `.predict()` returns a np.ndarray of shape (n_samples, 3, len(alpha)).
 
 0.1.4 (2021-05-07)
 ------------------
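
As context for the entries above, here is a minimal sketch of the alpha behaviour described for 0.2.0, assuming the constructor signature used elsewhere in this commit (alpha passed directly to MapieRegressor):

import numpy as np
from sklearn.linear_model import LinearRegression
from mapie.estimators import MapieRegressor

# Toy data, for illustration only.
X = np.linspace(0, 1, 100).reshape(-1, 1)
y = 2*X.ravel() + np.random.normal(0, 0.1, 100)

# With an Iterable alpha, predict() returns (n_samples, 3, len(alpha)):
# index 0 holds predictions, indices 1 and 2 the lower and upper bounds.
alpha = [0.05, 0.1]
mapie = MapieRegressor(LinearRegression(), alpha=alpha)
mapie.fit(X, y)
y_preds = mapie.predict(X)
assert y_preds.shape == (100, 3, len(alpha))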

doc/images/quickstart_1.png

Binary file changed (0 Bytes)

doc/tutorial.rst

Lines changed: 8 additions & 3 deletions

@@ -69,7 +69,7 @@ over :math:`x`.
     x_sinx, min_x, max_x, n_samples, noise
 )
 
-Let"s visualize our noisy function.
+Let's visualize our noisy function.
 
 .. code:: python
 
@@ -494,11 +494,14 @@ uniform distribution.
 .. image:: images/tuto_7.png
     :align: center
 
-Let"s then define the models. The boosing model considers 100 shallow trees with a max depth of 2 while
+Let's then define the models. The boosing model considers 100 shallow trees with a max depth of 2 while
 the Multilayer Perceptron has two hidden dense layers with 20 neurons each followed by a relu activation.
 
 .. code:: python
 
+from tensorflow.keras import Sequential
+from tensorflow.keras.layers import Dense
+from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
 def mlp():
     """
     Two-layer MLP model
@@ -519,6 +522,8 @@ the Multilayer Perceptron has two hidden dense layers with 20 neurons each follo
         ("linear", LinearRegression(fit_intercept=False))
     ]
 )
+
+from xgboost import XGBRegressor
 xgb_model = XGBRegressor(
     max_depth=2,
     n_estimators=100,
@@ -534,7 +539,7 @@ the Multilayer Perceptron has two hidden dense layers with 20 neurons each follo
     verbose=0
 )
 
-Let"s now use MAPIE to estimate the prediction intervals using the CV+ method
+Let's now use MAPIE to estimate the prediction intervals using the CV+ method
 and compare their prediction interval.
 
 .. code:: python
examples/plot_homoscedastic_1d_data.py

Lines changed: 14 additions & 18 deletions

@@ -22,12 +22,12 @@
 
 def f(x: np.ndarray) -> np.ndarray:
     """Polynomial function used to generate one-dimensional data"""
-    return np.stack(5*x + 5*x**4 - 9*x**2)
+    return np.array(5*x + 5*x**4 - 9*x**2)
 
 
 def get_homoscedastic_data(
-    n_samples: int = 200,
-    n_test: int = 1000,
+    n_train: int = 200,
+    n_true: int = 200,
     sigma: float = 0.1
 ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, float]:
     """
@@ -38,9 +38,9 @@ def get_homoscedastic_data(
 
     Parameters
     ----------
-    n_samples : int, optional
+    n_train : int, optional
         Number of training samples, by default 200.
-    n_test : int, optional
+    n_true : int, optional
         Number of test samples, by default 1000.
     sigma : float, optional
         Standard deviation of noise, by default 0.1
@@ -57,9 +57,9 @@ def get_homoscedastic_data(
     """
     np.random.seed(59)
     q95 = scipy.stats.norm.ppf(0.95)
-    X_train = np.random.exponential(0.4, n_samples)
-    X_true = np.linspace(0.001, 1.2, n_test, endpoint=False)
-    y_train = f(X_train) + np.random.normal(0, sigma, n_samples)
+    X_train = np.linspace(0, 1, n_train)
+    X_true = np.linspace(0, 1, n_true)
+    y_train = f(X_train) + np.random.normal(0, sigma, n_train)
     y_true = f(X_true)
     y_true_sigma = q95*sigma
     return X_train, y_train, X_true, y_true, y_true_sigma
@@ -106,7 +106,7 @@ def plot_1d_data(
     """
     ax.set_xlabel("x")
     ax.set_ylabel("y")
-    ax.set_xlim([0, 1.1])
+    ax.set_xlim([0, 1])
     ax.set_ylim([0, 1])
     ax.scatter(X_train, y_train, color="red", alpha=0.3, label="training")
     ax.plot(X_test, y_test, color="gray", label="True confidence intervals")
@@ -118,16 +118,12 @@ def plot_1d_data(
     ax.legend()
 
 
-X_train, y_train, X_test, y_test, y_test_sigma = get_homoscedastic_data(
-    n_samples=200, n_test=200, sigma=0.1
-)
+X_train, y_train, X_test, y_test, y_test_sigma = get_homoscedastic_data()
 
-polyn_model = Pipeline(
-    [
-        ("poly", PolynomialFeatures(degree=4)),
-        ("linear", LinearRegression(fit_intercept=False))
-    ]
-)
+polyn_model = Pipeline([
+    ("poly", PolynomialFeatures(degree=4)),
+    ("linear", LinearRegression(fit_intercept=False))
+])
 
 Params = TypedDict("Params", {"method": str, "cv": int})
 STRATEGIES = {
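
A note on the np.stack → np.array fix at the top of this file: np.stack joins a sequence of arrays along a new axis, so passing it a single expression works only by accident for 1-D input and raises a TypeError for scalar input, whereas np.array wraps the result directly. A small sketch:

import numpy as np

def f_old(x):
    return np.stack(5*x + 5*x**4 - 9*x**2)  # pre-fix version

def f_new(x):
    return np.array(5*x + 5*x**4 - 9*x**2)  # fixed version

x = np.linspace(0, 1, 4)
assert np.array_equal(f_old(x), f_new(x))  # same values for 1-D input

f_new(0.5)  # fine: wraps the scalar in a 0-d array
try:
    f_old(0.5)  # np.stack tries to iterate over a scalar
except TypeError as err:
    print(err)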

examples/plot_nested-cv.py

Lines changed: 1 addition & 0 deletions

@@ -50,6 +50,7 @@
 from sklearn.model_selection import train_test_split
 from sklearn.model_selection import RandomizedSearchCV
 from sklearn.metrics import mean_squared_error
+
 from mapie.estimators import MapieRegressor
 from mapie.metrics import coverage_score
 
examples/plot_prefit_nn.py

Lines changed: 69 additions & 0 deletions

@@ -0,0 +1,69 @@
+"""
+========================================================
+Example use of the prefit parameter with neural networks
+========================================================
+
+:class:`mapie.estimators.MapieRegressor` is used to calibrate
+uncertainties for large models for which the cost of cross-validation
+is too high. Typically, neural networks rely on a single validation set.
+
+In this example, we first fit a neural network on the training set. We
+then compute residuals on a validation set with the `cv="prefit"` parameter.
+Finally, we evaluate the model with prediction intervals on a testing set.
+"""
+import scipy
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.neural_network import MLPRegressor
+from matplotlib import pyplot as plt
+
+from mapie.estimators import MapieRegressor
+from mapie.metrics import coverage_score
+
+
+def f(x: np.ndarray) -> np.ndarray:
+    """Polynomial function used to generate one-dimensional data."""
+    return np.array(5*x + 5*x**4 - 9*x**2)
+
+
+# Generate data
+sigma = 0.1
+n_samples = 10000
+X = np.linspace(0, 1, n_samples)
+y = f(X) + np.random.normal(0, sigma, n_samples)
+
+# Train/validation/test split
+X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=1/10)
+X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=1/9)
+
+# Train model on training set
+model = MLPRegressor(activation="relu", random_state=1)
+model.fit(X_train.reshape(-1, 1), y_train)
+
+# Calibrate uncertainties on validation set
+alpha = 0.1
+mapie = MapieRegressor(model, alpha=alpha, cv="prefit")
+mapie.fit(X_val.reshape(-1, 1), y_val)
+
+# Evaluate prediction and coverage level on testing set
+y_pred, y_pred_low, y_pred_up = mapie.predict(X_test.reshape(-1, 1))[:, :, 0].T
+coverage = coverage_score(y_test, y_pred_low, y_pred_up)
+
+# Plot obtained prediction intervals on testing set
+theoretical_semi_width = scipy.stats.norm.ppf(1 - alpha)*sigma
+y_test_theoretical = f(X_test)
+order = np.argsort(X_test)
+
+plt.scatter(X_test, y_test, color="red", alpha=0.3, label="testing", s=2)
+plt.plot(X_test[order], y_test_theoretical[order], color="gray", label="True confidence intervals")
+plt.plot(X_test[order], y_test_theoretical[order] - theoretical_semi_width, color="gray", ls="--")
+plt.plot(X_test[order], y_test_theoretical[order] + theoretical_semi_width, color="gray", ls="--")
+plt.plot(X_test[order], y_pred[order], label="Prediction intervals")
+plt.fill_between(X_test[order], y_pred_low[order], y_pred_up[order], alpha=0.2)
+plt.title(
+    f"Target and effective coverages for alpha={alpha}: ({1 - alpha:.3f}, {coverage:.3f})"
+)
+plt.xlabel("x")
+plt.ylabel("y")
+plt.legend()
+plt.show()
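
One step in this new example is worth unpacking: per the HISTORY.rst entry above, predict() returns an array of shape (n_samples, 3, len(alpha)) even for a scalar alpha, and the [:, :, 0].T slice splits it into three 1-D arrays. A standalone sketch of just that indexing:

import numpy as np

n_samples = 5
# Dummy predictions with the documented (n_samples, 3, len(alpha)) layout.
preds = np.random.rand(n_samples, 3, 1)

# Select the single alpha, then transpose to (3, n_samples) so that tuple
# unpacking yields three arrays of shape (n_samples,) each.
y_pred, y_pred_low, y_pred_up = preds[:, :, 0].T
assert y_pred.shape == (n_samples,)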

examples/plot_toy_model.py

Lines changed: 2 additions & 1 deletion

@@ -10,6 +10,7 @@
 from matplotlib import pyplot as plt
 from sklearn.linear_model import LinearRegression
 from sklearn.datasets import make_regression
+
 from mapie.estimators import MapieRegressor
 from mapie.metrics import coverage_score
 
@@ -31,7 +32,7 @@
 plt.fill_between(X[order].ravel(), y_preds[:, 1, 0][order].ravel(), y_preds[:, 2, 0][order].ravel(), alpha=0.2)
 coverage_scores = [coverage_score(y, y_preds[:, 1, i], y_preds[:, 2, i]) for i, _ in enumerate(alpha)]
 plt.title(
-    f"Target and effective coverages for alpha={alpha[0]:.2f}: ({1-alpha[0]:.3f}, {coverage_scores[0]:.3f})\n" +
+    f"Target and effective coverages for alpha={alpha[0]:.2f}: ({1-alpha[0]:.3f}, {coverage_scores[0]:.3f})\n"
     f"Target and effective coverages for alpha={alpha[1]:.2f}: ({1-alpha[1]:.3f}, {coverage_scores[1]:.3f})"
 )
 plt.show()
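
The title change above works because Python concatenates adjacent string literals, including f-strings, at compile time, so the trailing + was redundant. A minimal demonstration:

alpha = [0.05, 0.1]
title = (
    f"first line for alpha={alpha[0]:.2f}\n"
    f"second line for alpha={alpha[1]:.2f}"
)
assert title == f"first line for alpha={alpha[0]:.2f}\nsecond line for alpha={alpha[1]:.2f}"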
