|
| 1 | +""" |
| 2 | +======================================================== |
| 3 | +Example use of the prefit parameter with neural networks |
| 4 | +======================================================== |
| 5 | +
|
| 6 | +:class:`mapie.estimators.MapieRegressor` is used to calibrate |
| 7 | +uncertainties for large models for which the cost of cross-validation |
| 8 | +is too high. Typically, neural networks rely on a single validation set. |
| 9 | +
|
| 10 | +In this example, we first fit a neural network on the training set. We |
| 11 | +then compute residuals on a validation set with the `cv="prefit"` parameter. |
| 12 | +Finally, we evaluate the model with prediction intervals on a testing set. |
| 13 | +""" |
import numpy as np
import scipy
import scipy.stats  # `import scipy` alone does not guarantee the stats submodule is loaded
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor

from mapie.estimators import MapieRegressor
from mapie.metrics import coverage_score
| 23 | + |
def f(x: np.ndarray) -> np.ndarray:
    """Evaluate the polynomial ``5*x + 5*x**4 - 9*x**2`` element-wise.

    Parameters
    ----------
    x : np.ndarray
        One-dimensional input values.

    Returns
    -------
    np.ndarray
        Polynomial values, same shape as ``x``.
    """
    # np.asarray returns the computed ndarray as-is instead of copying it
    # the way np.array does; the result is identical for ndarray input.
    return np.asarray(5 * x + 5 * x**4 - 9 * x**2)
| 27 | + |
| 28 | + |
# Generate noisy one-dimensional data from the polynomial f.
sigma = 0.1
n_samples = 10000
X = np.linspace(0, 1, n_samples)
# Seed NumPy so the example is reproducible, consistent with the fixed
# random_state=1 used for the neural network below.
np.random.seed(1)
y = f(X) + np.random.normal(0, sigma, n_samples)

# Train/validation/test split (80/10/10). A fixed random_state keeps the
# splits — and hence the reported coverage — reproducible across runs.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X, y, test_size=1 / 10, random_state=1
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=1 / 9, random_state=1
)
| 38 | + |
# Fit the neural network on the training set only; MAPIE never refits it.
model = MLPRegressor(activation="relu", random_state=1)
X_train_2d = X_train.reshape(-1, 1)
model.fit(X_train_2d, y_train)

# Calibrate prediction intervals on the held-out validation set.
# cv="prefit" tells MAPIE the estimator is already trained, so fitting
# MAPIE only computes residuals — no cross-validation is performed.
alpha = 0.1
mapie = MapieRegressor(model, alpha=alpha, cv="prefit")
mapie.fit(X_val.reshape(-1, 1), y_val)
| 47 | + |
# Point predictions and interval bounds on the testing set.
# MAPIE returns an array of shape (n_samples, 3, n_alpha); selecting the
# single alpha gives columns (prediction, lower bound, upper bound).
intervals = mapie.predict(X_test.reshape(-1, 1))[:, :, 0]
y_pred, y_pred_low, y_pred_up = intervals.T
coverage = coverage_score(y_test, y_pred_low, y_pred_up)
| 51 | + |
# Compare the estimated intervals with the theoretical ones implied by the
# known Gaussian noise: f(x) +/- z_{1-alpha} * sigma.
half_width = scipy.stats.norm.ppf(1 - alpha) * sigma
y_true = f(X_test)
# Sort by x so line plots are drawn left-to-right without zig-zags.
sort_idx = np.argsort(X_test)

plt.scatter(X_test, y_test, color="red", alpha=0.3, label="testing", s=2)
plt.plot(X_test[sort_idx], y_true[sort_idx], color="gray", label="True confidence intervals")
plt.plot(X_test[sort_idx], y_true[sort_idx] - half_width, color="gray", ls="--")
plt.plot(X_test[sort_idx], y_true[sort_idx] + half_width, color="gray", ls="--")
plt.plot(X_test[sort_idx], y_pred[sort_idx], label="Prediction intervals")
plt.fill_between(X_test[sort_idx], y_pred_low[sort_idx], y_pred_up[sort_idx], alpha=0.2)
plt.title(
    f"Target and effective coverages for alpha={alpha}: ({1 - alpha:.3f}, {coverage:.3f})"
)
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.show()
0 commit comments