scikit-learn-contrib
diff --git a/‎doc/images/quickstart_1.png
0 Bytes b/‎doc/images/quickstart_1.png
0 Bytes
diff --git a/‎notebooks/regression/ts-changepoint.ipynb
Lines changed: 26 additions & 24 deletions b/‎notebooks/regression/ts-changepoint.ipynb
Lines changed: 26 additions & 24 deletions
diff --git a/‎notebooks/regression/ts-changepoint.md
Lines changed: 107 additions & 26 deletions b/‎notebooks/regression/ts-changepoint.md
Lines changed: 107 additions & 26 deletions
@@ -1,24 +1,7 @@
----
-jupyter:
-  jupytext:
-    formats: ipynb,md
-    text_representation:
-      extension: .md
-      format_name: markdown
-      format_version: '1.3'
-      jupytext_version: 1.13.6
-  kernelspec:
-    display_name: mapie-notebooks
-    language: python
-    name: mapie-notebooks
----
-
 # Estimating prediction intervals of time series forecast with EnbPI
 
-
 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/scikit-learn-contrib/MAPIE/blob/add-ts-notebooks/notebooks/regression/ts-changepoint.ipynb)
 
-
 This example uses `mapie.time_series_regression.MapieTimeSeriesRegressor` to estimate
 prediction intervals associated with time series forecasts. It follows Xu \& Xie (2021).
 We use here the Victoria electricity demand dataset used in the book
@@ -34,12 +17,14 @@ The best model is then feeded into
 associated prediction intervals. We compare two approaches: with or without
 ``partial_fit`` called at every step. 
 
+
 ```python
 install_mapie = False
 if install_mapie:
-    !pip install "git+https://github.com/scikit-learn-contrib/MAPIE.git@add-ts-notebooks"
+    !pip install mapie
 ```
 
+
 ```python
 import warnings
 
@@ -61,6 +46,7 @@ warnings.simplefilter("ignore")
 
 ## 1. Load input data and feature engineering
 
+
 ```python
 url_file = "https://raw.githubusercontent.com/scikit-learn-contrib/MAPIE/master/examples/data/demand_temperature.csv"
 demand_df = pd.read_csv(
@@ -79,6 +65,7 @@ for hour in range(1, n_lags):
 
 ## 2. Train/validation/test split
 
+
 ```python
 num_test_steps = 24 * 7
 demand_train = demand_df.iloc[:-num_test_steps, :].copy()
@@ -94,15 +81,30 @@ X_test = demand_test.loc[:, features]
 y_test = demand_test["Demand"]
 ```
 
+
 ```python
 plt.figure(figsize=(16, 5))
 plt.plot(y_train)
 plt.plot(y_test)
 plt.ylabel("Hourly demand (GW)")
 ```
 
+
+
+
+    Text(0, 0.5, 'Hourly demand (GW)')
+
+
+
+
+    
+![png](output_9_1.png)
+    
+
+
 ## 3. Optimize the base estimator
 
+
 ```python
 model_params_fit_not_done = False
 if model_params_fit_not_done:
@@ -133,6 +135,7 @@ else:
 
 ## 4. Estimate prediction intervals on the test set
 
+
 ```python
 alpha = 0.05
 gap = 1
@@ -146,11 +149,12 @@ mapie_enbpi = MapieTimeSeriesRegressor(
 
 ### Without partial fit
 
+
 ```python
 print("EnbPI, with no partial_fit, width optimization")
 mapie_enbpi = mapie_enbpi.fit(X_train, y_train)
 y_pred_npfit, y_pis_npfit = mapie_enbpi.predict(
-    X_test, alpha=alpha, ensemble=True, beta_optimize=True
+    X_test, alpha=alpha, ensemble=True, optimize_beta=True
 )
 coverage_npfit = regression_coverage_score(
     y_test, y_pis_npfit[:, 0, 0], y_pis_npfit[:, 1, 0]
@@ -160,16 +164,20 @@ width_npfit = regression_mean_width_score(
 )
 ```
 
+    EnbPI, with no partial_fit, width optimization
+
+
 ### With partial fit
 
+
 ```python
 print("EnbPI with partial_fit, width optimization")
 mapie_enbpi = mapie_enbpi.fit(X_train, y_train)
 
 y_pred_pfit = np.zeros(y_pred_npfit.shape)
 y_pis_pfit = np.zeros(y_pis_npfit.shape)
 y_pred_pfit[:gap], y_pis_pfit[:gap, :, :] = mapie_enbpi.predict(
-    X_test.iloc[:gap, :], alpha=alpha, ensemble=True
+    X_test.iloc[:gap, :], alpha=alpha, ensemble=True, optimize_beta=True
 )
 for step in range(gap, len(X_test), gap):
     mapie_enbpi.partial_fit(
@@ -182,7 +190,8 @@ for step in range(gap, len(X_test), gap):
     ) = mapie_enbpi.predict(
         X_test.iloc[step:(step + gap), :],
         alpha=alpha,
-        ensemble=True
+        ensemble=True,
+        optimize_beta=True
     )
 coverage_pfit = regression_coverage_score(
     y_test, y_pis_pfit[:, 0, 0], y_pis_pfit[:, 1, 0]
@@ -192,15 +201,20 @@ width_pfit = regression_mean_width_score(
 )
 ```
 
+    EnbPI with partial_fit, width optimization
+
+
 ## 5. Plot estimated prediction intervals on test set
 
+
 ```python
 y_preds = [y_pred_npfit, y_pred_pfit]
 y_pis = [y_pis_npfit, y_pis_pfit]
 coverages = [coverage_npfit, coverage_pfit]
 widths = [width_npfit, width_pfit]
 ```
 
+
 ```python
 def plot_forecast(y_train, y_test, y_preds, y_pis, coverages, widths, plot_coverage=True):
     fig, axs = plt.subplots(
@@ -231,23 +245,30 @@ def plot_forecast(y_train, y_test, y_preds, y_pis, coverages, widths, plot_cover
     plt.show()
 ```
 
+
 ```python
 plot_forecast(y_train, y_test, y_preds, y_pis, coverages, widths)
 ```
 
-## VI. Forecast on test dataset with change point
 
+    
+![png](output_21_0.png)
+    
 
-We will now see how MAPIE adapts its prediction intervals when a brutal changepoint arises in the test set. To simulate this, we will artificially decrease the electricity demand by 2 GW in the test set, aiming at simulating an effect, such as blackout or lockdown due to a pandemic, that was not taken into account by the model during its training. 
 
+## 6. Forecast on test dataset with change point
+
+We will now see how MAPIE adapts its prediction intervals when an abrupt change point occurs in the test set. To simulate this, we artificially decrease the electricity demand by 2 GW over the second half of the test set, mimicking an event (such as a blackout or a lockdown due to a pandemic) that the model did not account for during training.
 
 ### Corrupt the dataset
 
+
 ```python
 demand_df_corrupted = demand_df.copy()
 demand_df_corrupted.Demand.iloc[-int(num_test_steps/2):] -= 2
 ```
 
+
 ```python
 n_lags = 5
 for hour in range(1, n_lags):
@@ -263,20 +284,35 @@ X_test = demand_test_corrupted.loc[:, features]
 y_test = demand_test_corrupted["Demand"]
 ```
 
+
 ```python
 plt.figure(figsize=(16, 5))
 plt.ylabel("Hourly demand (GW)")
 plt.plot(y_train)
 plt.plot(y_test)
 ```
 
+
+
+
+    [<matplotlib.lines.Line2D at 0x16a409930>]
+
+
+
+
+    
+![png](output_27_1.png)
+    
+
+
 ### Prediction intervals without partial fit
 
+
 ```python
 print("EnbPI, with no partial_fit, width optimization")
 mapie_enbpi = mapie_enbpi.fit(X_train, y_train)
 y_pred_npfit, y_pis_npfit = mapie_enbpi.predict(
-    X_test, alpha=alpha, ensemble=True, beta_optimize=True
+    X_test, alpha=alpha, ensemble=True, optimize_beta=True
 )
 coverage_npfit = regression_coverage_score(
     y_test, y_pis_npfit[:, 0, 0], y_pis_npfit[:, 1, 0]
@@ -286,8 +322,12 @@ width_npfit = regression_mean_width_score(
 )
 ```
 
+    EnbPI, with no partial_fit, width optimization
+
+
 ### Prediction intervals with partial fit
 
+
 ```python
 print("EnbPI with partial_fit, width optimization")
 mapie_enbpi = mapie_enbpi.fit(X_train, y_train)
@@ -296,7 +336,7 @@ y_pred_pfit = np.zeros(y_pred_npfit.shape)
 y_pis_pfit = np.zeros(y_pis_npfit.shape)
 conformity_scores_pfit, lower_quantiles_pfit, higher_quantiles_pfit = [], [], []
 y_pred_pfit[:gap], y_pis_pfit[:gap, :, :] = mapie_enbpi.predict(
-    X_test.iloc[:gap, :], alpha=alpha, ensemble=True
+    X_test.iloc[:gap, :], alpha=alpha, ensemble=True, optimize_beta=True
 )
 for step in range(gap, len(X_test), gap):
     mapie_enbpi.partial_fit(
@@ -309,7 +349,8 @@ for step in range(gap, len(X_test), gap):
     ) = mapie_enbpi.predict(
         X_test.iloc[step:(step + gap), :],
         alpha=alpha,
-        ensemble=True
+        ensemble=True, 
+        optimize_beta=True
     )
     conformity_scores_pfit.append(mapie_enbpi.conformity_scores_)
     lower_quantiles_pfit.append(mapie_enbpi.lower_quantiles_)
@@ -322,19 +363,31 @@ width_pfit = regression_mean_width_score(
 )
 ```
 
+    EnbPI with partial_fit, width optimization
+
+
 ### Plot estimated prediction intervals on test set
 
+
 ```python
 y_preds = [y_pred_npfit, y_pred_pfit]
 y_pis = [y_pis_npfit, y_pis_pfit]
 coverages = [coverage_npfit, coverage_pfit]
 widths = [width_npfit, width_pfit]
 ```
 
+
 ```python
 plot_forecast(y_train, y_test, y_preds, y_pis, coverages, widths, plot_coverage=False)
 ```
 
+
+    
+![png](output_34_0.png)
+    
+
+
+
 ```python
 window = 24
 rolling_coverage_pfit, rolling_coverage_npfit = [], []
@@ -353,15 +406,30 @@ for i in range(window, len(y_test), 1):
 
 ### Marginal coverage on a 24-hour rolling window of prediction intervals
 
+
 ```python
 plt.figure(figsize=(10, 5))
 plt.ylabel(f"Rolling coverage [{window} hours]")
 plt.plot(y_test[window:].index, rolling_coverage_npfit, label="Without update of residuals")
 plt.plot(y_test[window:].index, rolling_coverage_pfit, label="With update of residuals")
 ```
 
+
+
+
+    [<matplotlib.lines.Line2D at 0x16b986710>]
+
+
+
+
+    
+![png](output_37_1.png)
+    
+
+
 ### Temporal evolution of the distribution of residuals used for estimating prediction intervals
 
+
 ```python
 plt.figure(figsize=(7, 5))
 for i, j in enumerate([0, -1]):
@@ -370,3 +438,16 @@ for i, j in enumerate([0, -1]):
     plt.axvline(higher_quantiles_pfit[j], ls="--", color=f"C{i}")
 plt.legend(loc=[1, 0])
 ```
+
+
+
+
+    <matplotlib.legend.Legend at 0x16b985390>
+
+
+
+
+    
+![png](output_39_1.png)
+    
+