Skip to content

Commit df94bb9

Browse files
authored
Merge pull request #194 from scikit-learn-contrib/correct_changepoint_notebook
Correct changepoint notebook
2 parents a31ee8b + 3f5ca79 commit df94bb9

File tree

3 files changed

+133
-50
lines changed

3 files changed

+133
-50
lines changed

doc/images/quickstart_1.png

0 Bytes
Loading

notebooks/regression/ts-changepoint.ipynb

Lines changed: 26 additions & 24 deletions
Large diffs are not rendered by default.

notebooks/regression/ts-changepoint.md

Lines changed: 107 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,7 @@
1-
---
2-
jupyter:
3-
jupytext:
4-
formats: ipynb,md
5-
text_representation:
6-
extension: .md
7-
format_name: markdown
8-
format_version: '1.3'
9-
jupytext_version: 1.13.6
10-
kernelspec:
11-
display_name: mapie-notebooks
12-
language: python
13-
name: mapie-notebooks
14-
---
15-
161
# Estimating prediction intervals of time series forecast with EnbPI
172

18-
193
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/scikit-learn-contrib/MAPIE/blob/add-ts-notebooks/notebooks/regression/ts-changepoint.ipynb)
204

21-
225
This example uses `mapie.time_series_regression.MapieTimeSeriesRegressor` to estimate
236
prediction intervals associated with time series forecast. It follows Xu \& Xie (2021).
247
We use here the Victoria electricity demand dataset used in the book
@@ -34,12 +17,14 @@ The best model is then feeded into
3417
associated prediction intervals. We compare four approaches: with or without
3518
``partial_fit`` called at every step.
3619

20+
3721
```python
3822
install_mapie = False
3923
if install_mapie:
40-
!pip install "git+https://github.com/scikit-learn-contrib/MAPIE.git@add-ts-notebooks"
24+
!pip install mapie
4125
```
4226

27+
4328
```python
4429
import warnings
4530

@@ -61,6 +46,7 @@ warnings.simplefilter("ignore")
6146

6247
## 1. Load input data and feature engineering
6348

49+
6450
```python
6551
url_file = "https://raw.githubusercontent.com/scikit-learn-contrib/MAPIE/master/examples/data/demand_temperature.csv"
6652
demand_df = pd.read_csv(
@@ -79,6 +65,7 @@ for hour in range(1, n_lags):
7965

8066
## 2. Train/validation/test split
8167

68+
8269
```python
8370
num_test_steps = 24 * 7
8471
demand_train = demand_df.iloc[:-num_test_steps, :].copy()
@@ -94,15 +81,30 @@ X_test = demand_test.loc[:, features]
9481
y_test = demand_test["Demand"]
9582
```
9683

84+
9785
```python
9886
plt.figure(figsize=(16, 5))
9987
plt.plot(y_train)
10088
plt.plot(y_test)
10189
plt.ylabel("Hourly demand (GW)")
10290
```
10391

92+
93+
94+
95+
Text(0, 0.5, 'Hourly demand (GW)')
96+
97+
98+
99+
100+
101+
![png](output_9_1.png)
102+
103+
104+
104105
## 3. Optimize the base estimator
105106

107+
106108
```python
107109
model_params_fit_not_done = False
108110
if model_params_fit_not_done:
@@ -133,6 +135,7 @@ else:
133135

134136
## 4. Estimate prediction intervals on the test set
135137

138+
136139
```python
137140
alpha = 0.05
138141
gap = 1
@@ -146,11 +149,12 @@ mapie_enbpi = MapieTimeSeriesRegressor(
146149

147150
### Without partial fit
148151

152+
149153
```python
150154
print("EnbPI, with no partial_fit, width optimization")
151155
mapie_enbpi = mapie_enbpi.fit(X_train, y_train)
152156
y_pred_npfit, y_pis_npfit = mapie_enbpi.predict(
153-
X_test, alpha=alpha, ensemble=True, beta_optimize=True
157+
X_test, alpha=alpha, ensemble=True, optimize_beta=True
154158
)
155159
coverage_npfit = regression_coverage_score(
156160
y_test, y_pis_npfit[:, 0, 0], y_pis_npfit[:, 1, 0]
@@ -160,16 +164,20 @@ width_npfit = regression_mean_width_score(
160164
)
161165
```
162166

167+
EnbPI, with no partial_fit, width optimization
168+
169+
163170
### With partial fit
164171

172+
165173
```python
166174
print("EnbPI with partial_fit, width optimization")
167175
mapie_enbpi = mapie_enbpi.fit(X_train, y_train)
168176

169177
y_pred_pfit = np.zeros(y_pred_npfit.shape)
170178
y_pis_pfit = np.zeros(y_pis_npfit.shape)
171179
y_pred_pfit[:gap], y_pis_pfit[:gap, :, :] = mapie_enbpi.predict(
172-
X_test.iloc[:gap, :], alpha=alpha, ensemble=True
180+
X_test.iloc[:gap, :], alpha=alpha, ensemble=True, optimize_beta=True
173181
)
174182
for step in range(gap, len(X_test), gap):
175183
mapie_enbpi.partial_fit(
@@ -182,7 +190,8 @@ for step in range(gap, len(X_test), gap):
182190
) = mapie_enbpi.predict(
183191
X_test.iloc[step:(step + gap), :],
184192
alpha=alpha,
185-
ensemble=True
193+
ensemble=True,
194+
optimize_beta=True
186195
)
187196
coverage_pfit = regression_coverage_score(
188197
y_test, y_pis_pfit[:, 0, 0], y_pis_pfit[:, 1, 0]
@@ -192,15 +201,20 @@ width_pfit = regression_mean_width_score(
192201
)
193202
```
194203

204+
EnbPI with partial_fit, width optimization
205+
206+
195207
## V. Plot estimated prediction intervals on test set
196208

209+
197210
```python
198211
y_preds = [y_pred_npfit, y_pred_pfit]
199212
y_pis = [y_pis_npfit, y_pis_pfit]
200213
coverages = [coverage_npfit, coverage_pfit]
201214
widths = [width_npfit, width_pfit]
202215
```
203216

217+
204218
```python
205219
def plot_forecast(y_train, y_test, y_preds, y_pis, coverages, widths, plot_coverage=True):
206220
fig, axs = plt.subplots(
@@ -231,23 +245,30 @@ def plot_forecast(y_train, y_test, y_preds, y_pis, coverages, widths, plot_cover
231245
plt.show()
232246
```
233247

248+
234249
```python
235250
plot_forecast(y_train, y_test, y_preds, y_pis, coverages, widths)
236251
```
237252

238-
## VI. Forecast on test dataset with change point
239253

254+
255+
![png](output_21_0.png)
256+
240257

241-
We will now see how MAPIE adapts its prediction intervals when a brutal changepoint arises in the test set. To simulate this, we will artificially decrease the electricity demand by 2 GW in the test set, aiming at simulating an effect, such as blackout or lockdown due to a pandemic, that was not taken into account by the model during its training.
242258

259+
## VI. Forecast on test dataset with change point
260+
261+
We will now see how MAPIE adapts its prediction intervals when a brutal changepoint arises in the test set. To simulate this, we will artificially decrease the electricity demand by 2 GW in the test set, aiming at simulating an effect, such as blackout or lockdown due to a pandemic, that was not taken into account by the model during its training.
243262

244263
### Corrupt the dataset
245264

265+
246266
```python
247267
demand_df_corrupted = demand_df.copy()
248268
demand_df_corrupted.Demand.iloc[-int(num_test_steps/2):] -= 2
249269
```
250270

271+
251272
```python
252273
n_lags = 5
253274
for hour in range(1, n_lags):
@@ -263,20 +284,35 @@ X_test = demand_test_corrupted.loc[:, features]
263284
y_test = demand_test_corrupted["Demand"]
264285
```
265286

287+
266288
```python
267289
plt.figure(figsize=(16, 5))
268290
plt.ylabel("Hourly demand (GW)")
269291
plt.plot(y_train)
270292
plt.plot(y_test)
271293
```
272294

295+
296+
297+
298+
[<matplotlib.lines.Line2D at 0x16a409930>]
299+
300+
301+
302+
303+
304+
![png](output_27_1.png)
305+
306+
307+
273308
### Prediction intervals without partial fit
274309

310+
275311
```python
276312
print("EnbPI, with no partial_fit, width optimization")
277313
mapie_enbpi = mapie_enbpi.fit(X_train, y_train)
278314
y_pred_npfit, y_pis_npfit = mapie_enbpi.predict(
279-
X_test, alpha=alpha, ensemble=True, beta_optimize=True
315+
X_test, alpha=alpha, ensemble=True, optimize_beta=True
280316
)
281317
coverage_npfit = regression_coverage_score(
282318
y_test, y_pis_npfit[:, 0, 0], y_pis_npfit[:, 1, 0]
@@ -286,8 +322,12 @@ width_npfit = regression_mean_width_score(
286322
)
287323
```
288324

325+
EnbPI, with no partial_fit, width optimization
326+
327+
289328
### Prediction intervals with partial fit
290329

330+
291331
```python
292332
print("EnbPI with partial_fit, width optimization")
293333
mapie_enbpi = mapie_enbpi.fit(X_train, y_train)
@@ -296,7 +336,7 @@ y_pred_pfit = np.zeros(y_pred_npfit.shape)
296336
y_pis_pfit = np.zeros(y_pis_npfit.shape)
297337
conformity_scores_pfit, lower_quantiles_pfit, higher_quantiles_pfit = [], [], []
298338
y_pred_pfit[:gap], y_pis_pfit[:gap, :, :] = mapie_enbpi.predict(
299-
X_test.iloc[:gap, :], alpha=alpha, ensemble=True
339+
X_test.iloc[:gap, :], alpha=alpha, ensemble=True, optimize_beta=True
300340
)
301341
for step in range(gap, len(X_test), gap):
302342
mapie_enbpi.partial_fit(
@@ -309,7 +349,8 @@ for step in range(gap, len(X_test), gap):
309349
) = mapie_enbpi.predict(
310350
X_test.iloc[step:(step + gap), :],
311351
alpha=alpha,
312-
ensemble=True
352+
ensemble=True,
353+
optimize_beta=True
313354
)
314355
conformity_scores_pfit.append(mapie_enbpi.conformity_scores_)
315356
lower_quantiles_pfit.append(mapie_enbpi.lower_quantiles_)
@@ -322,19 +363,31 @@ width_pfit = regression_mean_width_score(
322363
)
323364
```
324365

366+
EnbPI with partial_fit, width optimization
367+
368+
325369
### Plot estimated prediction intervals on test set
326370

371+
327372
```python
328373
y_preds = [y_pred_npfit, y_pred_pfit]
329374
y_pis = [y_pis_npfit, y_pis_pfit]
330375
coverages = [coverage_npfit, coverage_pfit]
331376
widths = [width_npfit, width_pfit]
332377
```
333378

379+
334380
```python
335381
plot_forecast(y_train, y_test, y_preds, y_pis, coverages, widths, plot_coverage=False)
336382
```
337383

384+
385+
386+
![png](output_34_0.png)
387+
388+
389+
390+
338391
```python
339392
window = 24
340393
rolling_coverage_pfit, rolling_coverage_npfit = [], []
@@ -353,15 +406,30 @@ for i in range(window, len(y_test), 1):
353406

354407
### Marginal coverage on a 24-hour rolling window of prediction intervals
355408

409+
356410
```python
357411
plt.figure(figsize=(10, 5))
358412
plt.ylabel(f"Rolling coverage [{window} hours]")
359413
plt.plot(y_test[window:].index, rolling_coverage_npfit, label="Without update of residuals")
360414
plt.plot(y_test[window:].index, rolling_coverage_pfit, label="With update of residuals")
361415
```
362416

417+
418+
419+
420+
[<matplotlib.lines.Line2D at 0x16b986710>]
421+
422+
423+
424+
425+
426+
![png](output_37_1.png)
427+
428+
429+
363430
### Temporal evolution of the distribution of residuals used for estimating prediction intervals
364431

432+
365433
```python
366434
plt.figure(figsize=(7, 5))
367435
for i, j in enumerate([0, -1]):
@@ -370,3 +438,16 @@ for i, j in enumerate([0, -1]):
370438
plt.axvline(higher_quantiles_pfit[j], ls="--", color=f"C{i}")
371439
plt.legend(loc=[1, 0])
372440
```
441+
442+
443+
444+
445+
<matplotlib.legend.Legend at 0x16b985390>
446+
447+
448+
449+
450+
451+
![png](output_39_1.png)
452+
453+

0 commit comments

Comments
 (0)