---
jupyter:
  jupytext:
    formats: ipynb,md
    text_representation:
      extension: .md
      format_name: markdown
      format_version: '1.3'
      jupytext_version: 1.13.6
  kernelspec:
    display_name: mapie-notebooks
    language: python
    name: mapie-notebooks
---

# Estimating prediction intervals of time series forecast with EnbPI

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/scikit-learn-contrib/MAPIE/blob/add-ts-notebooks/notebooks/regression/ts-changepoint.ipynb)

This example uses `mapie.time_series_regression.MapieTimeSeriesRegressor` to estimate
prediction intervals associated with time series forecasts. It follows Xu & Xie (2021).
We use here the Victoria electricity demand dataset used in the book
"Forecasting: Principles and Practice" by R. J. Hyndman and G. Athanasopoulos.
The demand shows daily and weekly seasonalities and depends on the temperature,
treated here as an exogenous variable. A base regression model is first tuned with a
time-ordered cross-validation (section 3 below). The best model is then fed into
`MapieTimeSeriesRegressor` to estimate the associated prediction intervals.
We compare two approaches: with or without `partial_fit` called at every step.
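
As a quick reminder of the method (our summary of Xu & Xie (2021), not text from the original notebook): EnbPI builds its intervals from the empirical quantiles of out-of-sample ensemble residuals $\epsilon_i = y_i - \hat{y}_{-i}(x_i)$,

$$
\hat{C}_{t, \alpha}^{\beta} = \left[\hat{y}_t + q_{\beta}\big(\{\epsilon_i\}\big),\ \hat{y}_t + q_{1-\alpha+\beta}\big(\{\epsilon_i\}\big)\right],
$$

where $q_{\gamma}$ denotes the empirical $\gamma$-quantile of the residuals. Setting `optimize_beta=True` below searches for the $\beta \in [0, \alpha]$ that minimizes the interval width (the "width optimization" referred to in the cells), and `partial_fit` updates the residual set with the most recently observed errors.
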
```python
install_mapie = False
if install_mapie:
    !pip install mapie
```

```python
import warnings

# Imports needed by the cells shown below; the original cell may include more.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from mapie.metrics import regression_coverage_score, regression_mean_width_score
from mapie.time_series_regression import MapieTimeSeriesRegressor

warnings.simplefilter("ignore")
```

## 1. Load input data and feature engineering

```python
url_file = "https://raw.githubusercontent.com/scikit-learn-contrib/MAPIE/master/examples/data/demand_temperature.csv"
demand_df = pd.read_csv(
    url_file, parse_dates=True, index_col=0  # use the timestamp column as index
)
```
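
The feature-engineering part of this step is not shown above. A minimal sketch of how the calendar and lag features can be built, assuming column names such as `Weekofyear`, `Weekday`, `Hour` and `Lag_1`, ..., `Lag_4` (these names are our assumption, not taken from the visible code):

```python
# Sketch (assumed, not the notebook's exact code): calendar and lag features.
demand_df["Weekofyear"] = demand_df.index.isocalendar().week.astype("int64")
demand_df["Weekday"] = demand_df.index.isocalendar().day.astype("int64")
demand_df["Hour"] = demand_df.index.hour

n_lags = 5
for hour in range(1, n_lags):
    demand_df[f"Lag_{hour}"] = demand_df["Demand"].shift(hour)
```
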
## 2. Train/validation/test split

```python
num_test_steps = 24 * 7
demand_train = demand_df.iloc[:-num_test_steps, :].copy()
# (elided) this cell also builds demand_test, the `features` list,
# X_train and y_train; a sketch of that part follows this cell

X_test = demand_test.loc[:, features]
y_test = demand_test["Demand"]
```
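
A minimal sketch of the elided middle of the cell above, under the assumption that the features are the calendar and lag columns from the previous sketch plus the exogenous `Temperature` column (the exact feature list is our assumption):

```python
# Sketch (assumed, not the notebook's exact code): test split and training matrices.
demand_test = demand_df.iloc[-num_test_steps:, :].copy()

features = ["Weekofyear", "Weekday", "Hour", "Temperature"] + [
    f"Lag_{hour}" for hour in range(1, n_lags)
]

# Drop the first rows, whose lag features are NaN.
demand_train = demand_train.dropna(subset=features)
X_train = demand_train.loc[:, features]
y_train = demand_train["Demand"]
```
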
```python
plt.figure(figsize=(16, 5))
plt.plot(y_train)
plt.plot(y_test)
plt.ylabel("Hourly demand (GW)")
```

![png](output_9_1.png)

## 3. Optimize the base estimator

```python
model_params_fit_not_done = False
if model_params_fit_not_done:
    # (elided) run a randomized hyperparameter search for the base regressor;
    # a sketch of such a search follows this cell
    pass
else:
    # (elided) rebuild the base model from the previously found hyperparameters
    pass
```
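
A minimal sketch of what such a hyperparameter search can look like here, assuming a `RandomForestRegressor` base model tuned by `RandomizedSearchCV` with a time-ordered `TimeSeriesSplit`; the parameter ranges and fixed values below are illustrative assumptions, not the notebook's actual settings:

```python
from scipy.stats import randint
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit

if model_params_fit_not_done:
    # Random search over a small grid, with folds that respect time ordering.
    model_params_distributions = {
        "max_depth": randint(low=5, high=20),
        "n_estimators": randint(low=50, high=200),
    }
    cv_obj = RandomizedSearchCV(
        RandomForestRegressor(random_state=59),
        param_distributions=model_params_distributions,
        n_iter=10,
        cv=TimeSeriesSplit(n_splits=5),
        scoring="neg_root_mean_squared_error",
        random_state=59,
        n_jobs=-1,
    )
    cv_obj.fit(X_train, y_train)
    model = cv_obj.best_estimator_
else:
    # Previously found hyperparameters (illustrative values).
    model = RandomForestRegressor(max_depth=10, n_estimators=50, random_state=59)
```
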
## 4. Estimate prediction intervals on the test set

```python
alpha = 0.05
gap = 1
# (elided) mapie_enbpi = MapieTimeSeriesRegressor(...) is built here;
# a sketch of a typical configuration follows this cell
```
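
A minimal sketch of the elided EnbPI construction, assuming the tuned base model from section 3 (`model`), the `"enbpi"` method and a block-bootstrap resampling as in Xu & Xie (2021); the resampling parameters are illustrative assumptions:

```python
from mapie.subsample import BlockBootstrap

# Overlapping 48-hour blocks preserve the short-term temporal structure
# during resampling (values are illustrative).
cv_mapie_ts = BlockBootstrap(
    n_resamplings=100, length=48, overlapping=True, random_state=59
)
mapie_enbpi = MapieTimeSeriesRegressor(
    model,
    method="enbpi",
    cv=cv_mapie_ts,
    agg_function="mean",
    n_jobs=-1,
)
```
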
### Without partial fit

```python
print("EnbPI, with no partial_fit, width optimization")
mapie_enbpi = mapie_enbpi.fit(X_train, y_train)
y_pred_npfit, y_pis_npfit = mapie_enbpi.predict(
    X_test, alpha=alpha, ensemble=True, optimize_beta=True
)
coverage_npfit = regression_coverage_score(
    y_test, y_pis_npfit[:, 0, 0], y_pis_npfit[:, 1, 0]
)
width_npfit = regression_mean_width_score(
    y_pis_npfit[:, 0, 0], y_pis_npfit[:, 1, 0]
)
```

### With partial fit

```python
print("EnbPI with partial_fit, width optimization")
mapie_enbpi = mapie_enbpi.fit(X_train, y_train)

y_pred_pfit = np.zeros(y_pred_npfit.shape)
y_pis_pfit = np.zeros(y_pis_npfit.shape)
y_pred_pfit[:gap], y_pis_pfit[:gap, :, :] = mapie_enbpi.predict(
    X_test.iloc[:gap, :], alpha=alpha, ensemble=True, optimize_beta=True
)
for step in range(gap, len(X_test), gap):
    # Feed the newly observed data to update the residuals.
    mapie_enbpi.partial_fit(
        X_test.iloc[(step - gap):step, :],
        y_test.iloc[(step - gap):step],
    )
    (
        y_pred_pfit[step:step + gap],
        y_pis_pfit[step:step + gap, :, :],
    ) = mapie_enbpi.predict(
        X_test.iloc[step:(step + gap), :],
        alpha=alpha,
        ensemble=True,
        optimize_beta=True
    )
coverage_pfit = regression_coverage_score(
    y_test, y_pis_pfit[:, 0, 0], y_pis_pfit[:, 1, 0]
)
width_pfit = regression_mean_width_score(
    y_pis_pfit[:, 0, 0], y_pis_pfit[:, 1, 0]
)
```

## 5. Plot estimated prediction intervals on the test set

```python
y_preds = [y_pred_npfit, y_pred_pfit]
y_pis = [y_pis_npfit, y_pis_pfit]
coverages = [coverage_npfit, coverage_pfit]
widths = [width_npfit, width_pfit]
```

```python
def plot_forecast(y_train, y_test, y_preds, y_pis, coverages, widths, plot_coverage=True):
    fig, axs = plt.subplots(
        nrows=2, ncols=1, figsize=(16, 10), sharex=True  # layout values assumed
    )
    # (elided) plot y_train, y_test, the two forecasts and their prediction
    # intervals; when plot_coverage is True, annotate each panel with the
    # corresponding coverage and mean width
    plt.show()
```

```python
plot_forecast(y_train, y_test, y_preds, y_pis, coverages, widths)
```

![png](output_21_0.png)

## 6. Forecast on test dataset with change point

We will now see how MAPIE adapts its prediction intervals when a sudden change point arises in the test set. To simulate this, we artificially decrease the electricity demand by 2 GW in the test set, mimicking an event, such as a blackout or a lockdown due to a pandemic, that was not accounted for by the model during training.

### Corrupt the dataset

```python
demand_df_corrupted = demand_df.copy()
demand_df_corrupted.Demand.iloc[-int(num_test_steps / 2):] -= 2
```

```python
n_lags = 5
for hour in range(1, n_lags):
    # (elided) rebuild the lag features on the corrupted series,
    # as in the feature-engineering step of section 1
    ...

# (elided) rebuild demand_train, demand_test_corrupted, X_train and y_train
# from demand_df_corrupted, as in the train/test split of section 2

X_test = demand_test_corrupted.loc[:, features]
y_test = demand_test_corrupted["Demand"]
```

```python
plt.figure(figsize=(16, 5))
plt.ylabel("Hourly demand (GW)")
plt.plot(y_train)
plt.plot(y_test)
```

![png](output_27_1.png)

### Prediction intervals without partial fit

```python
print("EnbPI, with no partial_fit, width optimization")
mapie_enbpi = mapie_enbpi.fit(X_train, y_train)
y_pred_npfit, y_pis_npfit = mapie_enbpi.predict(
    X_test, alpha=alpha, ensemble=True, optimize_beta=True
)
coverage_npfit = regression_coverage_score(
    y_test, y_pis_npfit[:, 0, 0], y_pis_npfit[:, 1, 0]
)
width_npfit = regression_mean_width_score(
    y_pis_npfit[:, 0, 0], y_pis_npfit[:, 1, 0]
)
```

### Prediction intervals with partial fit

```python
print("EnbPI with partial_fit, width optimization")
mapie_enbpi = mapie_enbpi.fit(X_train, y_train)

y_pred_pfit = np.zeros(y_pred_npfit.shape)
y_pis_pfit = np.zeros(y_pis_npfit.shape)
conformity_scores_pfit, lower_quantiles_pfit, higher_quantiles_pfit = [], [], []
y_pred_pfit[:gap], y_pis_pfit[:gap, :, :] = mapie_enbpi.predict(
    X_test.iloc[:gap, :], alpha=alpha, ensemble=True, optimize_beta=True
)
for step in range(gap, len(X_test), gap):
    # Feed the newly observed data to update the residuals.
    mapie_enbpi.partial_fit(
        X_test.iloc[(step - gap):step, :],
        y_test.iloc[(step - gap):step],
    )
    (
        y_pred_pfit[step:step + gap],
        y_pis_pfit[step:step + gap, :, :],
    ) = mapie_enbpi.predict(
        X_test.iloc[step:(step + gap), :],
        alpha=alpha,
        ensemble=True,
        optimize_beta=True
    )
    conformity_scores_pfit.append(mapie_enbpi.conformity_scores_)
    lower_quantiles_pfit.append(mapie_enbpi.lower_quantiles_)
    higher_quantiles_pfit.append(mapie_enbpi.higher_quantiles_)
coverage_pfit = regression_coverage_score(
    y_test, y_pis_pfit[:, 0, 0], y_pis_pfit[:, 1, 0]
)
width_pfit = regression_mean_width_score(
    y_pis_pfit[:, 0, 0], y_pis_pfit[:, 1, 0]
)
```

### Plot estimated prediction intervals on test set

```python
y_preds = [y_pred_npfit, y_pred_pfit]
y_pis = [y_pis_npfit, y_pis_pfit]
coverages = [coverage_npfit, coverage_pfit]
widths = [width_npfit, width_pfit]
```

```python
plot_forecast(y_train, y_test, y_preds, y_pis, coverages, widths, plot_coverage=False)
```

![png](output_34_0.png)

```python
window = 24
rolling_coverage_pfit, rolling_coverage_npfit = [], []
# (elided) loop over the test set and compute, at each step, the empirical
# coverage of the last `window` intervals for both approaches;
# a sketch follows this cell
```
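
A minimal sketch of the elided rolling-coverage loop, reusing `regression_coverage_score` on a sliding window of `window` hours (this reconstruction is an assumption, not the notebook's exact code):

```python
# Sketch: marginal coverage over a sliding window of `window` hours.
for i in range(window, len(y_test), 1):
    rolling_coverage_npfit.append(
        regression_coverage_score(
            y_test[i - window:i],
            y_pis_npfit[i - window:i, 0, 0],
            y_pis_npfit[i - window:i, 1, 0],
        )
    )
    rolling_coverage_pfit.append(
        regression_coverage_score(
            y_test[i - window:i],
            y_pis_pfit[i - window:i, 0, 0],
            y_pis_pfit[i - window:i, 1, 0],
        )
    )
```
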
### Marginal coverage on a 24-hour rolling window of prediction intervals

```python
plt.figure(figsize=(10, 5))
plt.ylabel(f"Rolling coverage [{window} hours]")
plt.plot(y_test[window:].index, rolling_coverage_npfit, label="Without update of residuals")
plt.plot(y_test[window:].index, rolling_coverage_pfit, label="With update of residuals")
```

![png](output_37_1.png)

### Temporal evolution of the distribution of residuals used for estimating prediction intervals

```python
plt.figure(figsize=(7, 5))
for i, j in enumerate([0, -1]):
    # (elided) plot the histogram of the conformity scores at the first (j=0)
    # and last (j=-1) step of the partial_fit loop, with their quantiles;
    # a sketch follows the output figure below
    plt.axvline(higher_quantiles_pfit[j], ls="--", color=f"C{i}")
plt.legend(loc=[1, 0])
```

![png](output_39_1.png)
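
A minimal sketch of what the elided plotting inside the last loop can look like, using the conformity scores and quantiles stored during the `partial_fit` loop; the histogram styling and labels are assumptions:

```python
# Sketch: compare the residual distribution at the first and last step.
plt.figure(figsize=(7, 5))
for i, j in enumerate([0, -1]):
    label = "first step" if j == 0 else "last step"
    plt.hist(conformity_scores_pfit[j], bins=30, alpha=0.5, color=f"C{i}", label=label)
    plt.axvline(lower_quantiles_pfit[j], ls="--", color=f"C{i}")
    plt.axvline(higher_quantiles_pfit[j], ls="--", color=f"C{i}")
plt.xlabel("Conformity scores (residuals)")
plt.legend(loc=[1, 0])
```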