|
25 | 25 | in which the training set is prior to the validation set. |
26 | 26 | The best model is then fed into |
27 | 27 | :class:`~mapie.time_series_regression.TimeSeriesRegressor` to estimate the |
28 | | -associated prediction intervals. We compare two approaches: with or without |
29 | | -``partial_fit`` called at every step following [6]. It appears that |
30 | | -``partial_fit`` offer a coverage closer to the targeted coverage, and with |
31 | | -narrower PIs. |
| 28 | +associated prediction intervals. We compare two approaches: with or without calling |
| 29 | +``update`` at every step, following [6]. The results show coverage closer |
| 30 | +to the target, along with narrower PIs. |
32 | 31 | """ |
33 | 32 |
|
34 | 33 | import warnings |
|
123 | 122 | n_jobs=-1, |
124 | 123 | ) |
125 | 124 |
|
126 | | -print("EnbPI, with no partial_fit, width optimization") |
| 125 | +print("EnbPI, with no update, width optimization") |
127 | 126 | mapie_enpbi = mapie_enpbi.fit(X_train, y_train) |
128 | | -y_pred_npfit_enbpi, y_pis_npfit_enbpi = mapie_enpbi.predict( |
| 127 | +y_pred_n_update_enbpi, y_pis_n_update_enbpi = mapie_enpbi.predict( |
129 | 128 | X_test, confidence_level=1-alpha, ensemble=True, optimize_beta=True |
130 | 129 | ) |
131 | | -coverage_npfit_enbpi = regression_coverage_score( |
132 | | - y_test, y_pis_npfit_enbpi |
| 130 | +coverage_n_update_enbpi = regression_coverage_score( |
| 131 | + y_test, y_pis_n_update_enbpi |
133 | 132 | )[0] |
134 | 133 |
|
135 | | -width_npfit_enbpi = regression_mean_width_score( |
136 | | - y_pis_npfit_enbpi |
| 134 | +width_n_update_enbpi = regression_mean_width_score( |
| 135 | + y_pis_n_update_enbpi |
137 | 136 | )[0] |
138 | 137 |
|
139 | | -print("EnbPI with partial_fit, width optimization") |
| 138 | +print("EnbPI with update, width optimization") |
140 | 139 | mapie_enpbi = mapie_enpbi.fit(X_train, y_train) |
141 | | -y_pred_pfit_enbpi = np.zeros(y_pred_npfit_enbpi.shape) |
142 | | -y_pis_pfit_enbpi = np.zeros(y_pis_npfit_enbpi.shape) |
| 140 | +y_pred_update_enbpi = np.zeros(y_pred_n_update_enbpi.shape) |
| 141 | +y_pis_update_enbpi = np.zeros(y_pis_n_update_enbpi.shape) |
143 | 142 |
|
144 | 143 | step_size = 1 |
145 | 144 | ( |
146 | | - y_pred_pfit_enbpi[:step_size], |
147 | | - y_pis_pfit_enbpi[:step_size, :, :], |
| 145 | + y_pred_update_enbpi[:step_size], |
| 146 | + y_pis_update_enbpi[:step_size, :, :], |
148 | 147 | ) = mapie_enpbi.predict( |
149 | 148 | X_test.iloc[:step_size, :], confidence_level=1-alpha, ensemble=True, |
150 | 149 | optimize_beta=True |
151 | 150 | ) |
152 | 151 |
|
153 | 152 | for step in range(step_size, len(X_test), step_size): |
154 | | - mapie_enpbi.partial_fit( |
| 153 | + mapie_enpbi.update( |
155 | 154 | X_test.iloc[(step - step_size):step, :], |
156 | 155 | y_test.iloc[(step - step_size):step], |
157 | 156 | ) |
158 | 157 | ( |
159 | | - y_pred_pfit_enbpi[step:step + step_size], |
160 | | - y_pis_pfit_enbpi[step:step + step_size, :, :], |
| 158 | + y_pred_update_enbpi[step:step + step_size], |
| 159 | + y_pis_update_enbpi[step:step + step_size, :, :], |
161 | 160 | ) = mapie_enpbi.predict( |
162 | 161 | X_test.iloc[step:(step + step_size), :], |
163 | 162 | confidence_level=1-alpha, |
164 | 163 | ensemble=True, |
165 | 164 | ) |
166 | | -coverage_pfit_enbpi = regression_coverage_score( |
167 | | - y_test, y_pis_pfit_enbpi |
| 165 | +coverage_update_enbpi = regression_coverage_score( |
| 166 | + y_test, y_pis_update_enbpi |
168 | 167 | )[0] |
169 | | -width_pfit_enbpi = regression_mean_width_score( |
170 | | - y_pis_pfit_enbpi |
| 168 | +width_update_enbpi = regression_mean_width_score( |
| 169 | + y_pis_update_enbpi |
171 | 170 | )[0] |
172 | 171 |
|
173 | 172 | # Print results |
174 | 173 | print( |
175 | 174 | "Coverage / prediction interval width mean for TimeSeriesRegressor: " |
176 | | - "\nEnbPI without any partial_fit:" |
177 | | - f"{coverage_npfit_enbpi:.3f}, {width_npfit_enbpi:.3f}" |
| 175 | + "\nEnbPI without any update:" |
| 176 | + f"{coverage_n_update_enbpi:.3f}, {width_n_update_enbpi:.3f}" |
178 | 177 | ) |
179 | 178 | print( |
180 | 179 | "Coverage / prediction interval width mean for TimeSeriesRegressor: " |
181 | | - "\nEnbPI with partial_fit:" |
182 | | - f"{coverage_pfit_enbpi:.3f}, {width_pfit_enbpi:.3f}" |
| 180 | + "\nEnbPI with update:" |
| 181 | + f"{coverage_update_enbpi:.3f}, {width_update_enbpi:.3f}" |
183 | 182 | ) |
184 | 183 |
|
185 | | -enbpi_no_pfit = { |
186 | | - "y_pred": y_pred_npfit_enbpi, |
187 | | - "y_pis": y_pis_npfit_enbpi, |
188 | | - "coverage": coverage_npfit_enbpi, |
189 | | - "width": width_npfit_enbpi, |
| 184 | +enbpi_no_update = { |
| 185 | + "y_pred": y_pred_n_update_enbpi, |
| 186 | + "y_pis": y_pis_n_update_enbpi, |
| 187 | + "coverage": coverage_n_update_enbpi, |
| 188 | + "width": width_n_update_enbpi, |
190 | 189 | } |
191 | 190 |
|
192 | | -enbpi_pfit = { |
193 | | - "y_pred": y_pred_pfit_enbpi, |
194 | | - "y_pis": y_pis_pfit_enbpi, |
195 | | - "coverage": coverage_pfit_enbpi, |
196 | | - "width": width_pfit_enbpi, |
| 191 | +enbpi_update = { |
| 192 | + "y_pred": y_pred_update_enbpi, |
| 193 | + "y_pis": y_pis_update_enbpi, |
| 194 | + "coverage": coverage_update_enbpi, |
| 195 | + "width": width_update_enbpi, |
197 | 196 | } |
198 | 197 |
|
199 | | -results = [enbpi_no_pfit, enbpi_pfit] |
| 198 | +results = [enbpi_no_update, enbpi_update] |
200 | 199 |
|
201 | 200 | # Plot estimated prediction intervals on test set |
202 | 201 | fig, axs = plt.subplots( |
203 | 202 | nrows=2, ncols=1, figsize=(15, 12), sharex="col" |
204 | 203 | ) |
205 | 204 |
|
206 | 205 | for i, (ax, w, result) in enumerate( |
207 | | - zip(axs, ["EnbPI, without partial_fit", "EnbPI with partial_fit"], results) |
| 206 | + zip(axs, ["EnbPI, without update", "EnbPI with update"], results) |
208 | 207 | ): |
209 | 208 | ax.set_ylabel("Hourly demand (GW)", fontsize=20) |
210 | 209 | ax.plot(demand_test.Demand, lw=2, label="Test data", c="C1") |
|
0 commit comments