|
13 | 13 | from mapie.metrics import regression_coverage_score |
14 | 14 | from mapie.quantile_regression import MapieQuantileRegressor |
15 | 15 |
|
| 16 | +random_state = 2 |
| 17 | + |
16 | 18 | ############################################################################## |
17 | 19 | # We generate synthetic data. |
18 | 20 |
|
|
22 | 24 | alpha = 0.2 |
23 | 25 |
|
24 | 26 | # Fit a Gradient Boosting Regressor for quantile regression |
25 | | -quantiles = [0.1, 0.9] |
26 | | -gb_reg = GradientBoostingRegressor(loss="quantile", alpha=quantiles[1]) |
27 | | -gb_reg.fit(X, y) |
| 27 | +gb_reg = GradientBoostingRegressor( |
| 28 | + loss="quantile", alpha=0.5, random_state=random_state |
| 29 | +) |
28 | 30 |
|
29 | 31 | # MAPIE Quantile Regressor |
30 | 32 | mapie_qr = MapieQuantileRegressor(estimator=gb_reg, alpha=alpha) |
31 | | -mapie_qr.fit(X, y) |
| 33 | +mapie_qr.fit(X, y, random_state=random_state) |
32 | 34 | y_pred_sym, y_pis_sym = mapie_qr.predict(X, symmetry=True) |
33 | 35 | y_pred_asym, y_pis_asym = mapie_qr.predict(X, symmetry=False) |
34 | 36 | y_qlow = mapie_qr.estimators_[0].predict(X) |
|
64 | 66 | plt.xlabel("x") |
65 | 67 | plt.ylabel("y") |
66 | 68 | plt.scatter(X, y, alpha=0.3) |
67 | | -#plt.plot(X_sorted, y_pred_sym_sorted, color="C1") |
68 | 69 | plt.plot(X_sorted, y_qlow, color="C1") |
69 | 70 | plt.plot(X_sorted, y_qup, color="C1") |
70 | 71 | plt.plot(X_sorted, y_pis_sym_sorted[:, 0], color="C1", ls="--") |
|
86 | 87 | plt.xlabel("x") |
87 | 88 | plt.ylabel("y") |
88 | 89 | plt.scatter(X, y, alpha=0.3) |
89 | | -#plt.plot(X_sorted, y_pred_asym_sorted, color="C2") |
90 | 90 | plt.plot(X_sorted, y_qlow, color="C2") |
91 | 91 | plt.plot(X_sorted, y_qup, color="C2") |
92 | 92 | plt.plot(X_sorted, y_pis_asym_sorted[:, 0], color="C2", ls="--") |
|
106 | 106 | plt.show() |
107 | 107 |
|
108 | 108 | ############################################################################## |
109 | | -# The symmetric intervals (`symmetry=True`) are easier to interpret and |
110 | | -# tend to have higher coverage but might not adapt well to varying |
111 | | -# noise levels. The asymmetric intervals (`symmetry=False`) are more |
112 | | -# flexible and better capture heteroscedasticity but can appear more jagged. |
| 109 | +# The symmetric intervals (`symmetry=True`) use a combined set of residuals |
| 110 | +# for both bounds, while the asymmetric intervals use distinct residuals for |
| 111 | +# each bound, allowing for more flexible and accurate intervals that reflect |
| 112 | +# the heteroscedastic nature of the data. The resulting effective coverages |
| 113 | +# demonstrate the theoretical guarantee of the target coverage level |
| 114 | +# $(1-\alpha)$. |
0 commit comments