
Commit 9041d4d

Merge pull request #56 from simai-ml/move-alpha-to-predict:
Move alpha to predict and make corrections in tests, examples, and docs

2 parents: 56de273 + 5198334

File tree: 12 files changed (+257, -215 lines)


HISTORY.rst

Lines changed: 1 addition & 0 deletions
@@ -5,6 +5,7 @@ History
 0.2.2 (2021-06-XX)
 ------------------

+* Set alpha parameter as predict argument, with None as default value
 * Switch to github actions for continuous integration of the code
 * Add image explaining MAPIE internals on the README
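Taken together with the file diffs below, the change boils down to the following usage pattern. This is a minimal sketch with a placeholder estimator and toy data, not code from the commit itself; the alpha=None behavior follows the HISTORY entry above.

from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from mapie.estimators import MapieRegressor

X, y = make_regression(n_samples=200, n_features=1, noise=10, random_state=0)

# alpha is no longer a constructor argument...
mapie = MapieRegressor(LinearRegression())
mapie.fit(X, y)

# ...it is now passed to predict. With the default alpha=None, only point
# predictions come back (per the HISTORY entry above).
y_pred = mapie.predict(X)

# With alpha set, predict returns a (y_pred, y_pis) tuple, y_pis stacking
# the lower and upper bounds for each alpha level.
y_pred, y_pis = mapie.predict(X, alpha=[0.05, 0.32])
print(y_pis.shape)  # expected: (200, 2, 2)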

README.rst

Lines changed: 8 additions & 8 deletions
@@ -87,9 +87,9 @@ and two standard deviations from the mean.

 from mapie.estimators import MapieRegressor
 alpha = [0.05, 0.32]
-mapie = MapieRegressor(regressor, alpha=alpha)
+mapie = MapieRegressor(regressor)
 mapie.fit(X, y)
-y_preds = mapie.predict(X)
+y_pred, y_pis = mapie.predict(X, alpha=alpha)

@@ -105,18 +105,18 @@ The estimated prediction intervals can then be plotted as follows.
 plt.xlabel("x")
 plt.ylabel("y")
 plt.scatter(X, y, alpha=0.3)
-plt.plot(X, y_preds[:, 0, 0], color="C1")
+plt.plot(X, y_pred, color="C1")
 order = np.argsort(X[:, 0])
-plt.plot(X[order], y_preds[order][:, 1, 1], color="C1", ls="--")
-plt.plot(X[order], y_preds[order][:, 2, 1], color="C1", ls="--")
+plt.plot(X[order], y_pis[order][:, 0, 1], color="C1", ls="--")
+plt.plot(X[order], y_pis[order][:, 1, 1], color="C1", ls="--")
 plt.fill_between(
     X[order].ravel(),
-    y_preds[:, 1, 0][order].ravel(),
-    y_preds[:, 2, 0][order].ravel(),
+    y_pis[order][:, 0, 0].ravel(),
+    y_pis[order][:, 1, 0].ravel(),
     alpha=0.2
 )
 coverage_scores = [
-    coverage_score(y, y_preds[:, 1, i], y_preds[:, 2, i])
+    coverage_score(y, y_pis[:, 0, i], y_pis[:, 1, i])
     for i, _ in enumerate(alpha)
 ]
 plt.title(
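The indexing convention the updated snippets rely on: y_pis has shape (n_samples, 2, n_alpha), the middle axis holding the lower (0) and upper (1) bounds and the last axis following the order of the alpha list. A short sketch of that layout, with LinearRegression standing in for the README's regressor and toy data replacing its dataset:

from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from mapie.estimators import MapieRegressor
from mapie.metrics import coverage_score

X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=59)
alpha = [0.05, 0.32]
mapie = MapieRegressor(LinearRegression())
mapie.fit(X, y)
y_pred, y_pis = mapie.predict(X, alpha=alpha)

# Middle axis: 0 = lower bound, 1 = upper bound; last axis: alpha index.
y_low_05, y_up_05 = y_pis[:, 0, 0], y_pis[:, 1, 0]  # alpha = 0.05
y_low_32, y_up_32 = y_pis[:, 0, 1], y_pis[:, 1, 1]  # alpha = 0.32
print((y_up_05 - y_low_05).mean())  # 95% intervals are wider
print((y_up_32 - y_low_32).mean())  # than 68% intervals
print(coverage_score(y, y_low_05, y_up_05))  # should be close to 0.95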

doc/images/quickstart_1.png

Binary file (quick start figure) updated; image preview omitted.

doc/quick_start.rst

Lines changed: 7 additions & 7 deletions
@@ -49,9 +49,9 @@ and two standard deviations from the mean.

 from mapie.estimators import MapieRegressor
 alpha = [0.05, 0.32]
-mapie = MapieRegressor(regressor, alpha=alpha)
+mapie = MapieRegressor(regressor)
 mapie.fit(X, y)
-y_preds = mapie.predict(X)
+y_pred, y_pis = mapie.predict(X, alpha=alpha)


 3. Show the results

@@ -68,7 +68,7 @@ The estimated prediction intervals can then be plotted as follows.
 from mapie.metrics import coverage_score

 coverage_scores = [
-    coverage_score(y, y_preds[:, 1, i], y_preds[:, 2, i])
+    coverage_score(y, y_pis[:, 0, i], y_pis[:, 1, i])
     for i, _ in enumerate(alpha)
 ]

@@ -77,12 +77,12 @@ The estimated prediction intervals can then be plotted as follows.
 plt.scatter(X, y, alpha=0.3)
 plt.plot(X, y_preds[:, 0, 0], color="C1")
 order = np.argsort(X[:, 0])
-plt.plot(X[order], y_preds[order][:, 1, 1], color="C1", ls="--")
-plt.plot(X[order], y_preds[order][:, 2, 1], color="C1", ls="--")
+plt.plot(X[order], y_pis[order][:, 0, 1], color="C1", ls="--")
+plt.plot(X[order], y_pis[order][:, 1, 1], color="C1", ls="--")
 plt.fill_between(
     X[order].ravel(),
-    y_preds[:, 1, 0][order].ravel(),
-    y_preds[:, 2, 0][order].ravel(),
+    y_pis[order][:, 0, 0].ravel(),
+    y_pis[order][:, 1, 0].ravel(),
     alpha=0.2
 )
 plt.title(

doc/tutorial.rst

Lines changed: 19 additions & 19 deletions
@@ -112,11 +112,11 @@ in order to obtain a 95% confidence for our prediction intervals.
     "cv_plus": dict(method="plus", cv=10),
     "cv_minmax": dict(method="minmax", cv=10),
 }
-y_preds = {}
+y_pred, y_pis = {}, {}
 for strategy, params in STRATEGIES.items():
-    mapie = MapieRegressor(polyn_model, alpha=0.05, ensemble=False, **params)
+    mapie = MapieRegressor(polyn_model, ensemble=False, **params)
     mapie.fit(X_train, y_train)
-    y_preds[strategy] = mapie.predict(X_test)[:, :, 0]
+    y_pred[strategy], y_pis[strategy] = mapie.predict(X_test, alpha=0.05)

 Let’s now compare the confidence intervals with the prediction intervals obtained
 by the Jackknife+, Jackknife-minmax, CV+, and CV-minmax strategies.

@@ -159,9 +159,9 @@ by the Jackknife+, Jackknife-minmax, CV+, and CV-minmax strategies.
     X_test.ravel(),
     y_mesh.ravel(),
     1.96*noise,
-    y_preds[strategy][:, 0].ravel(),
-    y_preds[strategy][:, 1].ravel(),
-    y_preds[strategy][:, 2].ravel(),
+    y_pred[strategy].ravel(),
+    y_pis[strategy][:, 0, 0].ravel(),
+    y_pis[strategy][:, 1, 0].ravel(),
     ax=coord,
     title=strategy
 )

@@ -178,7 +178,7 @@ Let’s confirm this by comparing the prediction interval widths over

 fig, ax = plt.subplots(1, 1, figsize=(7, 5))
 for strategy in STRATEGIES:
-    ax.plot(X_test, y_preds[strategy][:, 2] - y_preds[strategy][:, 1])
+    ax.plot(X_test, y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0])
 ax.axhline(1.96*2*noise, ls="--", color="k")
 ax.set_xlabel("x")
 ax.set_ylabel("Prediction Interval Width")

@@ -311,11 +311,11 @@ strategies.
     "cv_plus": dict(method="plus", cv=10),
     "cv_minmax": dict(method="minmax", cv=10),
 }
-prediction_interval = {}
+y_pred, y_pis = {}, {}
 for strategy, params in STRATEGIES.items():
-    mapie = MapieRegressor(polyn_model, alpha=0.05, ensemble=False, **params)
+    mapie = MapieRegressor(polyn_model, ensemble=False, **params)
     mapie.fit(X_train, y_train)
-    y_preds[strategy] = mapie.predict(X_test)[:, :, 0]
+    y_pred[strategy], y_pis[strategy] = mapie.predict(X_test, alpha=0.05)


 .. code:: python

@@ -331,9 +331,9 @@ strategies.
     X_test.ravel(),
     y_mesh.ravel(),
     1.96*noise,
-    y_preds[strategy][:, 0].ravel(),
-    y_preds[strategy][:, 1].ravel(),
-    y_preds[strategy][:, 2].ravel(),
+    y_pred[strategy].ravel(),
+    y_pis[strategy][:, 0, :].ravel(),
+    y_pis[strategy][:, 1, :].ravel(),
     ax=coord,
     title=strategy
 )

@@ -354,7 +354,7 @@ Let's now compare the prediction interval widths between all strategies.
 fig, ax = plt.subplots(1, 1, figsize=(7, 5))
 ax.set_yscale("log")
 for strategy in STRATEGIES:
-    ax.plot(X_test, y_preds[strategy][:, 2] - y_preds[strategy][:, 1])
+    ax.plot(X_test, y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0])
 ax.axhline(1.96*2*noise, ls="--", color="k")
 ax.set_xlabel("x")
 ax.set_ylabel("Prediction Interval Width")

@@ -514,9 +514,9 @@ and compare their prediction interval.
 model_names = ["polyn", "xgb", "mlp"]
 prediction_interval = {}
 for name, model in zip(model_names, models):
-    mapie = MapieRegressor(model, alpha=0.05, method="plus", cv=5, ensemble=True)
+    mapie = MapieRegressor(model, method="plus", cv=5, ensemble=True)
     mapie.fit(X_train, y_train)
-    y_preds[name] = mapie.predict(X_test)[:, :, 0]
+    y_pred[name], y_pis[name] = mapie.predict(X_test, alpha=0.05)

 .. code:: python

@@ -528,9 +528,9 @@ and compare their prediction interval.
     X_test.ravel(),
     y_mesh.ravel(),
     1.96*noise,
-    y_preds[name][:, 0].ravel(),
-    y_preds[name][:, 1].ravel(),
-    y_preds[name][:, 2].ravel(),
+    y_pred[name].ravel(),
+    y_pis[name][:, 0, 0].ravel(),
+    y_pis[name][:, 1, 0].ravel(),
     ax=ax,
     title=name
 )
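For orientation, here is the loop the tutorial hunks converge on, written out as a self-contained sketch. The polynomial pipeline and the noisy one-dimensional data are stand-ins for the tutorial's polyn_model and dataset, and only the two CV strategies visible in the hunks are kept:

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from mapie.estimators import MapieRegressor

rng = np.random.default_rng(42)
X_train = rng.uniform(-1, 1, size=(100, 1))
y_train = X_train.ravel() ** 2 + rng.normal(0, 0.1, size=100)
X_test = np.linspace(-1, 1, 50).reshape(-1, 1)

polyn_model = make_pipeline(PolynomialFeatures(degree=2), LinearRegression())

STRATEGIES = {
    "cv_plus": dict(method="plus", cv=10),
    "cv_minmax": dict(method="minmax", cv=10),
}
y_pred, y_pis = {}, {}
for strategy, params in STRATEGIES.items():
    mapie = MapieRegressor(polyn_model, ensemble=False, **params)
    mapie.fit(X_train, y_train)
    y_pred[strategy], y_pis[strategy] = mapie.predict(X_test, alpha=0.05)
    # With a single alpha, widths are y_pis[:, 1, 0] - y_pis[:, 0, 0].
    width = (y_pis[strategy][:, 1, 0] - y_pis[strategy][:, 0, 0]).mean()
    print(f"{strategy}: mean interval width {width:.3f}")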

examples/plot_barber2020_simulations.py

Lines changed: 4 additions & 4 deletions
@@ -108,18 +108,17 @@ def PIs_vs_dimensions(
 for strategy, params in strategies.items():
     mapie = MapieRegressor(
         LinearRegression(),
-        alpha=alpha,
         ensemble=True,
         n_jobs=-1,
         **params
     )
     mapie.fit(X_train, y_train)
-    y_preds = mapie.predict(X_test)[:, :, 0]
+    y_pred, y_pis = mapie.predict(X_test, alpha=alpha)
     results[strategy][dimension]["coverage"][trial] = (
-        coverage_score(y_test, y_preds[:, 1], y_preds[:, 2])
+        coverage_score(y_test, y_pis[:, 0, 0], y_pis[:, 1, 0])
     )
     results[strategy][dimension]["width_mean"][trial] = (
-        y_preds[:, 2] - y_preds[:, 1]
+        y_pis[:, 1, 0] - y_pis[:, 0, 0]
     ).mean()
 return results

@@ -198,3 +197,4 @@ def plot_simulation_results(
 dimensions = np.arange(10, 150, 10)
 results = PIs_vs_dimensions(STRATEGIES, alpha, ntrial, dimensions)
 plot_simulation_results(results, title="Coverages and interval widths")
+plt.show()

examples/plot_homoscedastic_1d_data.py

Lines changed: 5 additions & 5 deletions
@@ -139,22 +139,22 @@ def plot_1d_data(
 for i, (strategy, params) in enumerate(STRATEGIES.items()):
     mapie = MapieRegressor(
         polyn_model,
-        alpha=0.05,
         ensemble=True,
         n_jobs=-1,
         **params
     )
     mapie.fit(X_train.reshape(-1, 1), y_train)
-    y_preds = mapie.predict(X_test.reshape(-1, 1))[:, :, 0]
+    y_pred, y_pis = mapie.predict(X_test.reshape(-1, 1), alpha=0.05)
     plot_1d_data(
         X_train,
         y_train,
         X_test,
         y_test,
         y_test_sigma,
-        y_preds[:, 0],
-        y_preds[:, 1],
-        y_preds[:, 2],
+        y_pred,
+        y_pis[:, 0, 0],
+        y_pis[:, 1, 0],
         axs[i],
         strategy
     )
+plt.show()

examples/plot_nested-cv.py

Lines changed: 10 additions & 10 deletions
@@ -90,20 +90,21 @@
 best_est = cv_obj.best_estimator_
 mapie_non_nested = MapieRegressor(
     best_est,
-    alpha=alpha,
     method="plus",
     cv=cv,
     ensemble=True,
     n_jobs=-1
 )
 mapie_non_nested.fit(X_train, y_train)
-y_preds_non_nested = mapie_non_nested.predict(X_test)[:, :, 0]
-widths_non_nested = y_preds_non_nested[:, 2] - y_preds_non_nested[:, 1]
+y_pred_non_nested, y_pis_non_nested = mapie_non_nested.predict(
+    X_test, alpha=alpha
+)
+widths_non_nested = y_pis_non_nested[:, 1, 0] - y_pis_non_nested[:, 0, 0]
 coverage_non_nested = coverage_score(
-    y_test, y_preds_non_nested[:, 1], y_preds_non_nested[:, 2]
+    y_test, y_pis_non_nested[:, 0, 0], y_pis_non_nested[:, 1, 0]
 )
 score_non_nested = mean_squared_error(
-    y_test, y_preds_non_nested[:, 0], squared=False
+    y_test, y_pred_non_nested, squared=False
 )

 # Nested approach with the CV+ strategy using the Random Forest model.

@@ -120,18 +121,17 @@
 )
 mapie_nested = MapieRegressor(
     cv_obj,
-    alpha=alpha,
     method="plus",
     cv=cv,
     ensemble=True
 )
 mapie_nested.fit(X_train, y_train)
-y_preds_nested = mapie_nested.predict(X_test)[:, :, 0]
-widths_nested = y_preds_nested[:, 2] - y_preds_nested[:, 1]
+y_pred_nested, y_pis_nested = mapie_nested.predict(X_test, alpha=alpha)
+widths_nested = y_pis_nested[:, 1] - y_pis_nested[:, 0]
 coverage_nested = coverage_score(
-    y_test, y_preds_nested[:, 1], y_preds_nested[:, 2]
+    y_test, y_pis_nested[:, 0, 0], y_pis_nested[:, 1, 0]
 )
-score_nested = mean_squared_error(y_test, y_preds_nested[:, 0], squared=False)
+score_nested = mean_squared_error(y_test, y_pred_nested, squared=False)

 # Print scores and effective coverages.
 print(
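The nested versus non-nested distinction comes down to what MapieRegressor wraps. A hedged sketch of that difference, where the small random forest, the one-parameter search space, and the generated data are all placeholders rather than the example's real configuration:

from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from mapie.estimators import MapieRegressor

X_train, y_train = make_regression(
    n_samples=200, n_features=4, noise=10, random_state=0
)

cv_obj = RandomizedSearchCV(
    RandomForestRegressor(n_estimators=10, random_state=0),
    param_distributions={"max_depth": [2, 5, 10]},
    n_iter=3,
    cv=5,
    random_state=0,
)
cv_obj.fit(X_train, y_train)

# Non-nested: tune once, then wrap the single refitted best estimator.
mapie_non_nested = MapieRegressor(
    cv_obj.best_estimator_, method="plus", cv=5, ensemble=True
)

# Nested: wrap the search object itself, so the hyperparameter search is
# re-run inside each of MAPIE's cross-validation folds.
mapie_nested = MapieRegressor(cv_obj, method="plus", cv=5, ensemble=True)

for mapie in (mapie_non_nested, mapie_nested):
    mapie.fit(X_train, y_train)
    y_pred, y_pis = mapie.predict(X_train, alpha=0.2)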

examples/plot_prefit_nn.py

Lines changed: 4 additions & 3 deletions
@@ -45,12 +45,13 @@ def f(x: np.ndarray) -> np.ndarray:
 model.fit(X_train.reshape(-1, 1), y_train)

 # Calibrate uncertainties on validation set
-alpha = 0.1
-mapie = MapieRegressor(model, alpha=alpha, cv="prefit")
+mapie = MapieRegressor(model, cv="prefit")
 mapie.fit(X_val.reshape(-1, 1), y_val)

 # Evaluate prediction and coverage level on testing set
-y_pred, y_pred_low, y_pred_up = mapie.predict(X_test.reshape(-1, 1))[:, :, 0].T
+alpha = 0.1
+y_pred, y_pis = mapie.predict(X_test.reshape(-1, 1), alpha=alpha)
+y_pred_low, y_pred_up = y_pis[:, 0, 0], y_pis[:, 1, 0]
 coverage = coverage_score(y_test, y_pred_low, y_pred_up)

 # Plot obtained prediction intervals on testing set
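End to end, the cv="prefit" workflow now reads as in the sketch below; the linear model and synthetic splits are placeholders for the example's neural network and data:

from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from mapie.estimators import MapieRegressor
from mapie.metrics import coverage_score

X, y = make_regression(n_samples=600, n_features=1, noise=10, random_state=59)
X_train, X_rest, y_train, y_rest = train_test_split(X, y, test_size=0.5, random_state=0)
X_val, X_test, y_val, y_test = train_test_split(X_rest, y_rest, test_size=0.5, random_state=0)

# Train any regressor beforehand...
model = LinearRegression().fit(X_train, y_train)

# ...then calibrate uncertainties on a held-out validation set with cv="prefit".
mapie = MapieRegressor(model, cv="prefit")
mapie.fit(X_val, y_val)

# alpha is supplied only at prediction time.
alpha = 0.1
y_pred, y_pis = mapie.predict(X_test, alpha=alpha)
y_pred_low, y_pred_up = y_pis[:, 0, 0], y_pis[:, 1, 0]
print("coverage:", coverage_score(y_test, y_pred_low, y_pred_up))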

examples/plot_toy_model.py

Lines changed: 8 additions & 8 deletions
@@ -18,26 +18,26 @@
 X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=59)

 alpha = [0.05, 0.32]
-mapie = MapieRegressor(regressor, alpha=alpha, method="plus")
+mapie = MapieRegressor(regressor, method="plus")
 mapie.fit(X, y)
-y_preds = mapie.predict(X)
+y_pred, y_pis = mapie.predict(X, alpha=alpha)

 coverage_scores = [
-    coverage_score(y, y_preds[:, 1, i], y_preds[:, 2, i])
+    coverage_score(y, y_pis[:, 0, i], y_pis[:, 1, i])
     for i, _ in enumerate(alpha)
 ]

 plt.xlabel("x")
 plt.ylabel("y")
 plt.scatter(X, y, alpha=0.3)
-plt.plot(X, y_preds[:, 0, 0], color="C1")
+plt.plot(X, y_pred, color="C1")
 order = np.argsort(X[:, 0])
-plt.plot(X[order], y_preds[order][:, 1, 1], color="C1", ls="--")
-plt.plot(X[order], y_preds[order][:, 2, 1], color="C1", ls="--")
+plt.plot(X[order], y_pis[order][:, 0, 1], color="C1", ls="--")
+plt.plot(X[order], y_pis[order][:, 1, 1], color="C1", ls="--")
 plt.fill_between(
     X[order].ravel(),
-    y_preds[:, 1, 0][order].ravel(),
-    y_preds[:, 2, 0][order].ravel(),
+    y_pis[order][:, 0, 0].ravel(),
+    y_pis[order][:, 1, 0].ravel(),
     alpha=0.2
 )
 plt.title(
