Skip to content

Commit c0a3255

Browse files
author
Thibault Cordier
committed
UPD: change dataset in notebook
1 parent aeb7894 commit c0a3255

File tree

1 file changed

+10
-13
lines changed

1 file changed

+10
-13
lines changed

examples/regression/2-advanced-analysis/plot_nested-cv.py

Lines changed: 10 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -45,35 +45,34 @@
4545
"""
4646
import matplotlib.pyplot as plt
4747
import numpy as np
48-
import pandas as pd
4948
from scipy.stats import randint
5049
from sklearn.ensemble import RandomForestRegressor
5150
from sklearn.metrics import mean_squared_error
5251
from sklearn.model_selection import RandomizedSearchCV, train_test_split
52+
from sklearn.datasets import make_sparse_uncorrelated
5353

5454
from mapie.metrics import regression_coverage_score
5555
from mapie.regression import MapieRegressor
5656

57-
# Load the Boston data
58-
data_url = "http://lib.stat.cmu.edu/datasets/boston"
59-
raw_df = pd.read_csv(data_url, sep=r'\s+', skiprows=22, header=None)
60-
X_boston = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
61-
y_boston = raw_df.values[1::2, 2]
57+
58+
random_state = 42
59+
60+
# Load the toy data
61+
X, y = make_sparse_uncorrelated(500, random_state=random_state)
6262

6363
# Split the data into training and test sets.
6464
X_train, X_test, y_train, y_test = train_test_split(
65-
X_boston, y_boston, test_size=0.2, random_state=42
65+
X, y, test_size=0.2, random_state=random_state
6666
)
6767

6868
# Define the Random Forest model as base regressor with parameter ranges.
69-
rf_model = RandomForestRegressor(random_state=59, verbose=0)
69+
rf_model = RandomForestRegressor(random_state=random_state, verbose=0)
7070
rf_params = {"max_depth": randint(2, 10), "n_estimators": randint(10, 100)}
7171

7272
# Cross-validation and prediction-interval parameters.
7373
cv = 10
7474
n_iter = 5
7575
alpha = 0.05
76-
random_state = 59
7776

7877
# Non-nested approach with the CV+ strategy using the Random Forest model.
7978
cv_obj = RandomizedSearchCV(
@@ -144,12 +143,10 @@
144143

145144
# Compare prediction interval widths.
146145
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(13, 6))
147-
min_x = 14.0
148-
max_x = 17.0
146+
min_x = np.min([np.min(widths_nested), np.min(widths_non_nested)])
147+
max_x = np.max([np.max(widths_nested), np.max(widths_non_nested)])
149148
ax1.set_xlabel("Prediction interval width using the nested CV approach")
150149
ax1.set_ylabel("Prediction interval width using the non-nested CV approach")
151-
ax1.set_xlim([min_x, max_x])
152-
ax1.set_ylim([min_x, max_x])
153150
ax1.scatter(widths_nested, widths_non_nested)
154151
ax1.plot([min_x, max_x], [min_x, max_x], ls="--", color="k")
155152
ax2.axvline(x=0, color="r", lw=2)

0 commit comments

Comments (0)