44 | 44 | np.random.seed(0) |
45 | 45 | X = np.linspace(0, 10, n_points).reshape(-1, 1) |
46 | 46 | group_size = n_points // 10 |
47 | | -groups_list = [] |
| 47 | +partition_list = [] |
48 | 48 | for i in range(10): |
49 | | - groups_list.append(np.array([i] * group_size)) |
50 | | -groups = np.concatenate(groups_list) |
| 49 | + partition_list.append(np.array([i] * group_size)) |
| 50 | +partition = np.concatenate(partition_list) |
51 | 51 |
|
52 | 52 | noise_0_1 = np.random.normal(0, 0.1, group_size) |
53 | 53 | noise_1_2 = np.random.normal(0, 0.5, group_size) |
|
62 | 62 |
|
63 | 63 | y = np.concatenate( |
64 | 64 | [ |
65 | | - np.sin(X[groups == 0, 0] * 2) + noise_0_1, |
66 | | - np.sin(X[groups == 1, 0] * 2) + noise_1_2, |
67 | | - np.sin(X[groups == 2, 0] * 2) + noise_2_3, |
68 | | - np.sin(X[groups == 3, 0] * 2) + noise_3_4, |
69 | | - np.sin(X[groups == 4, 0] * 2) + noise_4_5, |
70 | | - np.sin(X[groups == 5, 0] * 2) + noise_5_6, |
71 | | - np.sin(X[groups == 6, 0] * 2) + noise_6_7, |
72 | | - np.sin(X[groups == 7, 0] * 2) + noise_7_8, |
73 | | - np.sin(X[groups == 8, 0] * 2) + noise_8_9, |
74 | | - np.sin(X[groups == 9, 0] * 2) + noise_9_10, |
| 65 | + np.sin(X[partition == 0, 0] * 2) + noise_0_1, |
| 66 | + np.sin(X[partition == 1, 0] * 2) + noise_1_2, |
| 67 | + np.sin(X[partition == 2, 0] * 2) + noise_2_3, |
| 68 | + np.sin(X[partition == 3, 0] * 2) + noise_3_4, |
| 69 | + np.sin(X[partition == 4, 0] * 2) + noise_4_5, |
| 70 | + np.sin(X[partition == 5, 0] * 2) + noise_5_6, |
| 71 | + np.sin(X[partition == 6, 0] * 2) + noise_6_7, |
| 72 | + np.sin(X[partition == 7, 0] * 2) + noise_7_8, |
| 73 | + np.sin(X[partition == 8, 0] * 2) + noise_8_9, |
| 74 | + np.sin(X[partition == 9, 0] * 2) + noise_9_10, |
75 | 75 | ], axis=0 |
76 | 76 | ) |
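Editor's note: the ten noise_* arrays and the ten concatenate terms all follow the same pattern, so the generation step can be sketched more compactly with a loop. The snippet below is only an illustration; apart from the 0.1 and 0.5 scales visible above, the per-partition noise scales are placeholder values, not taken from this file.

# Illustrative loop-based rewrite (placeholder noise scales beyond the first two).
import numpy as np

np.random.seed(0)
n_points = 1000                                   # assumed; defined earlier in the script
X = np.linspace(0, 10, n_points).reshape(-1, 1)
group_size = n_points // 10
partition = np.repeat(np.arange(10), group_size)  # same result as the loop above

noise_scales = [0.1, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5]  # placeholders
y = np.concatenate([
    np.sin(X[partition == i, 0] * 2)
    + np.random.normal(0, noise_scales[i], group_size)
    for i in range(10)
])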
77 | 77 |
|
78 | 78 |
|
79 | 79 | ############################################################################## |
80 | | -# We plot the dataset with the groups as colors. |
| 80 | +# We plot the dataset, coloring each point by its partition. |
81 | 81 |
|
82 | 82 |
|
83 | | -plt.scatter(X, y, c=groups) |
| 83 | +plt.scatter(X, y, c=partition) |
84 | 84 | plt.show() |
85 | 85 |
|
86 | 86 |
|
|
91 | 91 | X_train_temp, X_test, y_train_temp, y_test = train_test_split( |
92 | 92 | X, y, test_size=0.2, random_state=0 |
93 | 93 | ) |
94 | | -groups_train_temp, groups_test, _, _ = train_test_split( |
95 | | - groups, y, test_size=0.2, random_state=0 |
| 94 | +partition_train_temp, partition_test, _, _ = train_test_split( |
| 95 | + partition, y, test_size=0.2, random_state=0 |
96 | 96 | ) |
97 | 97 | X_cal, X_train, y_cal, y_train = train_test_split( |
98 | 98 | X_train_temp, y_train_temp, test_size=0.5, random_state=0 |
99 | 99 | ) |
100 | | -groups_cal, groups_train, _, _ = train_test_split( |
101 | | - groups_train_temp, y_train_temp, test_size=0.5, random_state=0 |
| 100 | +partition_cal, partition_train, _, _ = train_test_split( |
| 101 | + partition_train_temp, y_train_temp, test_size=0.5, random_state=0 |
102 | 102 | ) |
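Editor's note: X, y and partition stay aligned here only because both train_test_split calls reuse the same test_size and random_state. An equivalent, arguably less fragile alternative is to split all three arrays in a single call; this is just a sketch of that option, not part of the diff.

# Split X, y and partition together so the rows cannot drift out of sync.
from sklearn.model_selection import train_test_split

(X_train_temp, X_test,
 y_train_temp, y_test,
 partition_train_temp, partition_test) = train_test_split(
    X, y, partition, test_size=0.2, random_state=0
)
(X_cal, X_train,
 y_cal, y_train,
 partition_cal, partition_train) = train_test_split(
    X_train_temp, y_train_temp, partition_train_temp,
    test_size=0.5, random_state=0
)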
103 | 103 |
|
104 | 104 |
|
|
107 | 107 |
|
108 | 108 |
|
109 | 109 | f, ax = plt.subplots(1, 3, figsize=(15, 5)) |
110 | | -ax[0].scatter(X_train, y_train, c=groups_train) |
| 110 | +ax[0].scatter(X_train, y_train, c=partition_train) |
111 | 111 | ax[0].set_title("Train set") |
112 | | -ax[1].scatter(X_cal, y_cal, c=groups_cal) |
| 112 | +ax[1].scatter(X_cal, y_cal, c=partition_cal) |
113 | 113 | ax[1].set_title("Calibration set") |
114 | | -ax[2].scatter(X_test, y_test, c=groups_test) |
| 114 | +ax[2].scatter(X_test, y_test, c=partition_test) |
115 | 115 | ax[2].set_title("Test set") |
116 | 116 | plt.show() |
117 | 117 |
|
|
131 | 131 | mapie_regressor = MapieRegressor(rf, cv="prefit") |
132 | 132 | mondrian_regressor = MondrianCP(MapieRegressor(rf, cv="prefit")) |
133 | 133 | mapie_regressor.fit(X_cal, y_cal) |
134 | | -mondrian_regressor.fit(X_cal, y_cal, groups=groups_cal) |
| 134 | +mondrian_regressor.fit(X_cal, y_cal, partition=partition_cal) |
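Editor's note: conceptually, the Mondrian variant replaces the single global conformity quantile of split conformal prediction with one quantile per partition cell. The sketch below illustrates that idea with plain numpy on the prefit rf; it is a simplified illustration of the principle, not MAPIE's actual implementation (finite-sample corrections and API details are omitted).

# Simplified per-partition split-conformal half-widths (illustration only).
import numpy as np

alpha = 0.1
cal_residuals = np.abs(y_cal - rf.predict(X_cal))

half_width = {}
for g in np.unique(partition_cal):
    # One conformity quantile per partition cell instead of one global quantile.
    half_width[g] = np.quantile(cal_residuals[partition_cal == g], 1 - alpha)

y_pred_test = rf.predict(X_test)
widths = np.array([half_width[g] for g in partition_test])
lower, upper = y_pred_test - widths, y_pred_test + widths  # interval bounds per test point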
135 | 135 |
|
136 | 136 |
|
137 | 137 | ############################################################################## |
|
140 | 140 |
|
141 | 141 | _, y_pss_split = mapie_regressor.predict(X_test, alpha=.1) |
142 | 142 | _, y_pss_mondrian = mondrian_regressor.predict( |
143 | | - X_test, groups=groups_test, alpha=.1 |
| 143 | + X_test, partition=partition_test, alpha=.1 |
144 | 144 | ) |
145 | 145 |
|
146 | 146 |
|
147 | 147 | ############################################################################## |
148 | | -# 6. Compare the coverage by groups, plot both methods side by side. |
| 148 | +# 6. Compare the coverage per partition and plot both methods side by side. |
149 | 149 |
|
150 | 150 |
|
151 | 151 | coverages = {} |
152 | | -for group in np.unique(groups_test): |
| 152 | +for group in np.unique(partition_test): |
153 | 153 | coverages[group] = {} |
154 | 154 | coverages[group]["split"] = regression_coverage_score_v2( |
155 | | - y_test[groups_test == group], y_pss_split[groups_test == group] |
| 155 | + y_test[partition_test == group], y_pss_split[partition_test == group] |
156 | 156 | ) |
157 | 157 | coverages[group]["mondrian"] = regression_coverage_score_v2( |
158 | | - y_test[groups_test == group], y_pss_mondrian[groups_test == group] |
| 158 | + y_test[partition_test == group], |
| 159 | + y_pss_mondrian[partition_test == group] |
159 | 160 | ) |
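Editor's note: the per-partition coverage compared here is simply the fraction of test targets that fall inside their predicted interval. A minimal hand-rolled equivalent is sketched below, assuming the usual MAPIE interval layout of (n_samples, 2, n_alpha) with lower bounds in [:, 0] and upper bounds in [:, 1]; the script itself should keep using regression_coverage_score_v2.

# Hand-rolled empirical coverage for one partition cell (assumed interval layout).
import numpy as np

def empirical_coverage(y_true, intervals):
    lower = intervals[:, 0].ravel()
    upper = intervals[:, 1].ravel()
    return np.mean((y_true >= lower) & (y_true <= upper))

mask = partition_test == 0
print(empirical_coverage(y_test[mask], y_pss_split[mask]))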
160 | 161 |
|
161 | 162 |
|
|
178 | 179 | plt.hlines(0.9, -1, 21, label="90% coverage", color="black", linestyle="--") |
179 | 180 | plt.ylabel("Coverage") |
180 | 181 | plt.legend(loc='upper left', bbox_to_anchor=(1, 1)) |
| 182 | +plt.tight_layout() |
181 | 183 | plt.show() |