|
70 | 70 | # diabetes dataset. |
71 | 71 |
|
72 | 72 | n_folds = 5 |
73 | | -regressor = RidgeCV(alphas=np.logspace(-3, 3, 10)) |
| 73 | +regressor = RidgeCV( |
| 74 | + alphas=np.logspace(-3, 3, 10), |
| 75 | + cv=KFold(shuffle=True, random_state=20), |
| 76 | +) |
74 | 77 | regressor_list = [clone(regressor) for _ in range(n_folds)] |
75 | | -kf = KFold(n_splits=n_folds, shuffle=True, random_state=0) |
| 78 | +kf = KFold(n_splits=n_folds, shuffle=True, random_state=21) |
76 | 79 | for i, (train_index, test_index) in enumerate(kf.split(X)): |
77 | 80 | regressor_list[i].fit(X[train_index], y[train_index]) |
78 | 81 | score = r2_score( |
|
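Note on the hunk above: by default RidgeCV selects alpha with an efficient leave-one-out cross-validation; passing cv=KFold(shuffle=True, random_state=20) switches it to shuffled 5-fold selection (KFold defaults to n_splits=5). A minimal sketch of the difference, assuming X and y are the diabetes features and target loaded earlier in the script:

    import numpy as np
    from sklearn.linear_model import RidgeCV
    from sklearn.model_selection import KFold

    # Default: efficient leave-one-out CV over the alpha grid.
    loo_ridge = RidgeCV(alphas=np.logspace(-3, 3, 10)).fit(X, y)

    # Explicit shuffled 5-fold CV; the selected alpha_ may differ.
    kf_ridge = RidgeCV(
        alphas=np.logspace(-3, 3, 10),
        cv=KFold(shuffle=True, random_state=20),
    ).fit(X, y)

    print(loo_ridge.alpha_, kf_ridge.alpha_)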
86 | 89 | print(f"Fold {i}: {mse=}") |
87 | 90 |
|
88 | 91 | # %% |
89 | | -# Fit a baselien model on the diabetes dataset |
| 92 | +# Fit a baseline model on the diabetes dataset |
90 | 93 | # -------------------------------------------- |
91 | 94 | # We use a Ridge regression model with 10-fold cross-validation to fit the |
92 | 95 | # diabetes dataset. |
93 | 96 |
|
94 | 97 | n_folds = 10 |
95 | 98 | regressor = RidgeCV(alphas=np.logspace(-3, 3, 10)) |
96 | 99 | regressor_list = [clone(regressor) for _ in range(n_folds)] |
97 | | -kf = KFold(n_splits=n_folds, shuffle=True, random_state=0) |
| 100 | +kf = KFold(n_splits=n_folds, shuffle=True, random_state=21) |
98 | 101 | for i, (train_index, test_index) in enumerate(kf.split(X)): |
99 | 102 | regressor_list[i].fit(X[train_index], y[train_index]) |
100 | 103 | score = r2_score( |
|
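On the baseline loop: clone() hands each fold an unfitted copy of the regressor with identical hyperparameters, so regressor_list[i] is trained only on fold i's training split. A minimal sketch of that behavior, assuming the imports from the script:

    from sklearn.base import clone

    base = RidgeCV(alphas=np.logspace(-3, 3, 10))
    copies = [clone(base) for _ in range(n_folds)]
    # Each copy is a distinct, unfitted estimator with the same parameters,
    # so fitting one fold never leaks into another.
    assert all(c is not base for c in copies)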
112 | 115 | # -------------------------------------------------------- |
113 | 116 |
|
114 | 117 | cfi_importance_list = [] |
| 118 | +kf = KFold(n_splits=n_folds, shuffle=True, random_state=21) |
115 | 119 | for i, (train_index, test_index) in enumerate(kf.split(X)): |
116 | 120 | print(f"Fold {i}") |
117 | 121 | X_train, X_test = X[train_index], X[test_index] |
118 | 122 | y_train, y_test = y[train_index], y[test_index] |
119 | 123 | cfi = CFI( |
120 | 124 | estimator=regressor_list[i], |
121 | | - imputation_model_continuous=RidgeCV(alphas=np.logspace(-3, 3, 10)), |
| 125 | + imputation_model_continuous=RidgeCV(alphas=np.logspace(-3, 3, 10), cv=KFold()), |
122 | 126 | imputation_model_categorical=LogisticRegressionCV( |
123 | 127 | Cs=np.logspace(-2, 2, 10), |
| 128 | + cv=KFold(), |
124 | 129 | ), |
125 | 130 | # covariate_estimator=HistGradientBoostingRegressor(random_state=0,), |
126 | 131 | n_permutations=50, |
127 | | - random_state=0, |
| 132 | + random_state=24, |
128 | 133 | n_jobs=4, |
129 | 134 | ) |
130 | 135 | cfi.fit(X_train, y_train) |
|
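The kf re-created above uses the same n_splits, shuffle, and random_state=21 as the split used to fit regressor_list, so each CFI fold evaluates the model that was trained on exactly that fold's training data (CFI permutes a feature conditionally on the others, via the imputation models passed in, rather than permuting the raw column). sklearn's KFold is deterministic for a fixed seed; a minimal sketch verifying that, assuming X from above:

    import numpy as np
    from sklearn.model_selection import KFold

    kf_a = KFold(n_splits=n_folds, shuffle=True, random_state=21)
    kf_b = KFold(n_splits=n_folds, shuffle=True, random_state=21)
    for (tr_a, te_a), (tr_b, te_b) in zip(kf_a.split(X), kf_b.split(X)):
        # Same parameters and seed reproduce identical train/test indices.
        assert np.array_equal(tr_a, tr_b) and np.array_equal(te_a, te_b)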
136 | 141 | # --------------------------------------------------------- |
137 | 142 |
|
138 | 143 | loco_importance_list = [] |
139 | | - |
| 144 | +kf = KFold(n_splits=n_folds, shuffle=True, random_state=21) |
140 | 145 | for i, (train_index, test_index) in enumerate(kf.split(X)): |
141 | 146 | print(f"Fold {i}") |
142 | 147 | X_train, X_test = X[train_index], X[test_index] |
|
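LOCO (leave-one-covariate-out) scores a feature by refitting the model without it and measuring the loss increase on held-out data; the loop below delegates this to hidimstat. As a rough illustration of the idea only (not hidimstat's API), one fold's score for a hypothetical feature index j could look like:

    import numpy as np
    from sklearn.base import clone
    from sklearn.metrics import mean_squared_error

    def loco_score(model, X_tr, y_tr, X_te, y_te, j):
        # Fit the full model and a model with column j dropped.
        full = clone(model).fit(X_tr, y_tr)
        reduced = clone(model).fit(np.delete(X_tr, j, axis=1), y_tr)
        # Importance = loss increase when feature j is left out.
        return mean_squared_error(
            y_te, reduced.predict(np.delete(X_te, j, axis=1))
        ) - mean_squared_error(y_te, full.predict(X_te))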
155 | 160 | # ---------------------------------------------------------------- |
156 | 161 |
|
157 | 162 | pfi_importance_list = [] |
158 | | - |
| 163 | +kf = KFold(n_splits=n_folds, shuffle=True, random_state=21) |
159 | 164 | for i, (train_index, test_index) in enumerate(kf.split(X)): |
160 | 165 | print(f"Fold {i}") |
161 | 166 | X_train, X_test = X[train_index], X[test_index] |
162 | 167 | y_train, y_test = y[train_index], y[test_index] |
163 | 168 | pfi = PFI( |
164 | 169 | estimator=regressor_list[i], |
165 | 170 | n_permutations=50, |
166 | | - random_state=0, |
| 171 | + random_state=25, |
167 | 172 | n_jobs=4, |
168 | 173 | ) |
169 | 174 | pfi.fit(X_train, y_train) |
|
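PFI permutes one feature at a time in the evaluation data and records how much the loss degrades, averaged over n_permutations shuffles (random_state=25 seeds those shuffles). As a rough illustration of the idea only (not hidimstat's API), with a fitted model and numpy arrays X_te, y_te:

    import numpy as np
    from sklearn.metrics import mean_squared_error

    def pfi_score(model, X_te, y_te, j, n_permutations=50, seed=25):
        rng = np.random.default_rng(seed)
        base = mean_squared_error(y_te, model.predict(X_te))
        deltas = []
        for _ in range(n_permutations):
            X_perm = X_te.copy()
            # Break the feature-target link by shuffling column j only.
            X_perm[:, j] = rng.permutation(X_perm[:, j])
            deltas.append(mean_squared_error(y_te, model.predict(X_perm)) - base)
        # Importance = mean loss increase under permutation.
        return float(np.mean(deltas))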