3535 'TimeSeriesSplit' : {'n_splits' : 3 ,
3636 'max_train_size' : None },
3737 'GroupShuffleSplit' : {'n_splits' : 5 ,
38- 'test_size' : 'default' ,
38+ 'test_size' : None ,
3939 'random_state' : None },
4040 'StratifiedShuffleSplit' : {'n_splits' : 10 ,
41- 'test_size' : 'default' ,
41+ 'test_size' : None ,
4242 'random_state' : None },
4343 'ShuffleSplit' : {'n_splits' : 10 ,
44- 'test_size' : 'default' ,
44+ 'test_size' : None ,
4545 'random_state' : None }
4646 }
4747
@@ -137,6 +137,12 @@ def fit_predict_and_loss(self, iterative=False):
137137 train_splits = [None ] * self .cv_folds
138138
139139 y = _get_y_array (self .Y_train , self .task_type )
140+
141+ train_losses = [] # stores train loss of each fold.
142+ train_fold_weights = [] # used as weights when averaging train losses.
143+ opt_losses = [] # stores opt (validation) loss of each fold.
144+ opt_fold_weights = [] # weights for opt_losses.
145+
140146 # TODO: mention that no additional run info is possible in this
141147 # case! -> maybe remove full CV from the train evaluator anyway and
142148 # make the user implement this!
@@ -179,30 +185,57 @@ def fit_predict_and_loss(self, iterative=False):
179185 Y_test_pred [i ] = test_pred
180186 train_splits [i ] = train_split
181187
188+ # Compute train loss of this fold and store it. train_loss could
189+ # either be a scalar or a dict of scalars with metrics as keys.
190+ train_loss = self ._loss (
191+ self .Y_train_targets [train_split ],
192+ train_pred ,
193+ )
194+ train_losses .append (train_loss )
195+ # number of training data points for this fold. Used for weighting
196+ # the average.
197+ train_fold_weights .append (len (train_split ))
198+
199+ # Compute validation loss of this fold and store it.
200+ optimization_loss = self ._loss (
201+ self .Y_targets [i ],
202+ opt_pred ,
203+ )
204+ opt_losses .append (optimization_loss )
205+ # number of optimization data points for this fold. Used for weighting
206+ # the average.
207+ opt_fold_weights .append (len (test_split ))
208+
209+ # Compute weights of each fold based on the number of samples in each
210+ # fold.
211+ train_fold_weights = [w / sum (train_fold_weights ) for w in train_fold_weights ]
212+ opt_fold_weights = [w / sum (opt_fold_weights ) for w in opt_fold_weights ]
213+
214+ # train_losses is a list of either scalars or dicts. If it contains dicts,
215+ # then train_loss is computed using the target metric (self.metric).
216+ if all (isinstance (elem , dict ) for elem in train_losses ):
217+ train_loss = np .average ([train_losses [i ][str (self .metric )]
218+ for i in range (self .cv_folds )],
219+ weights = train_fold_weights ,
220+ )
221+ else :
222+ train_loss = np .average (train_losses , weights = train_fold_weights )
223+
224+ # if all_scoring_functions is True, return a dict of opt_loss. Otherwise,
225+ # return a scalar.
226+ if self .all_scoring_functions is True :
227+ opt_loss = {}
228+ for metric in opt_losses [0 ].keys ():
229+ opt_loss [metric ] = np .average ([opt_losses [i ][metric ]
230+ for i in range (self .cv_folds )],
231+ weights = opt_fold_weights ,
232+ )
233+ else :
234+ opt_loss = np .average (opt_losses , weights = opt_fold_weights )
235+
182236 Y_targets = self .Y_targets
183237 Y_train_targets = self .Y_train_targets
184238
185- Y_train_pred_full = np .array (
186- [
187- np .ones (
188- (self .Y_train .shape [0 ], Y_train_pred [i ].shape [1 ])
189- ) * np .NaN
190- for _ in range (self .cv_folds ) if Y_train_pred [i ] is not None
191- ]
192- )
193- for i in range (self .cv_folds ):
194- if Y_train_pred [i ] is None :
195- continue
196- Y_train_pred_full [i ][train_splits [i ]] = Y_train_pred [i ]
197- Y_train_pred = np .nanmean (Y_train_pred_full , axis = 0 )
198- if self .cv_folds == 1 :
199- Y_train_pred = Y_train_pred [
200- # if the first column is np.NaN, all other columns have
201- # to be np.NaN as well
202- np .isfinite (Y_train_pred [:, 0 ])
203- ]
204-
205-
206239 Y_optimization_pred = np .concatenate (
207240 [Y_optimization_pred [i ] for i in range (self .cv_folds )
208241 if Y_optimization_pred [i ] is not None ])
@@ -240,8 +273,8 @@ def fit_predict_and_loss(self, iterative=False):
240273 self ._added_empty_model = True
241274
242275 self .finish_up (
243- loss = loss ,
244- train_pred = Y_train_pred ,
276+ loss = opt_loss ,
277+ train_loss = train_loss ,
245278 opt_pred = Y_optimization_pred ,
246279 valid_pred = Y_valid_pred ,
247280 test_pred = Y_test_pred ,
@@ -282,6 +315,7 @@ def partial_fit_predict_and_loss(self, fold, iterative=False):
282315 iterative = iterative ,
283316 )
284317 )
318+ train_loss = self ._loss (self .Y_actual_train , train_pred )
285319 loss = self ._loss (self .Y_targets [fold ], opt_pred )
286320
287321 if self .cv_folds > 1 :
@@ -292,7 +326,7 @@ def partial_fit_predict_and_loss(self, fold, iterative=False):
292326
293327 self .finish_up (
294328 loss = loss ,
295- train_pred = train_pred ,
329+ train_loss = train_loss ,
296330 opt_pred = opt_pred ,
297331 valid_pred = valid_pred ,
298332 test_pred = test_pred ,
@@ -345,6 +379,9 @@ def _partial_fit_and_predict(self, fold, train_indices, test_indices,
345379 if self .cv_folds == 1 :
346380 self .model = model
347381
382+ train_loss = self ._loss (self .Y_train [train_indices ],
383+ Y_train_pred ,
384+ )
348385 loss = self ._loss (self .Y_train [test_indices ], Y_optimization_pred )
349386 additional_run_info = model .get_additional_run_info ()
350387
@@ -354,7 +391,7 @@ def _partial_fit_and_predict(self, fold, train_indices, test_indices,
354391 final_call = False
355392 self .finish_up (
356393 loss = loss ,
357- train_pred = Y_train_pred ,
394+ train_loss = train_loss ,
358395 opt_pred = Y_optimization_pred ,
359396 valid_pred = Y_valid_pred ,
360397 test_pred = Y_test_pred ,
@@ -386,11 +423,14 @@ def _partial_fit_and_predict(self, fold, train_indices, test_indices,
386423 train_indices = train_indices ,
387424 test_indices = test_indices
388425 )
426+ train_loss = self ._loss (self .Y_train [train_indices ],
427+ Y_train_pred ,
428+ )
389429 loss = self ._loss (self .Y_train [test_indices ], Y_optimization_pred )
390430 additional_run_info = model .get_additional_run_info ()
391431 self .finish_up (
392432 loss = loss ,
393- train_pred = Y_train_pred ,
433+ train_loss = train_loss ,
394434 opt_pred = Y_optimization_pred ,
395435 valid_pred = Y_valid_pred ,
396436 test_pred = Y_test_pred ,
0 commit comments