@@ -126,36 +126,9 @@ def run(self, config, instance=None,
126126 include = self .include ,
127127 exclude = self .exclude ,
128128 disable_file_output = self .disable_file_output )
129- if self .resampling_strategy != 'test' :
130- if D .info ['task' ] in CLASSIFICATION_TASKS and \
131- D .info ['task' ] != MULTILABEL_CLASSIFICATION :
132- y = D .data ['Y_train' ].ravel ()
133- if self .resampling_strategy in ['holdout' ,
134- 'holdout-iterative-fit' ]:
135- cv = StratifiedShuffleSplit (y = y , n_iter = 1 , train_size = 0.67 ,
136- test_size = 0.33 , random_state = 1 )
137- elif self .resampling_strategy in ['cv' , 'partial-cv' ,
138- 'partial-cv-iterative-fit' ]:
139- cv = StratifiedKFold (y = y ,
140- n_folds = self .resampling_strategy_args [
141- 'folds' ],
142- shuffle = True , random_state = 1 )
143- else :
144- raise ValueError (self .resampling_strategy )
145- else :
146- n = D .data ['Y_train' ].shape [0 ]
147- if self .resampling_strategy in ['holdout' ,
148- 'holdout-iterative-fit' ]:
149- cv = ShuffleSplit (n = n , n_iter = 1 , train_size = 0.67 ,
150- test_size = 0.33 , random_state = 1 )
151- elif self .resampling_strategy in ['cv' , 'partial-cv' ,
152- 'partial-cv-iterative-fit' ]:
153- cv = KFold (n = n ,
154- n_folds = self .resampling_strategy_args ['folds' ],
155- shuffle = True , random_state = 1 )
156- else :
157- raise ValueError (self .resampling_strategy )
158129
130+ if self .resampling_strategy != 'test' :
131+ cv = self .get_splitter (D )
159132 obj_kwargs ['cv' ] = cv
160133 if instance is not None :
161134 obj_kwargs ['instance' ] = instance
@@ -208,3 +181,43 @@ def run(self, config, instance=None,
208181 self .num_run += 1
209182 return status , cost , runtime , additional_run_info
210183
def get_splitter(self, D):
    """Return the resampling splitter matching ``self.resampling_strategy``.

    Tasks listed in ``CLASSIFICATION_TASKS``, except
    ``MULTILABEL_CLASSIFICATION``, get a stratified splitter built from the
    flattened ``D.data['Y_train']``. Every other task gets an unstratified
    splitter sized by the number of rows in ``D.data['Y_train']``.

    * ``'holdout'`` / ``'holdout-iterative-fit'``: a single shuffle split
      with ``train_size=0.67`` and ``test_size=0.33``.
    * ``'cv'`` / ``'partial-cv'`` / ``'partial-cv-iterative-fit'``: shuffled
      K-fold with ``self.resampling_strategy_args['folds']`` folds.

    All splitters are created with ``random_state=1``.

    If building the stratified holdout splitter raises a ``ValueError``
    whose first argument contains ``'The least populated class in y has
    only'``, a plain ``ShuffleSplit`` is returned instead; any other
    ``ValueError`` is re-raised.

    Raises:
        ValueError: if ``self.resampling_strategy`` is none of the
            strategies listed above (the strategy is the error argument).
    """
    targets = D.data['Y_train']
    y = targets.ravel()
    n = targets.shape[0]

    # Resolve the task first so lookups happen in the same order as the
    # strategy validation below expects.
    task = D.info['task']
    stratified = (task in CLASSIFICATION_TASKS and
                  task != MULTILABEL_CLASSIFICATION)

    strategy = self.resampling_strategy
    is_holdout = strategy in ('holdout', 'holdout-iterative-fit')
    is_kfold = strategy in ('cv', 'partial-cv', 'partial-cv-iterative-fit')
    if not (is_holdout or is_kfold):
        raise ValueError(strategy)

    if is_holdout:
        holdout_kwargs = dict(n_iter=1, train_size=0.67,
                              test_size=0.33, random_state=1)
        if not stratified:
            return ShuffleSplit(n=n, **holdout_kwargs)
        try:
            return StratifiedShuffleSplit(y=y, **holdout_kwargs)
        except ValueError as e:
            # Only the "class too small to stratify" failure is recoverable;
            # in that case fall back to an unstratified split.
            if 'The least populated class in y has only' in e.args[0]:
                return ShuffleSplit(n=n, **holdout_kwargs)
            raise

    # Remaining case: one of the K-fold based strategies.
    kfold_kwargs = dict(n_folds=self.resampling_strategy_args['folds'],
                        shuffle=True, random_state=1)
    if stratified:
        return StratifiedKFold(y=y, **kfold_kwargs)
    return KFold(n=n, **kfold_kwargs)
223+
0 commit comments