@@ -187,7 +187,8 @@ def __init__(self,
187187 self ._metric = None
188188 self ._label_num = None
189189 self .models_ = None
190- self .ensemble_indices_ = None
190+ self .ensemble_ = None
191+ self ._can_predict = False
191192
192193 self ._debug_mode = debug_mode
193194 self ._backend = Backend (self ._output_dir , self ._tmp_dir )
@@ -242,9 +243,14 @@ def fit(self, X, y,
242243 raise ValueError ('Array feat_type does not have same number of '
243244 'variables as X has features. %d vs %d.' %
244245 (len (feat_type ), X .shape [1 ]))
245- if feat_type is not None and not all ([isinstance (f , bool )
246+ if feat_type is not None and not all ([isinstance (f , str )
246247 for f in feat_type ]):
247- raise ValueError ('Array feat_type must only contain bools.' )
248+ raise ValueError ('Array feat_type must only contain strings.' )
249+ if feat_type is not None :
250+ for ft in feat_type :
251+ if ft .lower () not in ['categorical' , 'numerical' ]:
252+ raise ValueError ('Only `Categorical` and `Numerical` are '
253+ 'valid feature types, you passed `%s`' % ft )
248254
249255 loaded_data_manager = XYDataManager (X , y ,
250256 task = task ,
@@ -298,16 +304,19 @@ def _print_load_time(basename, time_left_for_this_task,
298304 return time_for_load_data
299305
def _do_dummy_prediction(self, datamanager):
    """Run the base interface once to produce dummy predictions.

    Calls ``autosklearn.cli.base_interface.main`` with the configured
    resampling strategy and its arguments, directing output to the
    temporary directory, and logs before and after the call.
    """
    self._logger.info("Starting to create dummy predictions.")

    # The two ``None`` positional arguments are passed through unchanged;
    # NOTE(review): their meaning is defined by base_interface.main, which
    # is not visible here -- confirm against its signature.
    autosklearn.cli.base_interface.main(
        datamanager,
        self._resampling_strategy,
        None,
        None,
        mode_args=self._resampling_strategy_arguments,
        output_dir=self._tmp_dir,
    )

    self._logger.info("Finished creating dummy predictions.")
306315
307316 def _fit (self , datamanager ):
308317 # Reset learnt stuff
309318 self .models_ = None
310- self .ensemble_indices_ = None
319+ self .ensemble_ = None
311320
312321 # Check arguments prior to doing anything!
313322 if self ._resampling_strategy not in ['holdout' , 'holdout-iterative-fit' ,
@@ -352,7 +361,8 @@ def _fit(self, datamanager):
352361 self ._logger )
353362
354363 # == Perform dummy predictions
355- self ._do_dummy_prediction (datamanager )
364+ if self ._resampling_strategy in ['holdout' , 'holdout-iterative-fit' ]:
365+ self ._do_dummy_prediction (datamanager )
356366
357367 # = Create a searchspace
358368 # Do this before One Hot Encoding to make sure that it creates a
@@ -371,6 +381,12 @@ def _fit(self, datamanager):
371381 self ._include_preprocessors )
372382 self .configuration_space_created_hook (datamanager )
373383
384+ # == RUN ensemble builder
385+ # Do this before calculating the meta-features to make sure that the
386+ # dummy predictions are actually included in the ensemble even if
387+ # calculating the meta-features takes very long
388+ proc_ensembles = self .run_ensemble_builder ()
389+
374390 # == Calculate metafeatures
375391 meta_features = _calculate_metafeatures (
376392 data_feat_type = datamanager .feat_type ,
@@ -481,9 +497,6 @@ def _fit(self, datamanager):
481497 resampling_strategy_arguments = self ._resampling_strategy_arguments ,
482498 shared_mode = self ._shared_mode )
483499
484- # == RUN ensemble builder
485- proc_ensembles = self .run_ensemble_builder ()
486-
487500 procs = []
488501
489502 if proc_smac is not None :
@@ -554,26 +567,43 @@ def run_ensemble_builder(self,
554567 'size 0.' )
555568 return None
556569
def refit(self, X, y):
    """Fit every model that belongs to the ensemble on ``X`` and ``y``.

    Models and the ensemble are loaded via ``self._load_models()`` first
    when no models are present or no ensemble has been loaded yet. After
    all ensemble members have been fitted, ``self._can_predict`` is set to
    ``True``.

    Parameters
    ----------
    X, y
        Training data and targets. Each model receives its own
        ``X.copy()`` / ``y.copy()`` so one model's fit cannot modify the
        data seen by the next.

    Raises
    ------
    ValueError
        If the instance was not configured with ``keep_models == True``.
    """
    if self._keep_models is not True:
        # The message names this method; it previously said "Predict".
        raise ValueError(
            "Refit can only be called if 'keep_models==True'")

    if self.models_ is None or len(self.models_) == 0 or \
            self.ensemble_ is None:
        self._load_models()

    # Query the ensemble once instead of once per loop iteration.
    ensemble_identifiers = self.ensemble_.get_model_identifiers()

    for identifier in self.models_:
        if identifier not in ensemble_identifiers:
            continue
        # Presumably fit() updates the stored model in place so that it
        # can later be used by the predict methods.
        self.models_[identifier].fit(X.copy(), y.copy())

    self._can_predict = True
586+
def predict(self, X):
    """Return, for each row, the index of the largest ``predict_proba`` value.

    The result of ``self.predict_proba(X)`` is reduced with ``np.argmax``
    along axis 1.
    """
    probabilities = self.predict_proba(X)
    return np.argmax(probabilities, axis=1)
589+
590+ def predict_proba (self , X ):
558591 if self ._keep_models is not True :
559592 raise ValueError (
560593 "Predict can only be called if 'keep_models==True'" )
561- if self ._resampling_strategy not in ['holdout' ,
562- 'holdout-iterative-fit' ]:
594+ if not self ._can_predict and \
595+ self ._resampling_strategy not in \
596+ ['holdout' , 'holdout-iterative-fit' ]:
563597 raise NotImplementedError (
564598 'Predict is currently only implemented for resampling '
565599 'strategy holdout.' )
566600
567- if self .models_ is None or len (self .models_ ) == 0 or len (
568- self .ensemble_indices_ ) == 0 :
601+ if self .models_ is None or len (self .models_ ) == 0 or \
602+ self .ensemble_ is None :
569603 self ._load_models ()
570604
571- predictions = []
572- for identifier in self .models_ :
573- if identifier not in self .ensemble_indices_ :
574- continue
575-
576- weight = self .ensemble_indices_ [identifier ]
605+ all_predictions = []
606+ for identifier in self .ensemble_ .get_model_identifiers ():
577607 model = self .models_ [identifier ]
578608
579609 X_ = X .copy ()
@@ -588,16 +618,16 @@ def predict(self, X):
588618 "while X_.shape is %s" %
589619 (model , str (prediction .shape ),
590620 str (X_ .shape )))
591- predictions .append (prediction * weight )
621+ all_predictions .append (prediction )
592622
593- if len (predictions ) == 0 :
623+ if len (all_predictions ) == 0 :
594624 raise ValueError ('Something went wrong generating the predictions. '
595625 'The ensemble should consist of the following '
596626 'models: %s, the following models were loaded: '
597627 '%s' % (str (list (self .ensemble_indices_ .keys ())),
598628 str (list (self .models_ .keys ()))))
599629
600- predictions = np . sum ( np . array ( predictions ), axis = 0 )
630+ predictions = self . ensemble_ . predict ( all_predictions )
601631 return predictions
602632
603633 def _load_models (self ):
@@ -610,42 +640,23 @@ def _load_models(self):
610640 if len (self .models_ ) == 0 :
611641 raise ValueError ('No models fitted!' )
612642
613- self .ensemble_indices_ = self ._backend .load_ensemble_indices_weights (
614- seed )
643+ self .ensemble_ = self ._backend .load_ensemble (seed )
615644
def score(self, X, y):
    """Score the ensemble's ``predict_proba`` output for ``X`` against ``y``.

    The predictions are handed to ``calculate_score`` together with the
    task, metric, label count and logger stored on this instance; its
    return value is returned unchanged.
    """
    prediction = self.predict_proba(X)
    return calculate_score(
        y,
        prediction,
        self._task,
        self._metric,
        self._label_num,
        logger=self._logger,
    )
625652
def show_models(self):
    """Return the ensemble's printable description of its models.

    Models and the ensemble are loaded via ``self._load_models()`` first
    when no models are present or no ensemble has been loaded. The string
    itself is produced by ``self.ensemble_.pprint_ensemble_string``.
    """
    no_models = self.models_ is None or len(self.models_) == 0
    if no_models or self.ensemble_ is None:
        self._load_models()

    return self.ensemble_.pprint_ensemble_string(self.models_)
649660
650661 def _save_ensemble_data (self , X , y ):
651662 """Split dataset and store Data for the ensemble script.
0 commit comments