ADD example for sequential execution of model and ensemble fitting

mfeurer · mfeurer · commit e952b51498bc · 2016-10-14T14:05:18.000+02:00
diff --git a/autosklearn/ensemble_builder.py b/autosklearn/ensemble_builder.py
@@ -194,7 +194,8 @@ def main(self):
                                             predictions.shape[1])
 
                 except Exception as e:
-                    self.logger.warning('Error loading %s: %s', basename, e)
+                    self.logger.warning('Error loading %s: %s - %s',
+                                        basename, type(e), e)
                     score = -1
 
                 model_names_to_scores[model_name] = score
diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py
@@ -256,7 +256,8 @@ def fit(self, *args, **kwargs):
     def fit_ensemble(self, task=None, metric=None, precision='32',
                      dataset_name=None, ensemble_nbest=None,
                      ensemble_size=None):
-        self._automl = self.build_automl()
+        if self._automl is None:
+            self._automl = self.build_automl()
         return self._automl.fit_ensemble(task, metric, precision,
                                          dataset_name, ensemble_nbest,
                                          ensemble_size)
diff --git a/doc/manual.rst b/doc/manual.rst
@@ -23,6 +23,20 @@ model by writing it to disk after every iteration. At the beginning of each
 iteration, SMAC loads all newly found data points. An example can be found in
 the example directory.
 
+In its default mode, auto-sklearn already uses two cores. The first one is
+used for model building, the second for building an ensemble every time a new
+machine learning model has finished training. The file
+`example_sequential.py` in the example directory describes how to run these
+tasks sequentially to use only a single core at a time.
+
+Furthermore, depending on the installation of scikit-learn and numpy,
+the model building procedure may use up to all cores. Such behaviour is
+unintended by auto-sklearn and is most likely due to numpy being installed
+from `pypi` as a binary wheel (`see here
+<http://scikit-learn-general.narkive.com/44ywvAHA/binary-wheel-packages-for-linux-are-coming>`_).
+Executing `export OPENBLAS_NUM_THREADS=1` should disable such behaviours and
+make numpy only use a single core at a time.
+
 Model persistence
 *****************
 
diff --git a/example/example_crossvalidation.py b/example/example_crossvalidation.py
@@ -1,6 +1,4 @@
 # -*- encoding: utf-8 -*-
-from __future__ import print_function
-
 import sklearn.datasets
 import numpy as np
 
diff --git a/example/example_holdout.py b/example/example_holdout.py
@@ -1,9 +1,6 @@
-# -*- encoding: utf-8 -*-
-from __future__ import print_function
 from operator import itemgetter
 
 import numpy as np
-import pandas as pd
 import sklearn.datasets
 import sklearn.metrics
 
diff --git a/example/example_parallel.py b/example/example_parallel.py
@@ -1,4 +1,4 @@
-import logging
+# -*- encoding: utf-8 -*-
 import multiprocessing
 
 import numpy as np
diff --git a/example/example_regression.py b/example/example_regression.py
@@ -1,6 +1,4 @@
 # -*- encoding: utf-8 -*-
-from __future__ import print_function
-
 import numpy as np
 import sklearn.datasets
 import sklearn.metrics
@@ -16,7 +14,7 @@ def main():
     np.random.shuffle(indices)
     X_train, X_test, y_train, y_test = train_test_split(X, y)
     automl = autosklearn.regression.AutoSklearnRegressor(
-        time_left_for_this_task=60, per_run_time_limit=30,
+        time_left_for_this_task=600,
         tmp_folder='/tmp/autoslearn_regression_example_tmp',
         output_folder='/tmp/autosklearn_regression_example_out')
     automl.fit(X_train, y_train, dataset_name='boston')

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-import logging`
	`1`	`+# -*- encoding: utf-8 -*-`
`2`	`2`	`import multiprocessing`
`3`	`3`
`4`	`4`	`import numpy as np`