automl
diff --git a/CHANGES.md b/CHANGES.md
Lines changed: 0 additions & 3 deletions
diff --git a/Dockerfile b/Dockerfile
Lines changed: 19 additions & 0 deletions
diff --git a/autosklearn/__init__.py b/autosklearn/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/autosklearn/__version__.py b/autosklearn/__version__.py
Lines changed: 1 addition & 1 deletion
diff --git a/autosklearn/automl.py b/autosklearn/automl.py
Lines changed: 16 additions & 5 deletions
diff --git a/autosklearn/ensembles/abstract_ensemble.py b/autosklearn/ensembles/abstract_ensemble.py
Lines changed: 4 additions & 3 deletions
diff --git a/autosklearn/ensembles/ensemble_selection.py b/autosklearn/ensembles/ensemble_selection.py
Lines changed: 3 additions & 9 deletions
diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py
Lines changed: 18 additions & 6 deletions
diff --git a/autosklearn/evaluation/__init__.py b/autosklearn/evaluation/__init__.py
Lines changed: 10 additions & 7 deletions
diff --git a/autosklearn/pipeline/base.py b/autosklearn/pipeline/base.py
Lines changed: 6 additions & 2 deletions
@@ -0,0 +1,19 @@
+FROM ubuntu
+
+# System requirements
+RUN apt-get update && apt-get install -y \
+  build-essential \
+  curl \
+  python3-pip \
+  swig \
+  && rm -rf /var/lib/apt/lists/*
+
+# Upgrade pip then install dependencies
+RUN pip3 install --upgrade pip
+RUN curl https://raw.githubusercontent.com/automl/auto-sklearn/master/requirements.txt \
+  | xargs -n 1 -L 1 pip3 install
+
+# Install
+RUN pip3 install \
+  auto-sklearn \
+  jupyter
@@ -5,7 +5,7 @@
 
 __MANDATORY_PACKAGES__ = '''
 numpy>=1.9
-scikit-learn==0.18.1
+scikit-learn>=0.18.1,<0.19
 smac==0.5.0
 lockfile>=0.10
 ConfigSpace>=0.3.3,<0.4
 
@@ -1,4 +1,4 @@
 """Version information."""
 
 # The following line *must* be the last in the module, exactly as formatted:
-__version__ = "0.2.0"
+__version__ = "0.2.1"
@@ -171,7 +171,7 @@ def fit(self, X, y,
             raise ValueError('No metric given.')
         if not isinstance(metric, Scorer):
             raise ValueError('Metric must be instance of '
-                             'autosklearn.metric.Scorer.')
+                             'autosklearn.metrics.Scorer.')
 
         if feat_type is not None and len(feat_type) != X.shape[1]:
             raise ValueError('Array feat_type does not have same number of '
@@ -531,8 +531,8 @@ def predict(self, X, batch_size=None, n_jobs=1):
                 self._resampling_strategy not in  \
                         ['holdout', 'holdout-iterative-fit']:
             raise NotImplementedError(
-                'Predict is currently only implemented for resampling '
-                'strategy %s.' % self._resampling_strategy)
+                'Predict is currently not implemented for resampling '
+                'strategy %s, please call refit().' % self._resampling_strategy)
 
         if self.models_ is None or len(self.models_) == 0 or \
                 self.ensemble_ is None:
@@ -764,12 +764,23 @@ def sprint_statistics(self):
                   'limit: %d\n' % num_memout)
         return sio.getvalue()
 
-    def show_models(self):
+    def get_models_with_weights(self):
         if self.models_ is None or len(self.models_) == 0 or \
                 self.ensemble_ is None:
             self._load_models()
 
-        return self.ensemble_.pprint_ensemble_string(self.models_)
+        return self.ensemble_.get_models_with_weights(self.models_)
+
+    def show_models(self):
+        models_with_weights = self.get_models_with_weights()
+
+        with io.StringIO() as sio:
+            sio.write("[")
+            for weight, model in models_with_weights:
+                sio.write("(%f, %s),\n" % (weight, model))
+            sio.write("]")
+
+            return sio.getvalue()
 
     def _create_search_space(self, tmp_dir, backend, datamanager,
                              include_estimators=None,
 
@@ -42,8 +42,8 @@ def predict(self, base_models_predictions):
         self
 
     @abstractmethod
-    def pprint_ensemble_string(self, models):
-        """Return a nicely-readable representation of the ensmble.
+    def get_models_with_weights(self, models):
+        """Return a list of (weight, model) pairs
 
         Parameters
         ----------
@@ -53,9 +53,10 @@ def pprint_ensemble_string(self, models):
 
         Returns
         -------
-        str
+        list : [(weight_1, model_1), ..., (weight_n, model_n)]
         """
 
+
     @abstractmethod
     def get_model_identifiers(self):
         """Return identifiers of models in the ensemble.
 
@@ -2,7 +2,6 @@
 import random
 
 import numpy as np
-import six
 
 from autosklearn.constants import *
 from autosklearn.ensembles.abstract_ensemble import AbstractEnsemble
@@ -204,9 +203,9 @@ def __str__(self):
                           enumerate(self.identifiers_)
                           if self.weights_[idx] > 0]))
 
-    def pprint_ensemble_string(self, models):
+    def get_models_with_weights(self, models):
         output = []
-        sio = six.StringIO()
+
         for i, weight in enumerate(self.weights_):
             identifier = self.identifiers_[i]
             model = models[identifier]
@@ -215,12 +214,7 @@ def pprint_ensemble_string(self, models):
 
         output.sort(reverse=True, key=lambda t: t[0])
 
-        sio.write("[")
-        for weight, model in output:
-            sio.write("(%f, %s),\n" % (weight, model))
-        sio.write("]")
-
-        return sio.getvalue()
+        return output
 
     def get_model_identifiers(self):
         return self.identifiers_
@@ -73,6 +73,16 @@ def show_models(self):
         """
         return self._automl.show_models()
 
+    def get_models_with_weights(self):
+        """Return (weight, model) pairs of the ensemble found by auto-sklearn.
+
+        Returns
+        -------
+        [(weight_1, model_1), ..., (weight_n, model_n)]
+
+        """
+        return self._automl.get_models_with_weights()
+
     @property
     def cv_results_(self):
         return self._automl.cv_results_
@@ -171,15 +181,17 @@ def __init__(self,
         resampling_strategy : string, optional ('holdout')
             how to to handle overfitting, might need 'resampling_strategy_arguments'
 
-            * 'holdout': 66:33 (train:test) split
-            * 'holdout-iterative-fit':  66:33 (train:test) split, calls iterative
+            * 'holdout': 67:33 (train:test) split
+            * 'holdout-iterative-fit':  67:33 (train:test) split, calls iterative
               fit where possible
             * 'cv': crossvalidation, requires 'folds'
 
-        resampling_strategy_arguments : dict, optional if 'holdout' (None)
+        resampling_strategy_arguments : dict, optional if 'holdout' (train_size default=0.67)
             Additional arguments for resampling_strategy
-            * 'holdout': None
-            * 'holdout-iterative-fit':  None
+            ``train_size`` should be between 0.0 and 1.0 and represent the
+            proportion of the dataset to include in the train split.
+            * 'holdout': {'train_size': float}
+            * 'holdout-iterative-fit':  {'train_size': float}
             * 'cv': {'folds': int}
 
         tmp_folder : string, optional (None)
@@ -339,7 +351,7 @@ def fit_ensemble(self, y, task=None, metric=None, precision='32',
             introduced in `Getting Most out of Ensemble Selection`.
 
         ensemble_size : int
-            Size of the ensemble built by `Ensomble Selection`.
+            Size of the ensemble built by `Ensemble Selection`.
 
         Returns
         -------
 
@@ -234,21 +234,24 @@ def run(self, config, instance=None,
 
     def get_splitter(self, D):
         y = D.data['Y_train'].ravel()
-
+        train_size = 0.67
+        if self.resampling_strategy_args:
+            train_size = self.resampling_strategy_args.get('train_size', train_size)
+        test_size = 1 - train_size
         if D.info['task'] in CLASSIFICATION_TASKS and \
                         D.info['task'] != MULTILABEL_CLASSIFICATION:
 
             if self.resampling_strategy in ['holdout',
                                             'holdout-iterative-fit']:
                 try:
-                    cv = StratifiedShuffleSplit(n_splits=1, train_size=0.67,
-                                                test_size=0.33, random_state=1)
+                    cv = StratifiedShuffleSplit(n_splits=1, train_size=train_size,
+                                                test_size=test_size, random_state=1)
                     test_cv = copy.deepcopy(cv)
                     next(test_cv.split(y, y))
                 except ValueError as e:
                     if 'The least populated class in y has only' in e.args[0]:
-                        cv = ShuffleSplit(n_splits=1, train_size=0.67,
-                                          test_size=0.33, random_state=1)
+                        cv = ShuffleSplit(n_splits=1, train_size=train_size,
+                                          test_size=test_size, random_state=1)
                     else:
                         raise
 
@@ -261,8 +264,8 @@ def get_splitter(self, D):
         else:
             if self.resampling_strategy in ['holdout',
                                             'holdout-iterative-fit']:
-                cv = ShuffleSplit(n_splits=1, train_size=0.67,
-                                  test_size=0.33, random_state=1)
+                cv = ShuffleSplit(n_splits=1, train_size=train_size,
+                                  test_size=test_size, random_state=1)
             elif self.resampling_strategy in ['cv', 'partial-cv',
                                               'partial-cv-iterative-fit']:
                 cv = KFold(n_splits=self.resampling_strategy_args['folds'],
 
@@ -136,8 +136,12 @@ def predict(self, X, batch_size=None):
         if batch_size is None:
             return super(BasePipeline, self).predict(X).astype(self._output_dtype)
         else:
-            if type(batch_size) is not int or batch_size <= 0:
-                raise Exception("batch_size must be a positive integer")
+            if not isinstance(batch_size, int):
+                raise ValueError("Argument 'batch_size' must be of type int, "
+                                 "but is '%s'" % type(batch_size))
+            if batch_size <= 0:
+                raise ValueError("Argument 'batch_size' must be positive, "
+                                 "but is %d" % batch_size)
 
             else:
                 if self.num_targets == 1: