Skip to content

Commit f4b72be

Browse files
authored
Merge pull request #354 from automl/development
Development
2 parents 7d33420 + 42430d1 commit f4b72be

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

51 files changed

+911
-1069
lines changed

CHANGES.md

Lines changed: 0 additions & 3 deletions
This file was deleted.

Dockerfile

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
FROM ubuntu
2+
3+
# System requirements
4+
RUN apt-get update && apt-get install -y \
5+
build-essential \
6+
curl \
7+
python3-pip \
8+
swig \
9+
&& rm -rf /var/lib/apt/lists/*
10+
11+
# Upgrade pip then install dependencies
12+
RUN pip3 install --upgrade pip
13+
RUN curl https://raw.githubusercontent.com/automl/auto-sklearn/master/requirements.txt \
14+
| xargs -n 1 -L 1 pip3 install
15+
16+
# Install
17+
RUN pip3 install \
18+
auto-sklearn \
19+
jupyter

autosklearn/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
__MANDATORY_PACKAGES__ = '''
77
numpy>=1.9
8-
scikit-learn==0.18.1
8+
scikit-learn>=0.18.1,<0.19
99
smac==0.5.0
1010
lockfile>=0.10
1111
ConfigSpace>=0.3.3,<0.4

autosklearn/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
"""Version information."""
22

33
# The following line *must* be the last in the module, exactly as formatted:
4-
__version__ = "0.2.0"
4+
__version__ = "0.2.1"

autosklearn/automl.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ def fit(self, X, y,
171171
raise ValueError('No metric given.')
172172
if not isinstance(metric, Scorer):
173173
raise ValueError('Metric must be instance of '
174-
'autosklearn.metric.Scorer.')
174+
'autosklearn.metrics.Scorer.')
175175

176176
if feat_type is not None and len(feat_type) != X.shape[1]:
177177
raise ValueError('Array feat_type does not have same number of '
@@ -531,8 +531,8 @@ def predict(self, X, batch_size=None, n_jobs=1):
531531
self._resampling_strategy not in \
532532
['holdout', 'holdout-iterative-fit']:
533533
raise NotImplementedError(
534-
'Predict is currently only implemented for resampling '
535-
'strategy %s.' % self._resampling_strategy)
534+
'Predict is currently not implemented for resampling '
535+
'strategy %s, please call refit().' % self._resampling_strategy)
536536

537537
if self.models_ is None or len(self.models_) == 0 or \
538538
self.ensemble_ is None:
@@ -764,12 +764,23 @@ def sprint_statistics(self):
764764
'limit: %d\n' % num_memout)
765765
return sio.getvalue()
766766

767-
def show_models(self):
767+
def get_models_with_weights(self):
768768
if self.models_ is None or len(self.models_) == 0 or \
769769
self.ensemble_ is None:
770770
self._load_models()
771771

772-
return self.ensemble_.pprint_ensemble_string(self.models_)
772+
return self.ensemble_.get_models_with_weights(self.models_)
773+
774+
def show_models(self):
775+
models_with_weights = self.get_models_with_weights()
776+
777+
with io.StringIO() as sio:
778+
sio.write("[")
779+
for weight, model in models_with_weights:
780+
sio.write("(%f, %s),\n" % (weight, model))
781+
sio.write("]")
782+
783+
return sio.getvalue()
773784

774785
def _create_search_space(self, tmp_dir, backend, datamanager,
775786
include_estimators=None,

autosklearn/ensembles/abstract_ensemble.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ def predict(self, base_models_predictions):
4242
self
4343

4444
@abstractmethod
45-
def pprint_ensemble_string(self, models):
46-
"""Return a nicely-readable representation of the ensmble.
45+
def get_models_with_weights(self, models):
46+
"""Return a list of (weight, model) pairs
4747
4848
Parameters
4949
----------
@@ -53,9 +53,10 @@ def pprint_ensemble_string(self, models):
5353
5454
Returns
5555
-------
56-
str
56+
list : [(weight_1, model_1), ..., (weight_n, model_n)]
5757
"""
5858

59+
5960
@abstractmethod
6061
def get_model_identifiers(self):
6162
"""Return identifiers of models in the ensemble.

autosklearn/ensembles/ensemble_selection.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import random
33

44
import numpy as np
5-
import six
65

76
from autosklearn.constants import *
87
from autosklearn.ensembles.abstract_ensemble import AbstractEnsemble
@@ -204,9 +203,9 @@ def __str__(self):
204203
enumerate(self.identifiers_)
205204
if self.weights_[idx] > 0]))
206205

207-
def pprint_ensemble_string(self, models):
206+
def get_models_with_weights(self, models):
208207
output = []
209-
sio = six.StringIO()
208+
210209
for i, weight in enumerate(self.weights_):
211210
identifier = self.identifiers_[i]
212211
model = models[identifier]
@@ -215,12 +214,7 @@ def pprint_ensemble_string(self, models):
215214

216215
output.sort(reverse=True, key=lambda t: t[0])
217216

218-
sio.write("[")
219-
for weight, model in output:
220-
sio.write("(%f, %s),\n" % (weight, model))
221-
sio.write("]")
222-
223-
return sio.getvalue()
217+
return output
224218

225219
def get_model_identifiers(self):
226220
return self.identifiers_

autosklearn/estimators.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,16 @@ def show_models(self):
7373
"""
7474
return self._automl.show_models()
7575

76+
def get_models_with_weights(self):
77+
"""Return the final ensemble found by auto-sklearn as a list of (weight, model) pairs.
78+
79+
Returns
80+
-------
81+
[(weight_1, model_1), ..., (weight_n, model_n)]
82+
83+
"""
84+
return self._automl.get_models_with_weights()
85+
7686
@property
7787
def cv_results_(self):
7888
return self._automl.cv_results_
@@ -171,15 +181,17 @@ def __init__(self,
171181
resampling_strategy : string, optional ('holdout')
172182
how to handle overfitting, might need 'resampling_strategy_arguments'
173183
174-
* 'holdout': 66:33 (train:test) split
175-
* 'holdout-iterative-fit': 66:33 (train:test) split, calls iterative
184+
* 'holdout': 67:33 (train:test) split
185+
* 'holdout-iterative-fit': 67:33 (train:test) split, calls iterative
176186
fit where possible
177187
* 'cv': crossvalidation, requires 'folds'
178188
179-
resampling_strategy_arguments : dict, optional if 'holdout' (None)
189+
resampling_strategy_arguments : dict, optional if 'holdout' (train_size default=0.67)
180190
Additional arguments for resampling_strategy
181-
* 'holdout': None
182-
* 'holdout-iterative-fit': None
191+
``train_size`` should be between 0.0 and 1.0 and represent the
192+
proportion of the dataset to include in the train split.
193+
* 'holdout': {'train_size': float}
194+
* 'holdout-iterative-fit': {'train_size': float}
183195
* 'cv': {'folds': int}
184196
185197
tmp_folder : string, optional (None)
@@ -339,7 +351,7 @@ def fit_ensemble(self, y, task=None, metric=None, precision='32',
339351
introduced in `Getting Most out of Ensemble Selection`.
340352
341353
ensemble_size : int
342-
Size of the ensemble built by `Ensomble Selection`.
354+
Size of the ensemble built by `Ensemble Selection`.
343355
344356
Returns
345357
-------

autosklearn/evaluation/__init__.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -234,21 +234,24 @@ def run(self, config, instance=None,
234234

235235
def get_splitter(self, D):
236236
y = D.data['Y_train'].ravel()
237-
237+
train_size = 0.67
238+
if self.resampling_strategy_args:
239+
train_size = self.resampling_strategy_args.get('train_size', train_size)
240+
test_size = 1 - train_size
238241
if D.info['task'] in CLASSIFICATION_TASKS and \
239242
D.info['task'] != MULTILABEL_CLASSIFICATION:
240243

241244
if self.resampling_strategy in ['holdout',
242245
'holdout-iterative-fit']:
243246
try:
244-
cv = StratifiedShuffleSplit(n_splits=1, train_size=0.67,
245-
test_size=0.33, random_state=1)
247+
cv = StratifiedShuffleSplit(n_splits=1, train_size=train_size,
248+
test_size=test_size, random_state=1)
246249
test_cv = copy.deepcopy(cv)
247250
next(test_cv.split(y, y))
248251
except ValueError as e:
249252
if 'The least populated class in y has only' in e.args[0]:
250-
cv = ShuffleSplit(n_splits=1, train_size=0.67,
251-
test_size=0.33, random_state=1)
253+
cv = ShuffleSplit(n_splits=1, train_size=train_size,
254+
test_size=test_size, random_state=1)
252255
else:
253256
raise
254257

@@ -261,8 +264,8 @@ def get_splitter(self, D):
261264
else:
262265
if self.resampling_strategy in ['holdout',
263266
'holdout-iterative-fit']:
264-
cv = ShuffleSplit(n_splits=1, train_size=0.67,
265-
test_size=0.33, random_state=1)
267+
cv = ShuffleSplit(n_splits=1, train_size=train_size,
268+
test_size=test_size, random_state=1)
266269
elif self.resampling_strategy in ['cv', 'partial-cv',
267270
'partial-cv-iterative-fit']:
268271
cv = KFold(n_splits=self.resampling_strategy_args['folds'],

autosklearn/pipeline/base.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,12 @@ def predict(self, X, batch_size=None):
136136
if batch_size is None:
137137
return super(BasePipeline, self).predict(X).astype(self._output_dtype)
138138
else:
139-
if type(batch_size) is not int or batch_size <= 0:
140-
raise Exception("batch_size must be a positive integer")
139+
if not isinstance(batch_size, int):
140+
raise ValueError("Argument 'batch_size' must be of type int, "
141+
"but is '%s'" % type(batch_size))
142+
if batch_size <= 0:
143+
raise ValueError("Argument 'batch_size' must be positive, "
144+
"but is %d" % batch_size)
141145

142146
else:
143147
if self.num_targets == 1:

0 commit comments

Comments
 (0)