Skip to content

Commit ff0e706

Browse files
authored
Merge pull request #167 from Ayaro/development
Fix parallel example exception
2 parents 2a2d2ad + 8fe709e commit ff0e706

File tree

4 files changed: +79 -25 lines changed

autosklearn/automl.py

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -471,7 +471,7 @@ def predict(self, X):
471471
predictions = self.ensemble_.predict(all_predictions)
472472
return predictions
473473

474-
def fit_ensemble(self, task=None, metric=None, precision='32',
474+
def fit_ensemble(self, y, task=None, metric=None, precision='32',
475475
dataset_name=None, ensemble_nbest=None,
476476
ensemble_size=None):
477477
if self._logger is None:
@@ -490,16 +490,33 @@ def _get_ensemble_process(self, time_left_for_ensembles,
490490

491491
if task is None:
492492
task = self._task
493+
else:
494+
self._task = task
495+
493496
if metric is None:
494497
metric = self._metric
498+
else:
499+
self._metric = metric
500+
495501
if precision is None:
496502
precision = self.precision
503+
else:
504+
self.precision = precision
505+
497506
if dataset_name is None:
498507
dataset_name = self._dataset_name
508+
else:
509+
self._dataset_name = dataset_name
510+
499511
if ensemble_nbest is None:
500512
ensemble_nbest = self._ensemble_nbest
513+
else:
514+
self._ensemble_nbest = ensemble_nbest
515+
501516
if ensemble_size is None:
502517
ensemble_size = self._ensemble_size
518+
else:
519+
self._ensemble_size = ensemble_size
503520

504521
return EnsembleBuilder(backend=self._backend,
505522
dataset_name=dataset_name,

autosklearn/estimators.py

Lines changed: 27 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ def refit(self, X, y):
4747
"""
4848
return self._automl.refit(X, y)
4949

50-
def fit_ensemble(self, task=None, metric=None, precision='32',
50+
def fit_ensemble(self, y, task=None, metric=None, precision='32',
5151
dataset_name=None, ensemble_nbest=None,
5252
ensemble_size=None):
5353
"""Build the ensemble.
@@ -58,7 +58,7 @@ def fit_ensemble(self, task=None, metric=None, precision='32',
5858
-------
5959
self
6060
"""
61-
return self._automl.fit_ensemble(task, metric, precision,
61+
return self._automl.fit_ensemble(y, task, metric, precision,
6262
dataset_name, ensemble_nbest,
6363
ensemble_size)
6464

@@ -253,12 +253,12 @@ def fit(self, *args, **kwargs):
253253
self._automl = self.build_automl()
254254
super(AutoSklearnEstimator, self).fit(*args, **kwargs)
255255

256-
def fit_ensemble(self, task=None, metric=None, precision='32',
256+
def fit_ensemble(self, y, task=None, metric=None, precision='32',
257257
dataset_name=None, ensemble_nbest=None,
258258
ensemble_size=None):
259259
if self._automl is None:
260260
self._automl = self.build_automl()
261-
return self._automl.fit_ensemble(task, metric, precision,
261+
return self._automl.fit_ensemble(y, task, metric, precision,
262262
dataset_name, ensemble_nbest,
263263
ensemble_size)
264264

@@ -426,6 +426,27 @@ def fit(self, X, y,
426426
force_all_finite=False)
427427
if scipy.sparse.issparse(X):
428428
X.sort_indices()
429+
430+
y = self._process_target_classes(y)
431+
432+
if self._n_outputs > 1:
433+
task = MULTILABEL_CLASSIFICATION
434+
else:
435+
if len(self._classes[0]) == 2:
436+
task = BINARY_CLASSIFICATION
437+
else:
438+
task = MULTICLASS_CLASSIFICATION
439+
440+
return self._automl.fit(X, y, task, metric, feat_type, dataset_name)
441+
442+
def fit_ensemble(self, y, task=None, metric=None, precision='32',
443+
dataset_name=None, ensemble_nbest=None,
444+
ensemble_size=None):
445+
self._process_target_classes(y)
446+
return self._automl.fit_ensemble(y, task, metric, precision, dataset_name,
447+
ensemble_nbest, ensemble_size)
448+
449+
def _process_target_classes(self, y):
429450
y = np.atleast_1d(y)
430451
if y.ndim == 2 and y.shape[1] == 1:
431452
warnings.warn("A column-vector y was passed when a 1d array was"
@@ -452,19 +473,12 @@ def fit(self, X, y,
452473

453474
self._n_classes = np.array(self._n_classes, dtype=np.int)
454475

455-
if self._n_outputs > 1:
456-
task = MULTILABEL_CLASSIFICATION
457-
else:
458-
if len(self._classes[0]) == 2:
459-
task = BINARY_CLASSIFICATION
460-
else:
461-
task = MULTICLASS_CLASSIFICATION
462-
463476
# TODO: fix metafeatures calculation to allow this!
464477
if y.shape[1] == 1:
465478
y = y.flatten()
466479

467-
return self._automl.fit(X, y, task, metric, feat_type, dataset_name)
480+
return y
481+
468482

469483
def predict(self, X):
470484
predicted_probabilities = self._automl.predict(X)

example/example_parallel.py

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,23 @@
11
# -*- encoding: utf-8 -*-
22
import multiprocessing
3-
43
import numpy as np
5-
4+
import shutil
65
import sklearn.datasets
76
import sklearn.metrics
8-
97
from autosklearn.classification import AutoSklearnClassifier
108
from autosklearn.constants import *
119

1210
tmp_folder = '/tmp/autosklearn_parallel_example_tmp'
1311
output_folder = '/tmp/autosklearn_parallel_example_out'
1412

13+
try:
14+
shutil.rmtree(tmp_folder)
15+
except OSError as e:
16+
pass
17+
try:
18+
shutil.rmtree(output_folder)
19+
except OSError:
20+
pass
1521

1622
def spawn_classifier(seed, dataset_name):
1723
"""Spawn a subprocess.
@@ -84,7 +90,8 @@ def spawn_classifier(seed, dataset_name):
8490

8591
# Both the ensemble_size and ensemble_nbest parameters can be changed now if
8692
# necessary
87-
automl.fit_ensemble(task=MULTICLASS_CLASSIFICATION,
93+
automl.fit_ensemble(y_train,
94+
task=MULTICLASS_CLASSIFICATION,
8895
metric=ACC_METRIC,
8996
precision='32',
9097
dataset_name='digits',

test/test_automl/test_estimators.py

Lines changed: 23 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@
55
import sys
66
import unittest
77

8+
import sklearn
9+
810
try:
911
import mock
1012
except ImportError:
@@ -17,7 +19,7 @@
1719
import autosklearn.pipeline.util as putil
1820
from autosklearn.classification import AutoSklearnClassifier
1921
from autosklearn.estimators import AutoMLClassifier
20-
from autosklearn.util.backend import Backend
22+
from autosklearn.util.backend import Backend, BackendContext
2123
from autosklearn.constants import *
2224

2325
sys.path.append(os.path.dirname(__file__))
@@ -93,13 +95,16 @@ def test_feat_type_wrong_arguments(self):
9395
cls.fit,
9496
X=X, y=y, feat_type=['Car']*100)
9597

96-
@unittest.skip("pSMAC not yet working with new python SMAC")
9798
def test_fit_pSMAC(self):
9899
output = os.path.join(self.test_dir, '..', '.tmp_estimator_fit_pSMAC')
99100
self._setUp(output)
100101

101102
X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
102103

104+
# test parallel Classifier to predict classes, not only indexes
105+
Y_train = Y_train + 1
106+
Y_test = Y_test + 1
107+
103108
automl = AutoSklearnClassifier(time_left_for_this_task=15,
104109
per_run_time_limit=15,
105110
output_folder=output,
@@ -128,10 +133,11 @@ def test_fit_pSMAC(self):
128133

129134
probas_test = np.zeros((len(Y_test), 3), dtype=float)
130135
for i, value in enumerate(Y_test):
131-
probas_test[i, value] = 1.0
136+
probas_test[i, value - 1] = 1.0
132137

133138
dummy = ArrayReturningDummyPredictor(probas_test)
134-
backend = Backend(output, output)
139+
context = BackendContext(output, output, False, False)
140+
backend = Backend(context)
135141
backend.save_model(dummy, 30, 1)
136142

137143
automl = AutoSklearnClassifier(time_left_for_this_task=15,
@@ -142,16 +148,26 @@ def test_fit_pSMAC(self):
142148
seed=2,
143149
initial_configurations_via_metalearning=0,
144150
ensemble_size=0)
145-
automl.fit(X_train, Y_train)
146-
automl.run_ensemble_builder(0, 1, 50).wait()
151+
automl.fit_ensemble(Y_train,
152+
task=MULTICLASS_CLASSIFICATION,
153+
metric=ACC_METRIC,
154+
precision='32',
155+
dataset_name='iris',
156+
ensemble_size=20,
157+
ensemble_nbest=50)
147158

148-
score = automl.score(X_test, Y_test)
159+
predictions = automl.predict(X_test)
160+
score = sklearn.metrics.accuracy_score(Y_test, predictions)
149161

150162
self.assertEqual(len(os.listdir(os.path.join(output, '.auto-sklearn',
151163
'ensembles'))), 1)
152164
self.assertGreaterEqual(score, 0.90)
153165
self.assertEqual(automl._automl._automl._task, MULTICLASS_CLASSIFICATION)
154166

167+
models = automl._automl._automl.models_
168+
classifier_types = [type(c) for c in models.values()]
169+
self.assertIn(ArrayReturningDummyPredictor, classifier_types)
170+
155171
del automl
156172
self._tearDown(output)
157173

0 commit comments

Comments
 (0)