Skip to content

Commit 7d6a05c

Browse files
authored
Merge pull request #896 from automl/development
Release 0.8.0
2 parents 9cb198c + 23dc82d commit 7d6a05c

File tree

97 files changed

+9639
-79
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

97 files changed

+9639
-79
lines changed

MANIFEST.in

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,3 +4,5 @@ recursive-include autosklearn/metalearning/files *.txt
44
include autosklearn/util/logging.yaml
55
recursive-include autosklearn *.pyx
66
include requirements.txt
7+
recursive-include autosklearn/experimental/askl2_portfolios *.json
8+
include autosklearn/experimental/askl2_training_data.json

README.md

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,28 @@ auto-sklearn is an automated machine learning toolkit and a drop-in replacement
44

55
Find the documentation [here](http://automl.github.io/auto-sklearn/)
66

7+
## Automated Machine Learning in four lines of code
8+
9+
```python
10+
import autosklearn.classification
11+
cls = autosklearn.classification.AutoSklearnClassifier()
12+
cls.fit(X_train, y_train)
13+
predictions = cls.predict(X_test)
14+
```
15+
16+
## Relevant publications
17+
18+
Efficient and Robust Automated Machine Learning
19+
Matthias Feurer, Aaron Klein, Katharina Eggensperger, Jost Springenberg, Manuel Blum and Frank Hutter
20+
Advances in Neural Information Processing Systems 28 (2015)
21+
http://papers.nips.cc/paper/5872-efficient-and-robust-automated-machine-learning.pdf
22+
23+
Auto-Sklearn 2.0: The Next Generation
24+
Authors: Matthias Feurer, Katharina Eggensperger, Stefan Falkner, Marius Lindauer and Frank Hutter
25+
To appear
26+
27+
## Status
28+
729
Status for master branch
830

931
[![Build Status](https://travis-ci.org/automl/auto-sklearn.svg?branch=master)](https://travis-ci.org/automl/auto-sklearn)

autosklearn/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
"""Version information."""
22

33
# The following line *must* be the last in the module, exactly as formatted:
4-
__version__ = "0.7.1"
4+
__version__ = "0.8.0"

autosklearn/automl.py

Lines changed: 17 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@
3636
from autosklearn.util.hash import hash_array_or_matrix
3737
from autosklearn.metrics import f1_macro, accuracy, r2
3838
from autosklearn.constants import MULTILABEL_CLASSIFICATION, MULTICLASS_CLASSIFICATION, \
39-
REGRESSION_TASKS, REGRESSION, BINARY_CLASSIFICATION
39+
REGRESSION_TASKS, REGRESSION, BINARY_CLASSIFICATION, MULTIOUTPUT_REGRESSION
4040

4141

4242
def _model_predict(model, X, batch_size, logger, task):
@@ -936,13 +936,16 @@ def _check_X(self, X):
936936

937937
def _check_y(self, y):
938938
y = sklearn.utils.check_array(y, ensure_2d=False)
939-
940939
y = np.atleast_1d(y)
941-
if y.ndim == 2 and y.shape[1] == 1:
940+
941+
if y.ndim == 1:
942+
return y
943+
elif y.ndim == 2 and y.shape[1] == 1:
942944
warnings.warn("A column-vector y was passed when a 1d array was"
943945
" expected. Will change shape via np.ravel().",
944946
sklearn.utils.DataConversionWarning, stacklevel=2)
945947
y = np.ravel(y)
948+
return y
946949

947950
return y
948951

@@ -1097,6 +1100,9 @@ def predict_proba(self, X, batch_size=None, n_jobs=1):
10971100
class AutoMLRegressor(BaseAutoML):
10981101
def __init__(self, *args, **kwargs):
10991102
super().__init__(*args, **kwargs)
1103+
self._task_mapping = {'continuous-multioutput': MULTIOUTPUT_REGRESSION,
1104+
'continuous': REGRESSION,
1105+
'multiclass': REGRESSION}
11001106

11011107
def fit(
11021108
self,
@@ -1110,17 +1116,20 @@ def fit(
11101116
load_models: bool = True,
11111117
):
11121118
X, y = super()._perform_input_checks(X, y)
1113-
_n_outputs = 1 if len(y.shape) == 1 else y.shape[1]
1114-
if _n_outputs > 1:
1115-
raise NotImplementedError(
1116-
'Multi-output regression is not implemented.')
1119+
y_task = type_of_target(y)
1120+
task = self._task_mapping.get(y_task)
1121+
if task is None:
1122+
raise ValueError('Cannot work on data of type %s' % y_task)
1123+
11171124
if self._metric is None:
11181125
self._metric = r2
1126+
1127+
self._n_outputs = 1 if len(y.shape) == 1 else y.shape[1]
11191128
return super().fit(
11201129
X, y,
11211130
X_test=X_test,
11221131
y_test=y_test,
1123-
task=REGRESSION,
1132+
task=task,
11241133
feat_type=feat_type,
11251134
dataset_name=dataset_name,
11261135
only_return_configuration_space=only_return_configuration_space,

autosklearn/constants.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,9 @@
44
MULTICLASS_CLASSIFICATION = 2
55
MULTILABEL_CLASSIFICATION = 3
66
REGRESSION = 4
7+
MULTIOUTPUT_REGRESSION = 5
78

8-
REGRESSION_TASKS = [REGRESSION]
9+
REGRESSION_TASKS = [REGRESSION, MULTIOUTPUT_REGRESSION]
910
CLASSIFICATION_TASKS = [BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION,
1011
MULTILABEL_CLASSIFICATION]
1112

@@ -15,10 +16,12 @@
1516
{BINARY_CLASSIFICATION: 'binary.classification',
1617
MULTICLASS_CLASSIFICATION: 'multiclass.classification',
1718
MULTILABEL_CLASSIFICATION: 'multilabel.classification',
18-
REGRESSION: 'regression'}
19+
REGRESSION: 'regression',
20+
MULTIOUTPUT_REGRESSION: 'multioutput.regression'}
1921

2022
STRING_TO_TASK_TYPES = \
2123
{'binary.classification': BINARY_CLASSIFICATION,
2224
'multiclass.classification': MULTICLASS_CLASSIFICATION,
2325
'multilabel.classification': MULTILABEL_CLASSIFICATION,
24-
'regression': REGRESSION}
26+
'regression': REGRESSION,
27+
'multioutput.regression': MULTIOUTPUT_REGRESSION}

autosklearn/data/xy_data_manager.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
from scipy import sparse
55

66
from autosklearn.constants import STRING_TO_TASK_TYPES, REGRESSION, BINARY_CLASSIFICATION, \
7-
MULTICLASS_CLASSIFICATION, MULTILABEL_CLASSIFICATION
7+
MULTICLASS_CLASSIFICATION, MULTILABEL_CLASSIFICATION, MULTIOUTPUT_REGRESSION
88
from autosklearn.data.abstract_data_manager import AbstractDataManager
99

1010

@@ -27,6 +27,7 @@ def __init__(self, X, y, X_test, y_test, task, feat_type, dataset_name):
2727
label_num = {
2828
REGRESSION: 1,
2929
BINARY_CLASSIFICATION: 2,
30+
MULTIOUTPUT_REGRESSION: y.shape[-1],
3031
MULTICLASS_CLASSIFICATION: len(np.unique(y)),
3132
MULTILABEL_CLASSIFICATION: y.shape[-1]
3233
}

autosklearn/estimators.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -766,15 +766,15 @@ def fit(self, X, y,
766766
X : array-like or sparse matrix of shape = [n_samples, n_features]
767767
The training input samples.
768768
769-
y : array-like, shape = [n_samples]
769+
y : array-like, shape = [n_samples] or [n_samples, n_targets]
770770
The regression target.
771771
772772
X_test : array-like or sparse matrix of shape = [n_samples, n_features]
773773
Test data input samples. Will be used to save test predictions for
774774
all models. This allows to evaluate the performance of Auto-sklearn
775775
over time.
776776
777-
y_test : array-like, shape = [n_samples]
777+
y_test : array-like, shape = [n_samples] or [n_samples, n_targets]
778778
The regression target. Will be used to calculate the test error
779779
of all models. This allows to evaluate the performance of
780780
Auto-sklearn over time.
@@ -799,7 +799,6 @@ def fit(self, X, y,
799799
target_type = type_of_target(y)
800800
if target_type in ['multiclass-multioutput',
801801
'multilabel-indicator',
802-
'continuous-multioutput',
803802
'unknown',
804803
]:
805804
raise ValueError("regression with data of type %s is not"

autosklearn/evaluation/abstract_evaluator.py

Lines changed: 14 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
REGRESSION_TASKS,
1616
MULTILABEL_CLASSIFICATION,
1717
MULTICLASS_CLASSIFICATION,
18+
MULTIOUTPUT_REGRESSION
1819
)
1920
from autosklearn.pipeline.implementations.util import (
2021
convert_multioutput_multiclass_to_multilabel
@@ -204,12 +205,19 @@ def _get_model(self):
204205
random_state=self.seed,
205206
init_params=self._init_params)
206207
else:
207-
dataset_properties = {
208-
'task': self.task_type,
209-
'sparse': self.datamanager.info['is_sparse'] == 1,
210-
'multilabel': self.task_type == MULTILABEL_CLASSIFICATION,
211-
'multiclass': self.task_type == MULTICLASS_CLASSIFICATION,
212-
}
208+
if self.task_type in REGRESSION_TASKS:
209+
dataset_properties = {
210+
'task': self.task_type,
211+
'sparse': self.datamanager.info['is_sparse'] == 1,
212+
'multioutput': self.task_type == MULTIOUTPUT_REGRESSION,
213+
}
214+
else:
215+
dataset_properties = {
216+
'task': self.task_type,
217+
'sparse': self.datamanager.info['is_sparse'] == 1,
218+
'multilabel': self.task_type == MULTILABEL_CLASSIFICATION,
219+
'multiclass': self.task_type == MULTICLASS_CLASSIFICATION,
220+
}
213221
model = self.model_class(config=self.configuration,
214222
dataset_properties=dataset_properties,
215223
random_state=self.seed,

autosklearn/experimental/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)