
Commit 4643aa5

Merge pull request #282 from automl/FIX#121
Fix#121
2 parents (e87e812 + 4026717), commit 4643aa5

File tree: 4 files changed (+187, -11 lines)


autosklearn/estimators.py

Lines changed: 7 additions & 10 deletions
@@ -14,7 +14,6 @@
 from autosklearn.util.backend import create
 
 
-
 class AutoMLDecorator(object):
 
     def __init__(self, automl):
@@ -379,7 +378,7 @@ def fit(self, X, y,
         y : array-like, shape = [n_samples] or [n_samples, n_outputs]
             The target classes.
 
-        metric : callable, optional (default='acc_metric')
+        metric : callable, optional (default='autosklearn.metrics.accuracy')
             An instance of :class:`autosklearn.metrics.Scorer` as created by
             :meth:`autosklearn.metrics.make_scorer`. These are the `Built-in
             Metrics`_.
@@ -388,7 +387,7 @@ def fit(self, X, y,
             List of str of `len(X.shape[1])` describing the attribute type.
             Possible types are `Categorical` and `Numerical`. `Categorical`
             attributes will be automatically One-Hot encoded. The values
-            used for a categorical attribute must be integers, obtainde for
+            used for a categorical attribute must be integers, obtained for
             example by `sklearn.preprocessing.LabelEncoder
             <http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html>`_.
 
@@ -464,11 +463,10 @@ def fit(self, X, y,
         y : array-like, shape = [n_samples] or [n_samples, n_outputs]
             The regression target.
 
-        metric : str, optional (default='r2_metric')
-            The metric to optimize for. Can be one of: ['r2_metric',
-            'a_metric']. A description of the metrics can be found in
-            `the paper describing the AutoML Challenge
-            <http://www.causality.inf.ethz.ch/AutoML/automl_ijcnn15.pdf>`_.
+        metric : callable, optional (default='autosklearn.metrics.accuracy')
+            An instance of :class:`autosklearn.metrics.Scorer` as created by
+            :meth:`autosklearn.metrics.make_scorer`. These are the `Built-in
+            Metrics`_.
 
         feat_type : list, optional (default=None)
             List of str of `len(X.shape[1])` describing the attribute type.
@@ -541,7 +539,7 @@ def fit(self, X, y,
         if task == MULTILABEL_CLASSIFICATION:
             metric = f1_macro
         else:
-            metric=accuracy
+            metric = accuracy
 
         y = self._process_target_classes(y)
 
@@ -586,7 +584,6 @@ def _process_target_classes(self, y):
 
         return y
 
-
     def predict(self, X, batch_size=None, n_jobs=1):
         predicted_probabilities = self._automl.predict(
             X, batch_size=batch_size, n_jobs=n_jobs)
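
The updated docstring above replaces the old string-based metric names ('acc_metric', 'r2_metric') with Scorer objects. A minimal sketch of a call against this API (the digits dataset and the 60-second budget are illustrative, not part of the commit):

# Sketch only: pass a built-in Scorer via the new `metric` argument of fit().
import sklearn.datasets
import sklearn.metrics
import sklearn.model_selection

import autosklearn.classification
import autosklearn.metrics

digits = sklearn.datasets.load_digits()
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    digits.data, digits.target, random_state=1)

cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60, per_run_time_limit=30)
# `metric` is now an autosklearn.metrics.Scorer, e.g. the built-in accuracy.
cls.fit(X_train, y_train, metric=autosklearn.metrics.accuracy)

predictions = cls.predict(X_test)
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))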

doc/manual.rst

Lines changed: 4 additions & 1 deletion
@@ -20,6 +20,9 @@ aspects of its usage:
 * `Parallel usage <https://github.com/automl/auto-sklearn/blob/master/example/example_parallel.py>`_
 * `Sequential usage <https://github.com/automl/auto-sklearn/blob/master/example/example_sequential.py>`_
 * `Regression <https://github.com/automl/auto-sklearn/blob/master/example/example_regression.py>`_
+* `Continuous and Categorical Data <https://github.com/automl/auto-sklearn/blob/master/example/example_feature_types.py>`_
+* `Using Custom metrics <https://github.com/automl/auto-sklearn/blob/master/example/example_metrics.py>`_
+
 
 Time and memory limits
 ======================
@@ -64,7 +67,7 @@ For a full list please have a look at the source code (in `autosklearn/pipeline/
 * `Regressors <https://github.com/automl/auto-sklearn/tree/master/autosklearn/pipeline/components/regression>`_
 * `Preprocessors <https://github.com/automl/auto-sklearn/tree/master/autosklearn/pipeline/components/feature_preprocessing>`_
 
-Turning of preprocessing
+Turning off preprocessing
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
 Preprocessing in *auto-sklearn* is divided into data preprocessing and
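
The renamed "Turning off preprocessing" section covers disabling the feature preprocessing step. A hedged sketch of that configuration, assuming auto-sklearn's `include_preprocessors` constructor argument (the argument itself is not part of this diff; data preprocessing such as one-hot encoding cannot be switched off):

# Sketch only: restrict the feature preprocessing choices to 'no_preprocessing'.
import autosklearn.classification

cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    per_run_time_limit=30,
    include_preprocessors=['no_preprocessing'],
)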

example/example_feature_types.py

Lines changed: 57 additions & 0 deletions
@@ -0,0 +1,57 @@
# -*- encoding: utf-8 -*-
import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics

import autosklearn.classification

try:
    import openml
except ImportError:
    print("#"*80 + """
    To run this example you need to install openml-python:

    git+https://github.com/renatopp/liac-arff
    # OpenML is currently not on pypi, use an old version to not depend on
    # scikit-learn 0.18
    requests
    xmltodict
    git+https://github.com/renatopp/liac-arff
    git+https://github.com/openml/""" +
          "openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1\n""" +
          "#"*80)
    raise


def main():
    # Load adult dataset from openml.org, see https://www.openml.org/t/2117
    openml.config.apikey = '610344db6388d9ba34f6db45a3cf71de'

    task = openml.tasks.get_task(2117)
    train_indices, test_indices = task.get_train_test_split_indices()
    X, y = task.get_X_and_y()

    X_train = X[train_indices]
    y_train = y[train_indices]
    X_test = X[test_indices]
    y_test = y[test_indices]

    dataset = task.get_dataset()
    _, _, categorical_indicator = dataset.\
        get_data(target=task.target_name, return_categorical_indicator=True)

    # Create feature type list from openml.org indicator and run autosklearn
    feat_type = ['categorical' if ci else 'numerical'
                 for ci in categorical_indicator]

    cls = autosklearn.classification.\
        AutoSklearnClassifier(time_left_for_this_task=120,
                              per_run_time_limit=30)
    cls.fit(X_train, y_train, feat_type=feat_type)

    predictions = cls.predict(X_test)
    print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))


if __name__ == "__main__":
    main()
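
The example above gets integer-coded categorical columns directly from OpenML. When the raw values are strings, the fit() docstring changed in this commit asks for integer codes, obtained for example with sklearn.preprocessing.LabelEncoder. A hedged sketch with synthetic data (the color column and sample sizes are illustrative only):

# Sketch only: encode a string-valued categorical column before passing feat_type.
import numpy as np
import sklearn.preprocessing

import autosklearn.classification

rng = np.random.RandomState(1)
colors = rng.choice(['red', 'green', 'blue'], size=200)  # categorical column
values = rng.rand(200)                                   # numerical column
y = (colors == 'red').astype(int)

X = np.empty((200, 2))
X[:, 0] = sklearn.preprocessing.LabelEncoder().fit_transform(colors)
X[:, 1] = values

feat_type = ['categorical', 'numerical']
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60, per_run_time_limit=30)
cls.fit(X, y, feat_type=feat_type)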

example/example_metrics.py

Lines changed: 119 additions & 0 deletions
@@ -0,0 +1,119 @@
# -*- encoding: utf-8 -*-
import numpy as np

import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics

import autosklearn.classification
import autosklearn.metrics

try:
    import openml
except ImportError:
    print("#"*80 + """
    To run this example you need to install openml-python:

    git+https://github.com/renatopp/liac-arff
    # OpenML is currently not on pypi, use an old version to not depend on
    # scikit-learn 0.18
    requests
    xmltodict
    git+https://github.com/renatopp/liac-arff
    git+https://github.com/openml/""" +
          "openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1\n""" +
          "#"*80)
    raise


def accuracy(solution, prediction):
    # function defining accuracy
    return np.mean(solution == prediction)


def accuracy_wk(solution, prediction, dummy):
    # function defining accuracy and accepting an additional argument
    assert dummy is None
    return np.mean(solution == prediction)


def main():
    # Load adult dataset from openml.org, see https://www.openml.org/t/2117
    openml.config.apikey = '610344db6388d9ba34f6db45a3cf71de'

    task = openml.tasks.get_task(2117)
    train_indices, test_indices = task.get_train_test_split_indices()
    X, y = task.get_X_and_y()

    X_train = X[train_indices]
    y_train = y[train_indices]
    X_test = X[test_indices]
    y_test = y[test_indices]

    dataset = task.get_dataset()
    _, _, categorical_indicator = dataset.\
        get_data(target=task.target_name, return_categorical_indicator=True)

    # Create feature type list from openml.org indicator and run autosklearn
    feat_type = ['categorical' if ci else 'numerical'
                 for ci in categorical_indicator]

    # Print a list of available metrics
    print("Available CLASSIFICATION metrics autosklearn.metrics.*:")
    print("\t*" + "\n\t*".join(autosklearn.metrics.CLASSIFICATION_METRICS))

    print("Available REGRESSION metrics autosklearn.metrics.*:")
    print("\t*" + "\n\t*".join(autosklearn.metrics.REGRESSION_METRICS))

    # First example: Use predefined accuracy metric
    print("#"*80)
    print("Use predefined accuracy metric")
    cls = autosklearn.classification.\
        AutoSklearnClassifier(time_left_for_this_task=60,
                              per_run_time_limit=30, seed=1)
    cls.fit(X_train, y_train, feat_type=feat_type,
            metric=autosklearn.metrics.accuracy)

    predictions = cls.predict(X_test)
    print("Accuracy score {:g} using {:s}".
          format(sklearn.metrics.accuracy_score(y_test, predictions),
                 cls._automl._automl._metric.name))

    print("#"*80)
    print("Use self defined accuracy metric")
    accuracy_scorer = autosklearn.metrics.make_scorer(name="accu",
                                                      score_func=accuracy,
                                                      greater_is_better=True,
                                                      needs_proba=False,
                                                      needs_threshold=False)
    cls = autosklearn.classification.\
        AutoSklearnClassifier(time_left_for_this_task=60,
                              per_run_time_limit=30, seed=1)
    cls.fit(X_train, y_train, feat_type=feat_type, metric=accuracy_scorer)

    predictions = cls.predict(X_test)
    print("Accuracy score {:g} using {:s}".
          format(sklearn.metrics.accuracy_score(y_test, predictions),
                 cls._automl._automl._metric.name))

    print("#"*80)
    print("Use self defined accuracy with additional argument")
    accuracy_scorer = autosklearn.metrics.make_scorer(name="accu_add",
                                                      score_func=accuracy_wk,
                                                      greater_is_better=True,
                                                      needs_proba=False,
                                                      needs_threshold=False,
                                                      dummy=None)
    cls = autosklearn.classification.\
        AutoSklearnClassifier(time_left_for_this_task=60,
                              per_run_time_limit=30, seed=1)
    cls.fit(X_train, y_train, feat_type=feat_type, metric=accuracy_scorer)

    predictions = cls.predict(X_test)
    print("Accuracy score {:g} using {:s}".
          format(sklearn.metrics.accuracy_score(y_test, predictions),
                 cls._automl._automl._metric.name))


if __name__ == "__main__":
    main()
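
example_metrics.py only demonstrates scores where larger is better. A hedged sketch of a loss-style metric built with the same make_scorer arguments shown above, but with greater_is_better=False (the "err" name and the error function are illustrative, not part of this commit):

# Sketch only: a custom error metric that auto-sklearn should minimise.
import numpy as np

import autosklearn.metrics


def error_rate(solution, prediction):
    # fraction of misclassified samples; lower is better
    return np.mean(solution != prediction)


error_scorer = autosklearn.metrics.make_scorer(name="err",
                                               score_func=error_rate,
                                               greater_is_better=False,
                                               needs_proba=False,
                                               needs_threshold=False)

# Used exactly like accuracy_scorer in the example above:
# cls.fit(X_train, y_train, feat_type=feat_type, metric=error_scorer)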
