Skip to content

Commit 7f891fa

Browse files
committed
simplify and fix examples
1 parent 4643aa5 commit 7f891fa

File tree

8 files changed

+24
-68
lines changed

8 files changed

+24
-68
lines changed

autosklearn/estimators.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -321,7 +321,7 @@ def fit_ensemble(self, y, task=None, metric=None, precision='32',
321321
the task type (binary classification, multiclass classification,
322322
multilabel classification or regression).
323323
324-
metric : callable, optional (default='acc_metric')
324+
metric : callable, optional
325325
An instance of :class:`autosklearn.metrics.Scorer` as created by
326326
:meth:`autosklearn.metrics.make_scorer`. These are the `Built-in
327327
Metrics`_.
@@ -463,7 +463,7 @@ def fit(self, X, y,
463463
y : array-like, shape = [n_samples] or [n_samples, n_outputs]
464464
The regression target.
465465
466-
metric : callable, optional (default='autosklearn.metrics.accuracy')
466+
metric : callable, optional (default='autosklearn.metrics.r2')
467467
An instance of :class:`autosklearn.metrics.Scorer` as created by
468468
:meth:`autosklearn.metrics.make_scorer`. These are the `Built-in
469469
Metrics`_.

example/example_crossvalidation.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,9 +7,7 @@
77

88

99
def main():
10-
digits = sklearn.datasets.load_digits()
11-
X = digits.data
12-
y = digits.target
10+
X, y = sklearn.datasets.load_digits(return_X_y=True)
1311
X_train, X_test, y_train, y_test = \
1412
sklearn.model_selection.train_test_split(X, y, random_state=1)
1513

example/example_feature_types.py

Lines changed: 4 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -11,14 +11,9 @@
1111
print("#"*80 + """
1212
To run this example you need to install openml-python:
1313
14-
git+https://github.com/renatopp/liac-arff
15-
# OpenML is currently not on pypi, use an old version to not depend on
16-
# scikit-learn 0.18
17-
requests
18-
xmltodict
19-
git+https://github.com/renatopp/liac-arff
20-
git+https://github.com/openml/""" +
21-
"openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1\n""" +
14+
pip install git+https://github.com/renatopp/liac-arff
15+
pip install requests xmltodict
16+
pip install git+https://github.com/openml/openml-python@develop --no-deps\n""" +
2217
"#"*80)
2318
raise
2419

@@ -41,7 +36,7 @@ def main():
4136
get_data(target=task.target_name, return_categorical_indicator=True)
4237

4338
# Create feature type list from openml.org indicator and run autosklearn
44-
feat_type = ['categorical' if ci else 'numerical'
39+
feat_type = ['Categorical' if ci else 'Numerical'
4540
for ci in categorical_indicator]
4641

4742
cls = autosklearn.classification.\

example/example_holdout.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,7 @@
66

77

88
def main():
9-
digits = sklearn.datasets.load_digits()
10-
X = digits.data
11-
y = digits.target
9+
X, y = sklearn.datasets.load_digits(return_X_y=True)
1210
X_train, X_test, y_train, y_test = \
1311
sklearn.model_selection.train_test_split(X, y, random_state=1)
1412

example/example_metrics.py

Lines changed: 8 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -8,22 +8,6 @@
88
import autosklearn.classification
99
import autosklearn.metrics
1010

11-
try:
12-
import openml
13-
except ImportError:
14-
print("#"*80 + """
15-
To run this example you need to install openml-python:
16-
17-
git+https://github.com/renatopp/liac-arff
18-
# OpenML is currently not on pypi, use an old version to not depend on
19-
# scikit-learn 0.18
20-
requests
21-
xmltodict
22-
git+https://github.com/renatopp/liac-arff
23-
git+https://github.com/openml/""" +
24-
"openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1\n""" +
25-
"#"*80)
26-
raise
2711

2812

2913
def accuracy(solution, prediction):
@@ -38,25 +22,10 @@ def accuracy_wk(solution, prediction, dummy):
3822

3923

4024
def main():
41-
# Load adult dataset from openml.org, see https://www.openml.org/t/2117
42-
openml.config.apikey = '610344db6388d9ba34f6db45a3cf71de'
4325

44-
task = openml.tasks.get_task(2117)
45-
train_indices, test_indices = task.get_train_test_split_indices()
46-
X, y = task.get_X_and_y()
47-
48-
X_train = X[train_indices]
49-
y_train = y[train_indices]
50-
X_test = X[test_indices]
51-
y_test = y[test_indices]
52-
53-
dataset = task.get_dataset()
54-
_, _, categorical_indicator = dataset.\
55-
get_data(target=task.target_name, return_categorical_indicator=True)
56-
57-
# Create feature type list from openml.org indicator and run autosklearn
58-
feat_type = ['categorical' if ci else 'numerical'
59-
for ci in categorical_indicator]
26+
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
27+
X_train, X_test, y_train, y_test = \
28+
sklearn.model_selection.train_test_split(X, y, random_state=1)
6029

6130
# Print a list of available metrics
6231
print("Available CLASSIFICATION metrics autosklearn.metrics.*:")
@@ -71,14 +40,14 @@ def main():
7140
cls = autosklearn.classification.\
7241
AutoSklearnClassifier(time_left_for_this_task=60,
7342
per_run_time_limit=30, seed=1)
74-
cls.fit(X_train, y_train, feat_type=feat_type,
75-
metric=autosklearn.metrics.accuracy)
43+
cls.fit(X_train, y_train, metric=autosklearn.metrics.accuracy)
7644

7745
predictions = cls.predict(X_test)
7846
print("Accuracy score {:g} using {:s}".
7947
format(sklearn.metrics.accuracy_score(y_test, predictions),
8048
cls._automl._automl._metric.name))
8149

50+
# Second example: Use own accuracy metric
8251
print("#"*80)
8352
print("Use self defined accuracy accuracy metric")
8453
accuracy_scorer = autosklearn.metrics.make_scorer(name="accu",
@@ -89,13 +58,14 @@ def main():
8958
cls = autosklearn.classification.\
9059
AutoSklearnClassifier(time_left_for_this_task=60,
9160
per_run_time_limit=30, seed=1)
92-
cls.fit(X_train, y_train, feat_type=feat_type, metric=accuracy_scorer)
61+
cls.fit(X_train, y_train, metric=accuracy_scorer)
9362

9463
predictions = cls.predict(X_test)
9564
print("Accuracy score {:g} using {:s}".
9665
format(sklearn.metrics.accuracy_score(y_test, predictions),
9766
cls._automl._automl._metric.name))
9867

68+
# Third example: Use own accuracy metric with additional argument
9969
print("#"*80)
10070
print("Use self defined accuracy with additional argument")
10171
accuracy_scorer = autosklearn.metrics.make_scorer(name="accu_add",
@@ -107,7 +77,7 @@ def main():
10777
cls = autosklearn.classification.\
10878
AutoSklearnClassifier(time_left_for_this_task=60,
10979
per_run_time_limit=30, seed=1)
110-
cls.fit(X_train, y_train, feat_type=feat_type, metric=accuracy_scorer)
80+
cls.fit(X_train, y_train, metric=accuracy_scorer)
11181

11282
predictions = cls.predict(X_test)
11383
print("Accuracy score {:g} using {:s}".

example/example_parallel.py

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
import sklearn.datasets
77
import sklearn.metrics
88

9+
from autosklearn.metrics import accuracy
910
from autosklearn.classification import AutoSklearnClassifier
1011
from autosklearn.constants import *
1112

@@ -42,10 +43,10 @@ def spawn_classifier(seed, dataset_name):
4243
# models.
4344
# 3. all instances of the AutoSklearnClassifier must have a different seed!
4445
automl = AutoSklearnClassifier(
45-
time_left_for_this_task=120, # sec., how long should this seed fit
46+
time_left_for_this_task=60, # sec., how long should this seed fit
4647
# process run
47-
per_run_time_limit=60, # sec., each model may only take this long before it's killed
48-
ml_memory_limit=1024, # MB, memory limit imposed on each call to a ML algorithm
48+
per_run_time_limit=15, # sec., each model may only take this long before it's killed
49+
ml_memory_limit=1024, # MB, memory limit imposed on each call to a ML algorithm
4950
shared_mode=True, # tmp folder will be shared between seeds
5051
tmp_folder=tmp_folder,
5152
output_folder=output_folder,
@@ -57,9 +58,7 @@ def spawn_classifier(seed, dataset_name):
5758

5859
if __name__ == '__main__':
5960

60-
digits = sklearn.datasets.load_digits()
61-
X = digits.data
62-
y = digits.target
61+
X, y = sklearn.datasets.load_digits(return_X_y=True)
6362
X_train, X_test, y_train, y_test = \
6463
sklearn.model_selection.train_test_split(X, y, random_state=1)
6564

@@ -87,7 +86,7 @@ def spawn_classifier(seed, dataset_name):
8786
# necessary
8887
automl.fit_ensemble(y_train,
8988
task=MULTICLASS_CLASSIFICATION,
90-
metric=ACC_METRIC,
89+
metric=accuracy,
9190
precision='32',
9291
dataset_name='digits',
9392
ensemble_size=20,

example/example_regression.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,9 +7,7 @@
77

88

99
def main():
10-
boston = sklearn.datasets.load_boston()
11-
X = boston.data
12-
y = boston.target
10+
X, y = sklearn.datasets.load_boston(return_X_y=True)
1311
feature_types = (['numerical'] * 3) + ['categorical'] + (['numerical'] * 9)
1412
X_train, X_test, y_train, y_test = \
1513
sklearn.model_selection.train_test_split(X, y, random_state=1)

example/example_sequential.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,7 @@
66

77

88
def main():
9-
digits = sklearn.datasets.load_digits()
10-
X = digits.data
11-
y = digits.target
9+
X, y = sklearn.datasets.load_digits(return_X_y=True)
1210
X_train, X_test, y_train, y_test = \
1311
sklearn.model_selection.train_test_split(X, y, random_state=1)
1412

0 commit comments

Comments
 (0)