Commit 822d33a

MAINT check and update examples
1 parent b2c7316 commit 822d33a

4 files changed: +32 additions, -39 deletions


example/example_crossvalidation.py

Lines changed: 8 additions & 9 deletions
@@ -1,6 +1,7 @@
 # -*- encoding: utf-8 -*-
+import sklearn.cross_validation
 import sklearn.datasets
-import numpy as np
+import sklearn.metrics
 
 import autosklearn.classification
 
@@ -9,14 +10,9 @@ def main():
     digits = sklearn.datasets.load_digits()
     X = digits.data
     y = digits.target
-    indices = np.arange(X.shape[0])
-    np.random.shuffle(indices)
-    X = X[indices]
-    y = y[indices]
-    X_train = X[:1000]
-    y_train = y[:1000]
-    X_test = X[1000:]
-    y_test = y[1000:]
+    X_train, X_test, y_train, y_test = \
+        sklearn.cross_validation.train_test_split(X, y, random_state=1)
+
     automl = autosklearn.classification.AutoSklearnClassifier(
         time_left_for_this_task=120, per_run_time_limit=30,
         tmp_folder='/tmp/autoslearn_cv_example_tmp',
@@ -27,6 +23,9 @@ def main():
     # fit() changes the data in place, but refit needs the original data. We
     # therefore copy the data. In practice, one should reload the data
     automl.fit(X_train.copy(), y_train.copy(), dataset_name='digits')
+    # During fit(), models are fit on individual cross-validation folds. To use
+    # all available data, we call refit() which trains all models in the
+    # final ensemble on the whole dataset.
     automl.refit(X_train.copy(), y_train.copy())
 
     print(automl.show_models())
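Read end to end, the updated cross-validation example reduces to the pattern below. This is a minimal sketch assembled only from calls visible in this commit; the cross-validation resampling settings and the temporary/output folder arguments in the unchanged part of the file are omitted, so it is not a drop-in replacement for the example file.

import sklearn.cross_validation
import sklearn.datasets
import sklearn.metrics

import autosklearn.classification

digits = sklearn.datasets.load_digits()
# Shuffle and split in one call; random_state=1 keeps the split reproducible.
X_train, X_test, y_train, y_test = \
    sklearn.cross_validation.train_test_split(digits.data, digits.target,
                                              random_state=1)

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=120, per_run_time_limit=30)
# fit() changes the data in place, so pass copies; models are fit on
# individual cross-validation folds.
automl.fit(X_train.copy(), y_train.copy(), dataset_name='digits')
# refit() retrains the models of the final ensemble on the whole training set.
automl.refit(X_train.copy(), y_train.copy())
print(automl.show_models())

predictions = automl.predict(X_test)
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))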

example/example_holdout.py

Lines changed: 7 additions & 8 deletions
@@ -1,6 +1,7 @@
 from operator import itemgetter
 
 import numpy as np
+import sklearn.cross_validation
 import sklearn.datasets
 import sklearn.metrics
 
@@ -24,14 +25,9 @@ def main():
     digits = sklearn.datasets.load_digits()
     X = digits.data
     y = digits.target
-    indices = np.arange(X.shape[0])
-    np.random.shuffle(indices)
-    X = X[indices]
-    y = y[indices]
-    X_train = X[:1000]
-    y_train = y[:1000]
-    X_test = X[1000:]
-    y_test = y[1000:]
+    X_train, X_test, y_train, y_test = \
+        sklearn.cross_validation.train_test_split(X, y, random_state=1)
+
     automl = autosklearn.classification.AutoSklearnClassifier(
         time_left_for_this_task=120, per_run_time_limit=30,
         tmp_folder='/tmp/autoslearn_holdout_example_tmp',
@@ -42,8 +38,11 @@ def main():
     # unreasonably bad (around 0.0) you should have a look into the logging
     # file to figure out the error
     report(automl.grid_scores_)
+    # Print the final ensemble constructed by auto-sklearn.
     print(automl.show_models())
     predictions = automl.predict(X_test)
+    # Print statistics about the auto-sklearn run such as number of
+    # iterations, number of models failed with a time out.
     print(automl.sprint_statistics())
     print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))
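The splitting change itself can be tried in isolation. A short sketch of just that step, using only the calls shown in this commit; the shapes in the final comment assume the digits dataset and the default test size (a quarter of the samples) of this scikit-learn version.

import sklearn.cross_validation
import sklearn.datasets

digits = sklearn.datasets.load_digits()
# One call replaces the manual shuffle-and-slice; random_state=1 makes the
# shuffle reproducible across example runs.
X_train, X_test, y_train, y_test = \
    sklearn.cross_validation.train_test_split(digits.data, digits.target,
                                              random_state=1)
print(X_train.shape, X_test.shape)  # roughly (1347, 64) and (450, 64)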

example/example_parallel.py

Lines changed: 12 additions & 17 deletions
@@ -1,23 +1,24 @@
 # -*- encoding: utf-8 -*-
 import multiprocessing
-import numpy as np
 import shutil
+
+import sklearn.cross_validation
 import sklearn.datasets
 import sklearn.metrics
+
 from autosklearn.classification import AutoSklearnClassifier
 from autosklearn.constants import *
 
 tmp_folder = '/tmp/autosklearn_parallel_example_tmp'
 output_folder = '/tmp/autosklearn_parallel_example_out'
 
-try:
-    shutil.rmtree(tmp_folder)
-except OSError as e:
-    pass
-try:
-    shutil.rmtree(output_folder)
-except OSError:
-    pass
+
+for dir in [tmp_folder, output_folder]:
+    try:
+        shutil.rmtree(dir)
+    except OSError as e:
+        pass
+
 
 def spawn_classifier(seed, dataset_name):
     """Spawn a subprocess.
@@ -59,14 +60,8 @@ def spawn_classifier(seed, dataset_name):
     digits = sklearn.datasets.load_digits()
     X = digits.data
     y = digits.target
-    indices = np.arange(X.shape[0])
-    np.random.shuffle(indices)
-    X = X[indices]
-    y = y[indices]
-    X_train = X[:1000]
-    y_train = y[:1000]
-    X_test = X[1000:]
-    y_test = y[1000:]
+    X_train, X_test, y_train, y_test = \
+        sklearn.cross_validation.train_test_split(X, y, random_state=1)
 
     processes = []
     for i in range(4): # set this at roughly half of your cores
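The two copy-pasted try/except blocks are folded into one loop. A standalone sketch of that cleanup pattern, with the loop variable renamed to avoid shadowing the built-in dir() (the example itself keeps the name dir):

import shutil

tmp_folder = '/tmp/autosklearn_parallel_example_tmp'
output_folder = '/tmp/autosklearn_parallel_example_out'

for folder in [tmp_folder, output_folder]:
    try:
        # Remove leftovers from a previous run.
        shutil.rmtree(folder)
    except OSError:
        # The folder does not exist yet; nothing to clean up.
        pass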

example/example_regression.py

Lines changed: 5 additions & 5 deletions
@@ -1,18 +1,18 @@
 # -*- encoding: utf-8 -*-
-import numpy as np
+import sklearn.cross_validation
 import sklearn.datasets
 import sklearn.metrics
-from sklearn.cross_validation import train_test_split
+
 import autosklearn.regression
 
 
 def main():
     boston = sklearn.datasets.load_boston()
     X = boston.data
     y = boston.target
-    indices = np.arange(X.shape[0])
-    np.random.shuffle(indices)
-    X_train, X_test, y_train, y_test = train_test_split(X, y)
+    X_train, X_test, y_train, y_test = \
+        sklearn.cross_validation.train_test_split(X, y, random_state=1)
+
     automl = autosklearn.regression.AutoSklearnRegressor(
         time_left_for_this_task=120, per_run_time_limit=30,
         tmp_folder='/tmp/autoslearn_regression_example_tmp',
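For completeness, a minimal regression sketch in the same spirit. Only calls visible in this commit are used, the folder and output arguments of the example are omitted, and the choice of R^2 as the reported metric is an assumption of this sketch, not taken from the example file.

import sklearn.cross_validation
import sklearn.datasets
import sklearn.metrics

import autosklearn.regression

boston = sklearn.datasets.load_boston()
X_train, X_test, y_train, y_test = \
    sklearn.cross_validation.train_test_split(boston.data, boston.target,
                                              random_state=1)

automl = autosklearn.regression.AutoSklearnRegressor(
    time_left_for_this_task=120, per_run_time_limit=30)
automl.fit(X_train, y_train)
predictions = automl.predict(X_test)
# R^2 on the held-out split (metric choice is hypothetical for this sketch).
print("R2 score", sklearn.metrics.r2_score(y_test, predictions))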
