Skip to content

Commit 938175d

Browse files
authored
update examples to make unittests pass (#1023)
* update examples to make unittests pass * reorganize examples a bit
1 parent 3743b25 commit 938175d

19 files changed

+1280
-1238
lines changed

doc/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
#},
7272
'backreferences_dir': None,
7373
'filename_pattern': 'example.*.py$',
74+
'ignore_pattern': r'custom_metrics\.py|__init__\.py'
7475
}
7576

7677
# Add any paths that contain templates here, relative to this directory.

doc/index.rst

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,14 @@ Example
4141
>>> import sklearn.model_selection
4242
>>> import sklearn.datasets
4343
>>> import sklearn.metrics
44-
>>> X, y = sklearn.datasets.load_digits(return_X_y=True)
45-
>>> X_train, X_test, y_train, y_test = \
46-
sklearn.model_selection.train_test_split(X, y, random_state=1)
47-
>>> automl = autosklearn.classification.AutoSklearnClassifier()
48-
>>> automl.fit(X_train, y_train)
49-
>>> y_hat = automl.predict(X_test)
50-
>>> print("Accuracy score", sklearn.metrics.accuracy_score(y_test, y_hat))
44+
>>> if __name__ == "__main__":
45+
>>> X, y = sklearn.datasets.load_digits(return_X_y=True)
46+
>>> X_train, X_test, y_train, y_test = \
47+
sklearn.model_selection.train_test_split(X, y, random_state=1)
48+
>>> automl = autosklearn.classification.AutoSklearnClassifier()
49+
>>> automl.fit(X_train, y_train)
50+
>>> y_hat = automl.predict(X_test)
51+
>>> print("Accuracy score", sklearn.metrics.accuracy_score(y_test, y_hat))
5152

5253

5354
This will run for one hour and should result in an accuracy above 0.98.

examples/20_basic/example_classification.py

Lines changed: 33 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -13,35 +13,36 @@
1313
import autosklearn.classification
1414

1515

16-
############################################################################
17-
# Data Loading
18-
# ============
19-
20-
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
21-
X_train, X_test, y_train, y_test = \
22-
sklearn.model_selection.train_test_split(X, y, random_state=1)
23-
24-
############################################################################
25-
# Build and fit a regressor
26-
# =========================
27-
28-
automl = autosklearn.classification.AutoSklearnClassifier(
29-
time_left_for_this_task=120,
30-
per_run_time_limit=30,
31-
tmp_folder='/tmp/autosklearn_classification_example_tmp',
32-
output_folder='/tmp/autosklearn_classification_example_out',
33-
)
34-
automl.fit(X_train, y_train, dataset_name='breast_cancer')
35-
36-
############################################################################
37-
# Print the final ensemble constructed by auto-sklearn
38-
# ====================================================
39-
40-
print(automl.show_models())
41-
42-
###########################################################################
43-
# Get the Score of the final ensemble
44-
# ===================================
45-
46-
predictions = automl.predict(X_test)
47-
print("Accuracy score:", sklearn.metrics.accuracy_score(y_test, predictions))
16+
if __name__ == "__main__":
17+
############################################################################
18+
# Data Loading
19+
# ============
20+
21+
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
22+
X_train, X_test, y_train, y_test = \
23+
sklearn.model_selection.train_test_split(X, y, random_state=1)
24+
25+
############################################################################
26+
# Build and fit a classifier
27+
# ==========================
28+
29+
automl = autosklearn.classification.AutoSklearnClassifier(
30+
time_left_for_this_task=120,
31+
per_run_time_limit=30,
32+
tmp_folder='/tmp/autosklearn_classification_example_tmp',
33+
output_folder='/tmp/autosklearn_classification_example_out',
34+
)
35+
automl.fit(X_train, y_train, dataset_name='breast_cancer')
36+
37+
############################################################################
38+
# Print the final ensemble constructed by auto-sklearn
39+
# ====================================================
40+
41+
print(automl.show_models())
42+
43+
###########################################################################
44+
# Get the Score of the final ensemble
45+
# ===================================
46+
47+
predictions = automl.predict(X_test)
48+
print("Accuracy score:", sklearn.metrics.accuracy_score(y_test, predictions))

examples/20_basic/example_multilabel_classification.py

Lines changed: 59 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -13,65 +13,65 @@
1313
import sklearn.metrics
1414
from sklearn.utils.multiclass import type_of_target
1515

16-
1716
import autosklearn.classification
1817

1918

20-
############################################################################
21-
# Data Loading
22-
# ============
23-
24-
# Using reuters multilabel dataset -- https://www.openml.org/d/40594
25-
X, y = sklearn.datasets.fetch_openml(data_id=40594, return_X_y=True, as_frame=False)
26-
27-
# fetch openml downloads a numpy array with TRUE/FALSE strings. Re-map it to
28-
# integer dtype with ones and zeros
29-
# This is to comply with Scikit-learn requirement:
30-
# "Positive classes are indicated with 1 and negative classes with 0 or -1."
31-
# More information on: https://scikit-learn.org/stable/modules/multiclass.html
32-
y[y == 'TRUE'] = 1
33-
y[y == 'FALSE'] = 0
34-
y = y.astype(np.int)
35-
36-
# Using type of target is a good way to make sure your data
37-
# is properly formatted
38-
print(f"type_of_target={type_of_target(y)}")
39-
40-
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
41-
X, y, random_state=1
42-
)
43-
44-
############################################################################
45-
# Building the classifier
46-
# =======================
47-
48-
automl = autosklearn.classification.AutoSklearnClassifier(
49-
time_left_for_this_task=60,
50-
per_run_time_limit=30,
51-
# Bellow two flags are provided to speed up calculations
52-
# Not recommended for a real implementation
53-
initial_configurations_via_metalearning=0,
54-
smac_scenario_args={'runcount_limit': 1},
55-
)
56-
automl.fit(X_train, y_train, dataset_name='reuters')
57-
58-
############################################################################
59-
# Print the final ensemble constructed by auto-sklearn
60-
# ====================================================
61-
62-
print(automl.show_models())
63-
64-
############################################################################
65-
# Print statistics about the auto-sklearn run
66-
# ===========================================
67-
68-
# Print statistics about the auto-sklearn run such as number of
69-
# iterations, number of models failed with a time out.
70-
print(automl.sprint_statistics())
71-
72-
############################################################################
73-
# Get the Score of the final ensemble
74-
# ===================================
75-
76-
predictions = automl.predict(X_test)
77-
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))
19+
if __name__ == "__main__":
20+
############################################################################
21+
# Data Loading
22+
# ============
23+
24+
# Using reuters multilabel dataset -- https://www.openml.org/d/40594
25+
X, y = sklearn.datasets.fetch_openml(data_id=40594, return_X_y=True, as_frame=False)
26+
27+
# fetch openml downloads a numpy array with TRUE/FALSE strings. Re-map it to
28+
# integer dtype with ones and zeros
29+
# This is to comply with Scikit-learn requirement:
30+
# "Positive classes are indicated with 1 and negative classes with 0 or -1."
31+
# More information on: https://scikit-learn.org/stable/modules/multiclass.html
32+
y[y == 'TRUE'] = 1
33+
y[y == 'FALSE'] = 0
34+
# Cast the re-mapped labels to integers. The builtin ``int`` is used because
# the ``np.int`` alias was deprecated in NumPy 1.20 and removed in 1.24.
y = y.astype(int)
35+
36+
# Using type of target is a good way to make sure your data
37+
# is properly formatted
38+
print(f"type_of_target={type_of_target(y)}")
39+
40+
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
41+
X, y, random_state=1
42+
)
43+
44+
############################################################################
45+
# Building the classifier
46+
# =======================
47+
48+
automl = autosklearn.classification.AutoSklearnClassifier(
49+
time_left_for_this_task=60,
50+
per_run_time_limit=30,
51+
# The two flags below are provided to speed up calculations
52+
# Not recommended for a real implementation
53+
initial_configurations_via_metalearning=0,
54+
smac_scenario_args={'runcount_limit': 1},
55+
)
56+
automl.fit(X_train, y_train, dataset_name='reuters')
57+
58+
############################################################################
59+
# Print the final ensemble constructed by auto-sklearn
60+
# ====================================================
61+
62+
print(automl.show_models())
63+
64+
############################################################################
65+
# Print statistics about the auto-sklearn run
66+
# ===========================================
67+
68+
# Print statistics about the auto-sklearn run such as number of
69+
# iterations, number of models failed with a time out.
70+
print(automl.sprint_statistics())
71+
72+
############################################################################
73+
# Get the Score of the final ensemble
74+
# ===================================
75+
76+
predictions = automl.predict(X_test)
77+
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))

examples/20_basic/example_regression.py

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -13,36 +13,37 @@
1313
import autosklearn.regression
1414

1515

16-
############################################################################
17-
# Data Loading
18-
# ============
16+
if __name__ == "__main__":
17+
############################################################################
18+
# Data Loading
19+
# ============
1920

20-
X, y = sklearn.datasets.load_boston(return_X_y=True)
21+
X, y = sklearn.datasets.load_boston(return_X_y=True)
2122

22-
X_train, X_test, y_train, y_test = \
23-
sklearn.model_selection.train_test_split(X, y, random_state=1)
23+
X_train, X_test, y_train, y_test = \
24+
sklearn.model_selection.train_test_split(X, y, random_state=1)
2425

25-
############################################################################
26-
# Build and fit a regressor
27-
# =========================
26+
############################################################################
27+
# Build and fit a regressor
28+
# =========================
2829

29-
automl = autosklearn.regression.AutoSklearnRegressor(
30-
time_left_for_this_task=120,
31-
per_run_time_limit=30,
32-
tmp_folder='/tmp/autosklearn_regression_example_tmp',
33-
output_folder='/tmp/autosklearn_regression_example_out',
34-
)
35-
automl.fit(X_train, y_train, dataset_name='boston')
30+
automl = autosklearn.regression.AutoSklearnRegressor(
31+
time_left_for_this_task=120,
32+
per_run_time_limit=30,
33+
tmp_folder='/tmp/autosklearn_regression_example_tmp',
34+
output_folder='/tmp/autosklearn_regression_example_out',
35+
)
36+
automl.fit(X_train, y_train, dataset_name='boston')
3637

37-
############################################################################
38-
# Print the final ensemble constructed by auto-sklearn
39-
# ====================================================
38+
############################################################################
39+
# Print the final ensemble constructed by auto-sklearn
40+
# ====================================================
4041

41-
print(automl.show_models())
42+
print(automl.show_models())
4243

43-
###########################################################################
44-
# Get the Score of the final ensemble
45-
# ===================================
44+
###########################################################################
45+
# Get the Score of the final ensemble
46+
# ===================================
4647

47-
predictions = automl.predict(X_test)
48-
print("R2 score:", sklearn.metrics.r2_score(y_test, predictions))
48+
predictions = automl.predict(X_test)
49+
print("R2 score:", sklearn.metrics.r2_score(y_test, predictions))

examples/40_advanced/__init__.py

Whitespace-only changes.
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
"""Custom metrics to be used by example_metrics.py
2+
3+
They reside in a different file so they can be used by Auto-sklearn."""
4+
5+
import numpy as np
6+
7+
8+
############################################################################
9+
# Custom metrics definition
10+
# =========================
11+
12+
def accuracy(solution, prediction):
    """Custom accuracy metric.

    Returns ``np.mean(solution == prediction)``, i.e. the mean of the
    equality comparison between the two arguments.

    NOTE(review): presumably both arguments are NumPy arrays of the same
    shape, so that ``==`` compares element-wise -- confirm against callers.
    """
    return np.mean(solution == prediction)
15+
16+
17+
def error(solution, prediction):
    """Custom error metric.

    Returns ``np.mean(solution != prediction)``, i.e. the mean of the
    inequality comparison between the two arguments.

    NOTE(review): presumably both arguments are NumPy arrays of the same
    shape, so that ``!=`` compares element-wise -- confirm against callers.
    """
    return np.mean(solution != prediction)
20+
21+
22+
def accuracy_wk(solution, prediction, dummy):
    """Custom accuracy metric that accepts an additional argument.

    ``dummy`` only demonstrates passing an extra argument to a metric
    function; it must be ``None`` and does not influence the result, which
    is ``np.mean(solution == prediction)``.

    NOTE(review): presumably ``solution`` and ``prediction`` are NumPy arrays
    of the same shape -- confirm against callers.
    """
    assert dummy is None
    return np.mean(solution == prediction)
26+
27+
28+
def error_wk(solution, prediction, dummy):
    """Custom error metric that accepts an additional argument.

    ``dummy`` only demonstrates passing an extra argument to a metric
    function; it must be ``None`` and does not influence the result, which
    is ``np.mean(solution != prediction)``.

    NOTE(review): presumably ``solution`` and ``prediction`` are NumPy arrays
    of the same shape -- confirm against callers.
    """
    assert dummy is None
    return np.mean(solution != prediction)

examples/40_advanced/example_feature_types.py

Lines changed: 38 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -21,40 +21,41 @@
2121
import autosklearn.classification
2222

2323

24-
############################################################################
25-
# Data Loading
26-
# ============
27-
# Load Australian dataset from https://www.openml.org/d/40981
28-
bunch = data = sklearn.datasets.fetch_openml(data_id=40981, as_frame=True)
29-
y = bunch['target'].to_numpy()
30-
X = bunch['data'].to_numpy(np.float)
31-
32-
X_train, X_test, y_train, y_test = \
33-
sklearn.model_selection.train_test_split(X, y, random_state=1)
34-
35-
# Auto-sklearn can automatically recognize categorical/numerical data from a pandas
36-
# DataFrame. This example highlights how the user can provide the feature types,
37-
# when using numpy arrays, as there is no per-column dtype in this case.
38-
# feat_type is a list that tags each column from a DataFrame/ numpy array / list
39-
# with the case-insensitive string categorical or numerical, accordingly.
40-
feat_type = ['Categorical' if x.name == 'category' else 'Numerical' for x in bunch['data'].dtypes]
41-
42-
############################################################################
43-
# Build and fit a classifier
44-
# ==========================
45-
46-
cls = autosklearn.classification.AutoSklearnClassifier(
47-
time_left_for_this_task=30,
48-
# Bellow two flags are provided to speed up calculations
49-
# Not recommended for a real implementation
50-
initial_configurations_via_metalearning=0,
51-
smac_scenario_args={'runcount_limit': 1},
52-
)
53-
cls.fit(X_train, y_train, X_test, y_test, feat_type=feat_type)
54-
55-
###########################################################################
56-
# Get the Score of the final ensemble
57-
# ===================================
58-
59-
predictions = cls.predict(X_test)
60-
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))
24+
if __name__ == "__main__":
25+
############################################################################
26+
# Data Loading
27+
# ============
28+
# Load Australian dataset from https://www.openml.org/d/40981
29+
bunch = data = sklearn.datasets.fetch_openml(data_id=40981, as_frame=True)
30+
y = bunch['target'].to_numpy()
31+
# Convert the feature frame to a float array. The builtin ``float`` is used
# because the ``np.float`` alias was deprecated in NumPy 1.20 and removed in 1.24.
X = bunch['data'].to_numpy(float)
32+
33+
X_train, X_test, y_train, y_test = \
34+
sklearn.model_selection.train_test_split(X, y, random_state=1)
35+
36+
# Auto-sklearn can automatically recognize categorical/numerical data from a pandas
37+
# DataFrame. This example highlights how the user can provide the feature types,
38+
# when using numpy arrays, as there is no per-column dtype in this case.
39+
# feat_type is a list that tags each column from a DataFrame/ numpy array / list
40+
# with the case-insensitive string categorical or numerical, accordingly.
41+
feat_type = ['Categorical' if x.name == 'category' else 'Numerical' for x in bunch['data'].dtypes]
42+
43+
############################################################################
44+
# Build and fit a classifier
45+
# ==========================
46+
47+
cls = autosklearn.classification.AutoSklearnClassifier(
48+
time_left_for_this_task=30,
49+
# The two flags below are provided to speed up calculations
50+
# Not recommended for a real implementation
51+
initial_configurations_via_metalearning=0,
52+
smac_scenario_args={'runcount_limit': 1},
53+
)
54+
cls.fit(X_train, y_train, X_test, y_test, feat_type=feat_type)
55+
56+
###########################################################################
57+
# Get the Score of the final ensemble
58+
# ===================================
59+
60+
predictions = cls.predict(X_test)
61+
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))

0 commit comments

Comments
 (0)