13 | 13 | import sklearn.metrics |
14 | 14 | from sklearn.utils.multiclass import type_of_target |
15 | 15 |
16 | | - |
17 | 16 | import autosklearn.classification |
18 | 17 |
19 | 18 |
20 | | -############################################################################ |
21 | | -# Data Loading |
22 | | -# ============ |
23 | | - |
24 | | -# Using reuters multilabel dataset -- https://www.openml.org/d/40594 |
25 | | -X, y = sklearn.datasets.fetch_openml(data_id=40594, return_X_y=True, as_frame=False) |
26 | | - |
27 | | -# fetch openml downloads a numpy array with TRUE/FALSE strings. Re-map it to |
28 | | -# integer dtype with ones and zeros |
29 | | -# This is to comply with Scikit-learn requirement: |
30 | | -# "Positive classes are indicated with 1 and negative classes with 0 or -1." |
31 | | -# More information on: https://scikit-learn.org/stable/modules/multiclass.html |
32 | | -y[y == 'TRUE'] = 1 |
33 | | -y[y == 'FALSE'] = 0 |
34 | | -y = y.astype(np.int) |
35 | | - |
36 | | -# Using type of target is a good way to make sure your data |
37 | | -# is properly formatted |
38 | | -print(f"type_of_target={type_of_target(y)}") |
39 | | - |
40 | | -X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( |
41 | | -    X, y, random_state=1
42 | | -) |
43 | | - |
44 | | -############################################################################ |
45 | | -# Building the classifier |
46 | | -# ======================= |
47 | | - |
48 | | -automl = autosklearn.classification.AutoSklearnClassifier( |
49 | | -    time_left_for_this_task=60,
50 | | -    per_run_time_limit=30,
51 | | -    # Bellow two flags are provided to speed up calculations
52 | | -    # Not recommended for a real implementation
53 | | -    initial_configurations_via_metalearning=0,
54 | | -    smac_scenario_args={'runcount_limit': 1},
55 | | -) |
56 | | -automl.fit(X_train, y_train, dataset_name='reuters') |
57 | | - |
58 | | -############################################################################ |
59 | | -# Print the final ensemble constructed by auto-sklearn |
60 | | -# ==================================================== |
61 | | - |
62 | | -print(automl.show_models()) |
63 | | - |
64 | | -############################################################################ |
65 | | -# Print statistics about the auto-sklearn run |
66 | | -# =========================================== |
67 | | - |
68 | | -# Print statistics about the auto-sklearn run such as number of |
69 | | -# iterations, number of models failed with a time out. |
70 | | -print(automl.sprint_statistics()) |
71 | | - |
72 | | -############################################################################ |
73 | | -# Get the Score of the final ensemble |
74 | | -# =================================== |
75 | | - |
76 | | -predictions = automl.predict(X_test) |
77 | | -print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions)) |
| 19 | +if __name__ == "__main__": |
| 20 | +    ############################################################################
| 21 | +    # Data Loading
| 22 | +    # ============
| 23 | +
| 24 | +    # Using the reuters multilabel dataset -- https://www.openml.org/d/40594
| 25 | +    X, y = sklearn.datasets.fetch_openml(data_id=40594, return_X_y=True, as_frame=False)
| 26 | +
| 27 | +    # fetch_openml downloads a numpy array with TRUE/FALSE strings. Re-map it to
| 28 | +    # an integer dtype with ones and zeros.
| 29 | +    # This is to comply with the scikit-learn requirement:
| 30 | +    # "Positive classes are indicated with 1 and negative classes with 0 or -1."
| 31 | +    # More information on: https://scikit-learn.org/stable/modules/multiclass.html
| 32 | +    y[y == 'TRUE'] = 1
| 33 | +    y[y == 'FALSE'] = 0
| 34 | +    y = y.astype(int)
| 35 | +
| 36 | +    # Using type_of_target is a good way to make sure your data
| 37 | +    # is properly formatted
| 38 | +    print(f"type_of_target={type_of_target(y)}")
| 39 | +
| 40 | +    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
| 41 | +        X, y, random_state=1
| 42 | +    )
| 43 | + |
| 44 | +    ############################################################################
| 45 | +    # Building the classifier
| 46 | +    # =======================
| 47 | +
| 48 | +    automl = autosklearn.classification.AutoSklearnClassifier(
| 49 | +        time_left_for_this_task=60,
| 50 | +        per_run_time_limit=30,
| 51 | +        # The two flags below are provided to speed up the example and are
| 52 | +        # not recommended for a real implementation.
| 53 | +        initial_configurations_via_metalearning=0,
| 54 | +        smac_scenario_args={'runcount_limit': 1},
| 55 | +    )
| 56 | +    automl.fit(X_train, y_train, dataset_name='reuters')
| 57 | + |
| 58 | +    ############################################################################
| 59 | +    # Print the final ensemble constructed by auto-sklearn
| 60 | +    # ====================================================
| 61 | +
| 62 | +    print(automl.show_models())
| 63 | +
| 64 | +    ############################################################################
| 65 | +    # Print statistics about the auto-sklearn run
| 66 | +    # ===========================================
| 67 | +
| 68 | +    # Print statistics about the auto-sklearn run, such as the number of
| 69 | +    # iterations and the number of models that failed with a time out.
| 70 | +    print(automl.sprint_statistics())
| 71 | +
| 72 | +    ############################################################################
| 73 | +    # Get the Score of the final ensemble
| 74 | +    # ===================================
| 75 | +
| 76 | +    predictions = automl.predict(X_test)
| 77 | +    print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))
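The `if __name__ == "__main__":` guard added above matters because auto-sklearn runs its model fitting in worker subprocesses; with the 'spawn' start method (the default on Windows and on macOS since Python 3.8), each worker re-imports the main module, so unguarded module-level code would execute again in every child. Below is a minimal, generic sketch of that pattern using plain `multiprocessing` (not auto-sklearn internals), for illustration only:

```python
import multiprocessing as mp

def fit_one(seed):
    # Stand-in for work a library might dispatch to a worker process
    return seed * seed

if __name__ == "__main__":
    # Without this guard, 'spawn' workers re-importing the module would
    # reach the Pool creation again and fail during bootstrapping.
    mp.set_start_method("spawn", force=True)
    with mp.Pool(processes=2) as pool:
        print(pool.map(fit_one, [1, 2, 3]))  # -> [1, 4, 9]
```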
0 commit comments
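The target remapping in the example follows the scikit-learn multilabel convention quoted in its comments. A minimal standalone sketch of that format is shown below; the `y_raw` array here is made up for illustration and is not data from the commit:

```python
import numpy as np
from sklearn.utils.multiclass import type_of_target

# Hypothetical TRUE/FALSE string labels, shaped like a multilabel target
y_raw = np.array([["TRUE", "FALSE", "TRUE"],
                  ["FALSE", "FALSE", "TRUE"]])

# Positive classes as 1, negative classes as 0, integer dtype
y = np.where(y_raw == "TRUE", 1, 0)

print(type_of_target(y))  # -> 'multilabel-indicator'
```

For a multilabel-indicator target like this, `sklearn.metrics.accuracy_score` (as used at the end of the example) reports subset accuracy: a sample only counts as correct when every one of its labels matches.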