|
| 1 | +# -*- encoding: utf-8 -*- |
| 2 | +import numpy as np |
| 3 | + |
| 4 | +import sklearn.model_selection |
| 5 | +import sklearn.datasets |
| 6 | +import sklearn.metrics |
| 7 | + |
| 8 | +import autosklearn.classification |
| 9 | +import autosklearn.metrics |
| 10 | + |
# openml-python is required by this example; fail early with install hints.
try:
    import openml
except ImportError:
    # Print the dependency list and re-raise so the original ImportError
    # (and its traceback) still reaches the caller.
    # NOTE(review): the leading whitespace of the message lines was
    # reconstructed; confirm against the intended output.
    print("#"*80 + """
    To run this example you need to install openml-python:

    git+https://github.com/renatopp/liac-arff
    # OpenML is currently not on pypi, use an old version to not depend on
    # scikit-learn 0.18
    requests
    xmltodict
    git+https://github.com/renatopp/liac-arff
    git+https://github.com/openml/""" +
          "openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1\n" +
          "#"*80)
    raise
| 27 | + |
| 28 | + |
def accuracy(solution, prediction):
    """Return the fraction of entries where ``prediction`` equals ``solution``.

    Both arguments are passed through ``np.asarray`` before comparing so that
    plain Python sequences are compared element-wise. Without the conversion,
    ``[1, 0] == [1, 1]`` is a single ``False`` and the mean would collapse to
    0.0 or 1.0 regardless of how many entries actually match.

    Parameters
    ----------
    solution : array-like
        Ground-truth labels.
    prediction : array-like
        Predicted labels; expected to have the same shape as ``solution``.

    Returns
    -------
    float
        Mean of the element-wise equality mask.
    """
    return np.mean(np.asarray(solution) == np.asarray(prediction))
| 31 | + |
def accuracy_with_kwargs(solution, prediction, **kwargs):
    """Return the fraction of matching entries, tolerating extra keywords.

    The original definition was left unfinished (no colon, no body), which
    made the module fail to parse. This completes it as an accuracy metric
    that also accepts arbitrary keyword arguments.
    NOTE(review): the intended extra parameter(s) are not visible in the
    file; the keywords are accepted and ignored -- confirm the intent.

    Parameters
    ----------
    solution : array-like
        Ground-truth labels.
    prediction : array-like
        Predicted labels; expected to have the same shape as ``solution``.
    **kwargs
        Additional keyword arguments; currently unused.

    Returns
    -------
    float
        Mean of the element-wise equality mask.
    """
    return np.mean(np.asarray(solution) == np.asarray(prediction))
| 33 | + |
def main():
    """Fit auto-sklearn on OpenML task 2117 with a custom accuracy scorer.

    Downloads the task, splits it with the task's own train/test indices,
    derives the per-feature type list from the dataset's categorical
    indicator, fits an ``AutoSklearnClassifier`` optimising the
    ``accuracy`` function wrapped as a scorer, and prints the test-set
    accuracy computed by scikit-learn.
    """
    # Load adult dataset from openml.org, see https://www.openml.org/t/2117
    # NOTE(review): an API key is committed in source; consider loading it
    # from the environment or the openml config file instead.
    openml.config.apikey = '610344db6388d9ba34f6db45a3cf71de'

    task = openml.tasks.get_task(2117)
    train_indices, test_indices = task.get_train_test_split_indices()
    X, y = task.get_X_and_y()

    X_train, y_train = X[train_indices], y[train_indices]
    X_test, y_test = X[test_indices], y[test_indices]

    dataset = task.get_dataset()
    # Only the categorical indicator is needed here; the data itself was
    # already fetched through the task above.
    _, _, categorical_indicator = dataset.get_data(
        target=task.target_name,
        return_categorical_indicator=True,
    )

    # Create feature type list from openml.org indicator and run autosklearn
    feat_type = ['categorical' if is_categorical else 'numerical'
                 for is_categorical in categorical_indicator]

    # Run auto-sklearn with our metric
    accuracy_scorer = autosklearn.metrics.make_scorer(
        name="accu_self",
        score_func=accuracy,
        greater_is_better=True,
        needs_proba=False,
        needs_threshold=False,
    )
    automl = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=60,
        per_run_time_limit=30,
    )
    automl.fit(X_train, y_train, feat_type=feat_type, metric=accuracy_scorer)

    predictions = automl.predict(X_test)
    print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))
| 68 | + |
| 69 | + |
| 70 | + |
| 71 | + |
| 72 | + |
# Run the example only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
0 commit comments