from openml.testing import TestBase


class TestStudyFunctions(TestBase):
    """Test the example code of Bischl et al. (2018)."""

    # Tell nose's multiprocess plugin that the tests in this class may be
    # distributed across worker processes.
    _multiprocess_can_split_ = True

    def test_Figure1a(self):
        """Test the listing in Figure 1a on a single task and the old OpenML100 study.

        The original listing is pasted into the docstring below because the
        actual unit test differs a bit; for example, it does not run for all
        tasks, but only a single one.

        import openml
        import sklearn.tree, sklearn.preprocessing
        benchmark_suite = openml.study.get_study('OpenML-CC18','tasks') # obtain the benchmark suite
        clf = sklearn.pipeline.Pipeline(steps=[('imputer',sklearn.preprocessing.Imputer()), ('estimator',sklearn.tree.DecisionTreeClassifier())]) # build a sklearn classifier
        for task_id in benchmark_suite.tasks:                          # iterate over all tasks
            task = openml.tasks.get_task(task_id)                      # download the OpenML task
            X, y = task.get_X_and_y()                                  # get the data (not used in this example)
            openml.config.apikey = 'FILL_IN_OPENML_API_KEY'            # set the OpenML Api Key
            run = openml.runs.run_model_on_task(task,clf)              # run classifier on splits (requires API key)
            score = run.get_metric_fn(sklearn.metrics.accuracy_score)  # print accuracy score
            print('Data set: %s; Accuracy: %0.2f' % (task.get_dataset().name,score.mean()))
            run.publish()                                              # publish the experiment on OpenML (optional)
            print('URL for run: %s/run/%d' %(openml.config.server,run.run_id))
        """
        import openml
        # The original listing imported only sklearn.tree and
        # sklearn.preprocessing, but the code below also accesses
        # sklearn.pipeline and sklearn.metrics; `import pkg.sub` does not
        # bind sibling submodules, so they must be imported explicitly.
        import sklearn.metrics
        import sklearn.pipeline
        import sklearn.preprocessing
        import sklearn.tree

        benchmark_suite = openml.study.get_study(
            'OpenML100', 'tasks'
        )  # obtain the benchmark suite
        # NOTE(review): sklearn.preprocessing.Imputer was removed in
        # scikit-learn 0.22 (replaced by sklearn.impute.SimpleImputer);
        # kept as-is to mirror the published listing — confirm the pinned
        # scikit-learn version still provides it.
        clf = sklearn.pipeline.Pipeline(
            steps=[
                ('imputer', sklearn.preprocessing.Imputer()),
                ('estimator', sklearn.tree.DecisionTreeClassifier())
            ]
        )  # build a sklearn classifier
        for task_id in benchmark_suite.tasks[:1]:  # iterate over a single task only
            task = openml.tasks.get_task(task_id)  # download the OpenML task
            X, y = task.get_X_and_y()  # get the data (not used in this example)
            # Self-assignment mirrors the listing's API-key line without
            # clobbering the key configured by the test harness.
            openml.config.apikey = openml.config.apikey  # set the OpenML Api Key
            run = openml.runs.run_model_on_task(
                task, clf,
            )  # run classifier on splits (requires API key)
            score = run.get_metric_fn(
                sklearn.metrics.accuracy_score
            )  # print accuracy score
            print('Data set: %s; Accuracy: %0.2f' % (
                task.get_dataset().name, score.mean()))
            run.publish()  # publish the experiment on OpenML (optional)
            print('URL for run: %s/run/%d' % (openml.config.server, run.run_id))