Skip to content

Commit 5b701bb

Browse files
authored
ADD unit test to ensure example listing (#421)
* ADD unit test to ensure example listing * Update test_study_examples.py
1 parent 5058e1d commit 5b701bb

File tree

1 file changed

+53
-0
lines changed

1 file changed

+53
-0
lines changed
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
from openml.testing import TestBase
2+
3+
4+
class TestStudyFunctions(TestBase):
    """Test the example code of Bischl et al. (2018)."""
    # NOTE: the docstring must be the first statement in the class body to
    # become ``__doc__``; in the original it was placed after the attribute
    # below and was therefore a dead bare-string statement.

    # nose/multiprocess plugin flag: tests in this class may run in parallel.
    _multiprocess_can_split_ = True

    def test_Figure1a(self):
        """Test listing in Figure 1a on a single task and the old OpenML100 study.

        The original listing is pasted into the comment below because the
        actual unit test differs a bit; for example, it does not run for all
        tasks, but only a single one.

        import openml
        import sklearn.tree, sklearn.preprocessing
        benchmark_suite = openml.study.get_study('OpenML-CC18','tasks') # obtain the benchmark suite
        clf = sklearn.pipeline.Pipeline(steps=[('imputer',sklearn.preprocessing.Imputer()), ('estimator',sklearn.tree.DecisionTreeClassifier())]) # build a sklearn classifier
        for task_id in benchmark_suite.tasks: # iterate over all tasks
            task = openml.tasks.get_task(task_id) # download the OpenML task
            X, y = task.get_X_and_y() # get the data (not used in this example)
            openml.config.apikey = 'FILL_IN_OPENML_API_KEY' # set the OpenML Api Key
            run = openml.runs.run_model_on_task(task,clf) # run classifier on splits (requires API key)
            score = run.get_metric_fn(sklearn.metrics.accuracy_score) # print accuracy score
            print('Data set: %s; Accuracy: %0.2f' % (task.get_dataset().name,score.mean()))
            run.publish() # publish the experiment on OpenML (optional)
            print('URL for run: %s/run/%d' %(openml.config.server,run.run_id))
        """
        import openml
        # Import each sklearn submodule explicitly: ``sklearn.pipeline`` and
        # ``sklearn.metrics`` are used below but were not imported in the
        # original listing, which only worked if some other import had
        # already pulled them in.
        import sklearn.metrics
        import sklearn.pipeline
        import sklearn.preprocessing
        import sklearn.tree

        benchmark_suite = openml.study.get_study(
            'OpenML100', 'tasks'
        )  # obtain the benchmark suite
        clf = sklearn.pipeline.Pipeline(
            steps=[
                ('imputer', sklearn.preprocessing.Imputer()),
                ('estimator', sklearn.tree.DecisionTreeClassifier())
            ]
        )  # build a sklearn classifier
        # Unlike the published listing, iterate over only the first task to
        # keep the unit test fast.
        for task_id in benchmark_suite.tasks[:1]:  # iterate over all tasks
            task = openml.tasks.get_task(task_id)  # download the OpenML task
            X, y = task.get_X_and_y()  # get the data (not used in this example)
            # Re-assigning the already-configured key mirrors the listing's
            # "set the API key" step without clobbering the test credentials.
            openml.config.apikey = openml.config.apikey  # set the OpenML Api Key
            run = openml.runs.run_model_on_task(
                task, clf,
            )  # run classifier on splits (requires API key)
            score = run.get_metric_fn(
                sklearn.metrics.accuracy_score
            )  # print accuracy score
            print('Data set: %s; Accuracy: %0.2f' % (
                task.get_dataset().name, score.mean()))
            run.publish()  # publish the experiment on OpenML (optional)
            print('URL for run: %s/run/%d' % (openml.config.server, run.run_id))

0 commit comments

Comments
 (0)