
Commit 993dbea

Merge branch 'develop' into learningcurves
2 parents 62eff10 + de66af0 commit 993dbea

File tree

11 files changed: +1672, -1565 lines

examples/OpenMLDemo.ipynb

Lines changed: 0 additions & 703 deletions
This file was deleted.

examples/OpenML_Tutorial.ipynb

Lines changed: 1344 additions & 0 deletions
Large diffs are not rendered by default.

examples/PyOpenML.ipynb

Lines changed: 0 additions & 862 deletions
This file was deleted.

openml/evaluations/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
from .evaluation import OpenMLEvaluation
from .functions import list_evaluations

openml/evaluations/evaluation.py

Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@

class OpenMLEvaluation(object):
    '''
    Contains all meta-information about a run / evaluation combination,
    according to the evaluation/list function.

    Parameters
    ----------
    run_id : int
    task_id : int
    setup_id : int
    flow_id : int
    flow_name : str
    data_id : int
    data_name : str
        the name of the dataset
    function : str
        the evaluation function of this item (e.g., accuracy)
    upload_time : str
        the time of evaluation
    value : float
        the value of this evaluation
    array_data : str
        list of information per class (e.g., in case of precision, auroc, recall)
    '''
    def __init__(self, run_id, task_id, setup_id, flow_id, flow_name,
                 data_id, data_name, function, upload_time, value,
                 array_data=None):
        self.run_id = run_id
        self.task_id = task_id
        self.setup_id = setup_id
        self.flow_id = flow_id
        self.flow_name = flow_name
        self.data_id = data_id
        self.data_name = data_name
        self.function = function
        self.upload_time = upload_time
        self.value = value
        self.array_data = array_data
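
For illustration, a minimal sketch of constructing an OpenMLEvaluation by hand; every value below is hypothetical, and in normal use instances are created by list_evaluations rather than directly:

from openml.evaluations import OpenMLEvaluation

# All ids and values here are made up for the example.
evaluation = OpenMLEvaluation(
    run_id=1, task_id=7312, setup_id=10, flow_id=100,
    flow_name='weka.J48', data_id=6, data_name='letter',
    function='predictive_accuracy',
    upload_time='2017-01-01 12:00:00', value=0.95)
print(evaluation.function, evaluation.value)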

openml/evaluations/functions.py

Lines changed: 93 additions & 0 deletions
@@ -0,0 +1,93 @@
import xmltodict

from .._api_calls import _perform_api_call
from ..evaluations import OpenMLEvaluation


def list_evaluations(function, offset=None, size=None, id=None, task=None,
                     setup=None, flow=None, uploader=None, tag=None):
    """List all run-evaluation pairs matching all of the given filters.

    Performs the API call evaluation/list/function/{function}/{filters}.

    Parameters
    ----------
    function : str
        the evaluation function, e.g., predictive_accuracy
    offset : int, optional
        the number of runs to skip, starting from the first
    size : int, optional
        the maximum number of runs to show
    id : list, optional
        a list of run ids to filter on
    task : list, optional
        a list of task ids to filter on
    setup : list, optional
        a list of setup ids to filter on
    flow : list, optional
        a list of flow ids to filter on
    uploader : list, optional
        a list of uploader ids to filter on
    tag : str, optional
        a tag to filter on

    Returns
    -------
    dict
        Dict of found evaluations, keyed by run id.
    """

    api_call = "evaluation/list/function/%s" % function
    if offset is not None:
        api_call += "/offset/%d" % int(offset)
    if size is not None:
        api_call += "/limit/%d" % int(size)
    if id is not None:
        api_call += "/run/%s" % ','.join([str(int(i)) for i in id])
    if task is not None:
        api_call += "/task/%s" % ','.join([str(int(i)) for i in task])
    if setup is not None:
        api_call += "/setup/%s" % ','.join([str(int(i)) for i in setup])
    if flow is not None:
        api_call += "/flow/%s" % ','.join([str(int(i)) for i in flow])
    if uploader is not None:
        api_call += "/uploader/%s" % ','.join([str(int(i)) for i in uploader])
    if tag is not None:
        api_call += "/tag/%s" % tag

    return _list_evaluations(api_call)


def _list_evaluations(api_call):
    """Helper function to parse API calls which return lists of runs."""

    xml_string = _perform_api_call(api_call)

    evals_dict = xmltodict.parse(xml_string)
    # Minimalistic check whether the XML is useful
    if 'oml:evaluations' not in evals_dict:
        raise ValueError('Error in return XML, does not contain "oml:evaluations": %s'
                         % str(evals_dict))

    if isinstance(evals_dict['oml:evaluations']['oml:evaluation'], list):
        evals_list = evals_dict['oml:evaluations']['oml:evaluation']
    elif isinstance(evals_dict['oml:evaluations']['oml:evaluation'], dict):
        evals_list = [evals_dict['oml:evaluations']['oml:evaluation']]
    else:
        raise TypeError()

    evals = dict()
    for eval_ in evals_list:
        run_id = int(eval_['oml:run_id'])
        array_data = None
        if 'oml:array_data' in eval_:
            array_data = eval_['oml:array_data']

        evaluation = OpenMLEvaluation(int(eval_['oml:run_id']), int(eval_['oml:task_id']),
                                      int(eval_['oml:setup_id']), int(eval_['oml:flow_id']),
                                      eval_['oml:flow_name'], int(eval_['oml:data_id']),
                                      eval_['oml:data_name'], eval_['oml:function'],
                                      eval_['oml:upload_time'], float(eval_['oml:value']),
                                      array_data)
        evals[run_id] = evaluation
    return evals
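
A usage sketch of the new listing function; the task id 7312 comes from the tests added in this commit, and the size filter simply bounds the result:

import openml

# Fetch at most 10 predictive-accuracy evaluations for task 7312.
evaluations = openml.evaluations.list_evaluations(
    'predictive_accuracy', task=[7312], size=10)

# The result is a dict keyed by run id.
for run_id, evaluation in evaluations.items():
    print(run_id, evaluation.value)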

openml/study/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
from .study import OpenMLStudy
from .functions import get_study

openml/study/functions.py

Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
import xmltodict

from openml.study import OpenMLStudy
from .._api_calls import _perform_api_call


def _multitag_to_list(result_dict, tag):
    if isinstance(result_dict[tag], list):
        return result_dict[tag]
    elif isinstance(result_dict[tag], dict):
        return [result_dict[tag]]
    else:
        raise TypeError()


def get_study(study_id):
    '''
    Retrieves all relevant information of an OpenML study from the server.
    Note that some of the fields (data, tasks, flows, setups) can be empty,
    depending on the information available on the server.
    '''
    xml_string = _perform_api_call("study/%d" % study_id)
    result_dict = xmltodict.parse(xml_string)['oml:study']
    id = int(result_dict['oml:id'])
    name = result_dict['oml:name']
    description = result_dict['oml:description']
    creation_date = result_dict['oml:creation_date']
    creator = result_dict['oml:creator']
    tags = []
    for tag in _multitag_to_list(result_dict, 'oml:tag'):
        tags.append({'name': tag['oml:name'],
                     'window_start': tag['oml:window_start'],
                     'write_access': tag['oml:write_access']})

    datasets = None
    tasks = None
    flows = None
    setups = None

    if 'oml:data' in result_dict:
        datasets = [int(x) for x in result_dict['oml:data']['oml:data_id']]

    if 'oml:tasks' in result_dict:
        tasks = [int(x) for x in result_dict['oml:tasks']['oml:task_id']]

    if 'oml:flows' in result_dict:
        flows = [int(x) for x in result_dict['oml:flows']['oml:flow_id']]

    if 'oml:setups' in result_dict:
        setups = [int(x) for x in result_dict['oml:setups']['oml:setup_id']]

    study = OpenMLStudy(id, name, description, creation_date, creator, tags,
                        datasets, tasks, flows, setups)
    return study
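
A usage sketch for get_study; the study id below is a placeholder chosen for illustration, not one referenced by this commit:

import openml

study = openml.study.get_study(1)  # hypothetical study id
print(study.name)
print(study.tasks)  # may be None if the study has no associated tasks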

openml/study/study.py

Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@

class OpenMLStudy(object):
    '''
    An OpenMLStudy represents the OpenML concept of a study. It contains
    the following information: name, id, description, creation date,
    creator id and a set of tags.

    According to this list of tags, the study object receives a list of
    OpenML object ids (datasets, flows, tasks and setups).

    Can be used to obtain all relevant information from a study at once.

    Parameters
    ----------
    id : int
        the study id
    name : str
        the name of the study (meta-info)
    description : str
        brief description (meta-info)
    creation_date : str
        date of creation (meta-info)
    creator : int
        openml user id of the owner / creator
    tag : list(dict)
        The list of tags shows which tags are associated with the study.
        Each tag is a dict of (tag) name, window_start and write_access.
    data : list
        a list of data ids associated with this study
    tasks : list
        a list of task ids associated with this study
    flows : list
        a list of flow ids associated with this study
    setups : list
        a list of setup ids associated with this study
    '''

    def __init__(self, id, name, description, creation_date, creator,
                 tag, data, tasks, flows, setups):
        self.id = id
        self.name = name
        self.description = description
        self.creation_date = creation_date
        self.creator = creator
        self.tag = tag
        self.data = data
        self.tasks = tasks
        self.flows = flows
        self.setups = setups
Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
import openml
import openml.evaluations
from openml.testing import TestBase


class TestEvaluationFunctions(TestBase):

    def test_evaluation_list_filter_task(self):
        openml.config.server = self.production_server

        task_id = 7312

        evaluations = openml.evaluations.list_evaluations("predictive_accuracy", task=[task_id])

        self.assertGreater(len(evaluations), 100)
        for run_id in evaluations.keys():
            self.assertEqual(evaluations[run_id].task_id, task_id)

    def test_evaluation_list_filter_uploader(self):
        openml.config.server = self.production_server

        uploader_id = 16

        evaluations = openml.evaluations.list_evaluations("predictive_accuracy", uploader=[uploader_id])

        self.assertGreater(len(evaluations), 100)
        # for run_id in evaluations.keys():
        #     self.assertEqual(evaluations[run_id].uploader, uploader_id)

    def test_evaluation_list_filter_setup(self):
        openml.config.server = self.production_server

        setup_id = 10

        evaluations = openml.evaluations.list_evaluations("predictive_accuracy", setup=[setup_id])

        self.assertGreater(len(evaluations), 100)
        for run_id in evaluations.keys():
            self.assertEqual(evaluations[run_id].setup_id, setup_id)

    def test_evaluation_list_filter_flow(self):
        openml.config.server = self.production_server

        flow_id = 100

        evaluations = openml.evaluations.list_evaluations("predictive_accuracy", flow=[flow_id])

        self.assertGreater(len(evaluations), 2)
        for run_id in evaluations.keys():
            self.assertEqual(evaluations[run_id].flow_id, flow_id)

    def test_evaluation_list_filter_run(self):
        openml.config.server = self.production_server

        run_id = 1

        evaluations = openml.evaluations.list_evaluations("predictive_accuracy", id=[run_id])

        self.assertEqual(len(evaluations), 1)
        for key in evaluations.keys():
            self.assertEqual(evaluations[key].run_id, run_id)

    def test_evaluation_list_limit(self):
        openml.config.server = self.production_server

        evaluations = openml.evaluations.list_evaluations("predictive_accuracy", size=100, offset=100)
        self.assertEqual(len(evaluations), 100)
