Skip to content

Commit 18d93cd

Browse files
authored
Merge pull request #278 from openml/setuplist
initial commit for setup list
2 parents ccd0912 + a190f32 commit 18d93cd

File tree

8 files changed

+148
-23
lines changed

8 files changed

+148
-23
lines changed

openml/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
from . import runs
2323
from . import flows
2424
from . import setups
25+
from . import study
26+
from . import evaluations
2527
from .runs import OpenMLRun
2628
from .tasks import OpenMLTask, OpenMLSplit
2729
from .flows import OpenMLFlow
@@ -66,5 +68,6 @@ def populate_cache(task_ids=None, dataset_ids=None, flow_ids=None,
6668

6769

6870
__all__ = ['OpenMLDataset', 'OpenMLDataFeature', 'OpenMLRun',
69-
'OpenMLSplit', 'datasets', 'OpenMLTask', 'OpenMLFlow',
71+
'OpenMLSplit', 'OpenMLEvaluation', 'OpenMLSetup',
72+
'OpenMLTask', 'OpenMLFlow', 'datasets', 'evaluations',
7073
'config', 'runs', 'flows', 'tasks', 'setups']

openml/setups/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1-
from .functions import get_setup, setup_exists, initialize_model
1+
from .setup import OpenMLSetup
2+
from .functions import get_setup, list_setups, setup_exists, initialize_model
23

3-
__all__ = ['get_setup', 'setup_exists', 'initialize_model']
4+
__all__ = ['get_setup', 'list_setups', 'setup_exists', 'initialize_model']

openml/setups/functions.py

Lines changed: 78 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import xmltodict
55

66
from .setup import OpenMLSetup, OpenMLParameter
7-
from openml.flows import sklearn_to_flow, flow_exists
7+
from openml.flows import flow_exists
88

99

1010
def setup_exists(flow, model=None):
@@ -74,6 +74,80 @@ def get_setup(setup_id):
7474
return _create_setup_from_xml(result_dict)
7575

7676

77+
def list_setups(flow=None, tag=None, setup=None, offset=None, size=None):
    """List all setups matching all of the given filters.

    Builds the API call ``setup/list/{filters}``: every filter that is
    not None is appended as a ``/name/value`` pair, and the assembled
    call is handed to ``_list_setups``.

    Parameters
    ----------
    flow : int, optional
        Flow id, sent as the ``flow`` filter.
    tag : str, optional
        Tag, sent verbatim as the ``tag`` filter.
    setup : list(int), optional
        Setup ids, sent comma-separated as the ``setup`` filter.
    offset : int, optional
        Sent as the ``offset`` filter.
    size : int, optional
        Sent as the ``limit`` filter.

    Returns
    -------
    dict
        The result of ``_list_setups`` for the assembled call: a mapping
        from setup id to the corresponding setup object.
    """
    api_call = "setup/list"
    if offset is not None:
        api_call += "/offset/%d" % int(offset)
    if size is not None:
        api_call += "/limit/%d" % int(size)
    if setup is not None:
        api_call += "/setup/%s" % ','.join(str(int(i)) for i in setup)
    if flow is not None:
        # Coerce to int like the other numeric filters, so that a
        # non-numeric value fails here instead of producing a bogus URL.
        api_call += "/flow/%d" % int(flow)
    if tag is not None:
        api_call += "/tag/%s" % tag

    return _list_setups(api_call)
113+
114+
115+
def _list_setups(api_call):
    """Helper function to parse API calls which are lists of setups.

    Parameters
    ----------
    api_call : str
        The API call to perform.

    Returns
    -------
    dict
        Maps each setup id to the setup object created from the
        matching ``oml:setup`` entry of the response.

    Raises
    ------
    ValueError
        If the parsed response has no ``oml:setups`` root element, or
        that element lacks the expected ``@xmlns:oml`` namespace value.
    TypeError
        If the ``oml:setup`` entry is neither a list nor a dict.
    """
    xml_string = openml._api_calls._perform_api_call(api_call)

    setups_dict = xmltodict.parse(xml_string)
    # Minimalistic check if the XML is useful
    if 'oml:setups' not in setups_dict:
        raise ValueError('Error in return XML, does not contain "oml:setups": %s'
                         % str(setups_dict))
    elif '@xmlns:oml' not in setups_dict['oml:setups']:
        raise ValueError('Error in return XML, does not contain '
                         '"oml:setups"/@xmlns:oml: %s'
                         % str(setups_dict))
    elif setups_dict['oml:setups']['@xmlns:oml'] != 'http://openml.org/openml':
        raise ValueError('Error in return XML, value of '
                         '"oml:setups"/@xmlns:oml is not '
                         '"http://openml.org/openml": %s'
                         % str(setups_dict))

    # The parsed XML holds a single dict when there is one setup and a
    # list of dicts when there are several; normalise to a list.
    setup_node = setups_dict['oml:setups']['oml:setup']
    if isinstance(setup_node, list):
        setups_list = setup_node
    elif isinstance(setup_node, dict):
        setups_list = [setup_node]
    else:
        raise TypeError('Expected "oml:setup" to be a list or dict, '
                        'received: %s' % str(type(setup_node)))

    setups = dict()
    for setup_ in setups_list:
        # making it a dict to give it the right format
        current = _create_setup_from_xml({'oml:setup_parameters': setup_})
        setups[current.setup_id] = current

    return setups
149+
150+
77151
def initialize_model(setup_id):
78152
'''
79153
Initialize a model based on a setup_id (i.e., using the exact
@@ -147,6 +221,7 @@ def _create_setup_from_xml(result_dict):
147221
'''
148222
Turns an API xml result into an OpenMLSetup object
149223
'''
224+
setup_id = int(result_dict['oml:setup_parameters']['oml:setup_id'])
150225
flow_id = int(result_dict['oml:setup_parameters']['oml:flow_id'])
151226
parameters = {}
152227
if 'oml:parameter' not in result_dict['oml:setup_parameters']:
@@ -164,7 +239,7 @@ def _create_setup_from_xml(result_dict):
164239
else:
165240
raise ValueError('Expected None, list or dict, received someting else: %s' %str(type(xml_parameters)))
166241

167-
return OpenMLSetup(flow_id, parameters)
242+
return OpenMLSetup(setup_id, flow_id, parameters)
168243

169244
def _create_setup_parameter_from_xml(result_dict):
170245
return OpenMLParameter(int(result_dict['oml:id']),
@@ -173,4 +248,4 @@ def _create_setup_parameter_from_xml(result_dict):
173248
result_dict['oml:parameter_name'],
174249
result_dict['oml:data_type'],
175250
result_dict['oml:default_value'],
176-
result_dict['oml:value'])
251+
result_dict['oml:value'])

openml/setups/setup.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,24 @@ class OpenMLSetup(object):
44
55
Parameters
66
----------
7+
setup_id : int
8+
The OpenML setup id
79
flow_id : int
810
The flow that it is built upon
911
parameters : dict
1012
The setting of the parameters
1113
"""
1214

13-
def __init__(self, flow_id, parameters):
15+
def __init__(self, setup_id, flow_id, parameters):
16+
if not isinstance(setup_id, int):
17+
raise ValueError('setup id should be int')
18+
if not isinstance(flow_id, int):
19+
raise ValueError('flow id should be int')
20+
if parameters is not None:
21+
if not isinstance(parameters, dict):
22+
raise ValueError('parameters should be dict')
23+
24+
self.setup_id = setup_id
1425
self.flow_id = flow_id
1526
self.parameters = parameters
1627

openml/study/functions.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,11 @@ def get_study(study_id):
2727
creator = result_dict['oml:creator']
2828
tags = []
2929
for tag in _multitag_to_list(result_dict, 'oml:tag'):
30-
tags.append({'name': tag['oml:name'],
31-
'window_start': tag['oml:window_start'],
32-
'write_access': tag['oml:write_access']})
30+
current_tag = {'name': tag['oml:name'],
31+
'write_access': tag['oml:write_access']}
32+
if 'oml:window_start' in tag:
33+
current_tag['window_start'] = tag['oml:window_start']
34+
tags.append(current_tag)
3335

3436
datasets = None
3537
tasks = None

tests/test_flows/test_flow.py

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -239,17 +239,26 @@ def get_sentinel():
239239
def test_existing_flow_exists(self):
240240
# create a flow
241241
nb = sklearn.naive_bayes.GaussianNB()
242-
flow = openml.flows.sklearn_to_flow(nb)
243-
flow, _ = self._add_sentinel_to_flow_name(flow, None)
244-
#publish the flow
245-
flow = flow.publish()
246-
#redownload the flow
247-
flow = openml.flows.get_flow(flow.flow_id)
248-
249-
# check if flow exists can find it
250-
flow = openml.flows.get_flow(flow.flow_id)
251-
downloaded_flow_id = openml.flows.flow_exists(flow.name, flow.external_version)
252-
self.assertEquals(downloaded_flow_id, flow.flow_id)
242+
243+
steps = [('imputation', sklearn.preprocessing.Imputer(strategy='median')),
244+
('hotencoding', sklearn.preprocessing.OneHotEncoder(sparse=False,
245+
handle_unknown='ignore')),
246+
('variencethreshold', sklearn.feature_selection.VarianceThreshold()),
247+
('classifier', sklearn.tree.DecisionTreeClassifier())]
248+
complicated = sklearn.pipeline.Pipeline(steps=steps)
249+
250+
for classifier in [nb, complicated]:
251+
flow = openml.flows.sklearn_to_flow(classifier)
252+
flow, _ = self._add_sentinel_to_flow_name(flow, None)
253+
#publish the flow
254+
flow = flow.publish()
255+
#redownload the flow
256+
flow = openml.flows.get_flow(flow.flow_id)
257+
258+
# check if flow exists can find it
259+
flow = openml.flows.get_flow(flow.flow_id)
260+
downloaded_flow_id = openml.flows.flow_exists(flow.name, flow.external_version)
261+
self.assertEquals(downloaded_flow_id, flow.flow_id)
253262

254263
def test_sklearn_to_upload_to_flow(self):
255264
iris = sklearn.datasets.load_iris()

tests/test_flows/test_flow_functions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def test_list_flows(self):
2525
# data from the internet...
2626
flows = openml.flows.list_flows()
2727
# lower bound for the number of flows expected on the server
28-
self.assertGreaterEqual(len(flows), 3000)
28+
self.assertGreaterEqual(len(flows), 1500)
2929
for fid in flows:
3030
self._check_flow(flows[fid])
3131

tests/test_setups/test_setup_functions.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ def get_params(self, deep=True):
4747
return {}
4848

4949

50-
5150
class TestRun(TestBase):
5251

5352
def test_nonexisting_setup_exists(self):
@@ -117,3 +116,28 @@ def test_get_setup(self):
117116
self.assertIsNone(current.parameters)
118117
else:
119118
self.assertEquals(len(current.parameters), num_params[idx])
119+
120+
def test_setup_list_filter_flow(self):
    """Check that filtering by flow only yields setups of that flow."""
    openml.config.server = self.production_server

    flow_id = 5873

    setups = openml.setups.list_setups(flow=flow_id)

    self.assertGreater(len(setups), 0)  # TODO: please adjust 0
    # assertEqual replaces the deprecated assertEquals alias.
    for setup in setups.values():
        self.assertEqual(setup.flow_id, flow_id)
130+
131+
def test_setuplist_offset(self):
    """Check that two consecutive pages of setups are full and disjoint."""
    # TODO: remove after pull on live for better testing
    # openml.config.server = self.production_server

    size = 100
    setups = openml.setups.list_setups(offset=0, size=size)
    self.assertEqual(len(setups), size)
    setups2 = openml.setups.list_setups(offset=size, size=size)
    # Verify the second page itself; the first page was checked above.
    self.assertEqual(len(setups2), size)

    # The union only reaches 2 * size if no setup id is on both pages.
    all_ids = set(setups.keys()).union(setups2.keys())

    self.assertEqual(len(all_ids), size * 2)

0 commit comments

Comments
 (0)