Commit 91be1ac

Merge pull request #747 from openml/add_#737
Add #737
2 parents e6ee09d + 1065264 · commit 91be1ac

File tree

4 files changed: +131 −4 lines


doc/progress.rst

Lines changed: 1 addition & 0 deletions
@@ -8,6 +8,7 @@ Changelog
 
 0.10.0
 ~~~~~~
+* ADD #737: Add list_evaluations_setups to return hyperparameters along with list of evaluations.
 * FIX #261: Test server is cleared of all files uploaded during unit testing.
 * FIX #447: All files created by unit tests no longer persist in local.
 * FIX #608: Fixing dataset_id referenced before assignment error in get_run function.

openml/evaluations/__init__.py

Lines changed: 3 additions & 2 deletions
@@ -1,4 +1,5 @@
 from .evaluation import OpenMLEvaluation
-from .functions import list_evaluations, list_evaluation_measures
+from .functions import list_evaluations, list_evaluation_measures, list_evaluations_setups
 
-__all__ = ['OpenMLEvaluation', 'list_evaluations', 'list_evaluation_measures']
+__all__ = ['OpenMLEvaluation', 'list_evaluations', 'list_evaluation_measures',
+           'list_evaluations_setups']
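
With this export in place, the new helper should be importable straight from the evaluations subpackage; a minimal sketch, assuming the package is installed from this branch:

    # the new function sits alongside the existing evaluation listings
    from openml.evaluations import list_evaluations, list_evaluations_setups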

openml/evaluations/functions.py

Lines changed: 91 additions & 2 deletions
@@ -1,12 +1,14 @@
 import json
 import xmltodict
 import pandas as pd
+import numpy as np
 from typing import Union, List, Optional, Dict
 import collections
 
 import openml.utils
 import openml._api_calls
 from ..evaluations import OpenMLEvaluation
+import openml
 
 
 def list_evaluations(
@@ -209,8 +211,8 @@ def __list_evaluations(api_call, output_format='object'):
                  'array_data': array_data}
 
     if output_format == 'dataframe':
-        evals = pd.DataFrame.from_dict(evals, orient='index')
-
+        rows = [value for key, value in evals.items()]
+        evals = pd.DataFrame.from_records(rows, columns=rows[0].keys())
     return evals
 
 
@@ -238,3 +240,90 @@ def list_evaluation_measures() -> List[str]:
                            '"oml:measure" as a list')
     qualities = qualities['oml:evaluation_measures']['oml:measures'][0]['oml:measure']
     return qualities
+
+
+def list_evaluations_setups(
+    function: str,
+    offset: Optional[int] = None,
+    size: Optional[int] = None,
+    id: Optional[List] = None,
+    task: Optional[List] = None,
+    setup: Optional[List] = None,
+    flow: Optional[List] = None,
+    uploader: Optional[List] = None,
+    tag: Optional[str] = None,
+    per_fold: Optional[bool] = None,
+    sort_order: Optional[str] = None,
+    output_format: str = 'dataframe'
+) -> Union[Dict, pd.DataFrame]:
+    """
+    List all run-evaluation pairs matching all of the given filters,
+    together with their hyperparameter settings.
+
+    Parameters
+    ----------
+    function : str
+        the evaluation function, e.g., predictive_accuracy
+    offset : int, optional
+        the number of runs to skip, starting from the first
+    size : int, optional
+        the maximum number of runs to show
+    id : list[int], optional
+        the list of evaluation IDs
+    task : list[int], optional
+        the list of task IDs
+    setup : list[int], optional
+        the list of setup IDs
+    flow : list[int], optional
+        the list of flow IDs
+    uploader : list[int], optional
+        the list of uploader IDs
+    tag : str, optional
+        filter evaluations based on the given tag
+    per_fold : bool, optional
+    sort_order : str, optional
+        order of sorting evaluations, ascending ("asc") or descending ("desc")
+    output_format : str, optional (default='dataframe')
+        The parameter decides the format of the output.
+        - If 'dict' the output is a dict of dicts
+        - If 'dataframe' the output is a pandas DataFrame
+
+    Returns
+    -------
+    dict or dataframe with hyperparameter settings as a list of tuples.
+    """
+    # List evaluations
+    evals = list_evaluations(function=function, offset=offset, size=size, id=id, task=task,
+                             setup=setup, flow=flow, uploader=uploader, tag=tag,
+                             per_fold=per_fold, sort_order=sort_order, output_format='dataframe')
+
+    # List setups
+    # Split the setups in evals into chunks of N setups, as list_setups does not support a large size
+    df = pd.DataFrame()
+    if len(evals) != 0:
+        N = 100
+        setup_chunks = np.split(evals['setup_id'].unique(),
+                                ((len(evals['setup_id'].unique()) - 1) // N) + 1)
+        setups = pd.DataFrame()
+        for setup in setup_chunks:
+            result = pd.DataFrame(openml.setups.list_setups(setup=setup, output_format='dataframe'))
+            result.drop('flow_id', axis=1, inplace=True)
+            # Concatenate the resulting setup chunks into a single dataframe
+            setups = pd.concat([setups, result], ignore_index=True)
+        parameters = []
+        # Convert the parameters of each setup into a list of (hyperparameter, value) tuples
+        for parameter_dict in setups['parameters']:
+            if parameter_dict is not None:
+                parameters.append([tuple([param['parameter_name'], param['value']])
+                                   for param in parameter_dict.values()])
+            else:
+                parameters.append([])
+        setups['parameters'] = parameters
+        # Merge setups with evaluations
+        df = pd.merge(evals, setups, on='setup_id', how='left')
+
+    if output_format == 'dataframe':
+        return df
+    else:
+        return df.to_dict(orient='index')
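
For reference, a minimal usage sketch of the new helper (the flow id is the one used by the tests below; the exact columns depend on what the server returns):

    import openml

    # Fetch predictive_accuracy evaluations together with each run's
    # hyperparameter settings; a pandas DataFrame is returned by default.
    results = openml.evaluations.list_evaluations_setups(
        function="predictive_accuracy",
        flow=[405],
        size=10,
        sort_order="desc",
    )

    # Each row carries the evaluation value plus a 'parameters' column holding
    # (hyperparameter, value) tuples for the run's setup.
    print(results[['run_id', 'value', 'parameters']].head())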

tests/test_evaluations/test_evaluation_functions.py

Lines changed: 36 additions & 0 deletions
@@ -6,6 +6,30 @@
 class TestEvaluationFunctions(TestBase):
     _multiprocess_can_split_ = True
 
+    def _check_list_evaluation_setups(self, size, **kwargs):
+        evals_setups = openml.evaluations.list_evaluations_setups("predictive_accuracy",
+                                                                   **kwargs, size=size,
+                                                                   sort_order='desc',
+                                                                   output_format='dataframe')
+        evals = openml.evaluations.list_evaluations("predictive_accuracy",
+                                                    **kwargs, size=size,
+                                                    sort_order='desc',
+                                                    output_format='dataframe')
+
+        # Check that the list is non-empty
+        self.assertGreater(len(evals_setups), 0)
+        # Check that the output is sorted in the requested (descending) order
+        self.assertSequenceEqual(sorted(evals_setups['value'].tolist(), reverse=True),
+                                 evals_setups['value'].tolist())
+
+        # Check that the output and order of list_evaluations is preserved
+        self.assertSequenceEqual(evals_setups['run_id'].tolist(), evals['run_id'].tolist())
+        # Check that the hyper-parameter column matches the parameter settings of each run
+        for index, row in evals_setups.iterrows():
+            params = openml.runs.get_run(row['run_id']).parameter_settings
+            hyper_params = [tuple([param['oml:name'], param['oml:value']]) for param in params]
+            self.assertTrue(sorted(row['parameters']) == sorted(hyper_params))
+
     def test_evaluation_list_filter_task(self):
         openml.config.server = self.production_server
 
@@ -142,3 +166,15 @@ def test_list_evaluation_measures(self):
         measures = openml.evaluations.list_evaluation_measures()
         self.assertEqual(isinstance(measures, list), True)
         self.assertEqual(all([isinstance(s, str) for s in measures]), True)
+
+    def test_list_evaluations_setups_filter_flow(self):
+        openml.config.server = self.production_server
+        flow_id = [405]
+        size = 100
+        self._check_list_evaluation_setups(size, flow=flow_id)
+
+    def test_list_evaluations_setups_filter_task(self):
+        openml.config.server = self.production_server
+        task_id = [6]
+        size = 100
+        self._check_list_evaluation_setups(size, task=task_id)
