Skip to content

Commit 9d004b9

Browse files
authored
Merge branch 'develop' into fix_687
2 parents d809bb6 + 59f3ff6 commit 9d004b9

File tree

7 files changed

+48
-16
lines changed

7 files changed

+48
-16
lines changed

doc/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ Modules
7272
get_dataset
7373
get_datasets
7474
list_datasets
75+
list_qualities
7576
status_update
7677

7778
:mod:`openml.evaluations`: Evaluation Functions

doc/progress.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ Changelog
88

99
0.10.0
1010
~~~~~~
11+
1112
* ADD #687: Adds a function to retrieve the list of evaluation measures available.
13+
* ADD #695: Adds a function to retrieve all the data quality measures available.
1214

1315
0.9.0
1416
~~~~~

openml/datasets/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
get_datasets,
77
list_datasets,
88
status_update,
9+
list_qualities
910
)
1011
from .dataset import OpenMLDataset
1112
from .data_feature import OpenMLDataFeature
@@ -20,4 +21,5 @@
2021
'OpenMLDataset',
2122
'OpenMLDataFeature',
2223
'status_update',
24+
'list_qualities'
2325
]

openml/datasets/functions.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,30 @@ def _get_cache_directory(dataset: OpenMLDataset) -> str:
165165
return _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, dataset.dataset_id)
166166

167167

168+
def list_qualities() -> List[str]:
169+
""" Return list of data qualities available.
170+
171+
The function performs an API call to retrieve the entire list of
172+
data qualities that are computed on the datasets uploaded.
173+
174+
Returns
175+
-------
176+
list
177+
"""
178+
api_call = "data/qualities/list"
179+
xml_string = openml._api_calls._perform_api_call(api_call, 'get')
180+
qualities = xmltodict.parse(xml_string, force_list=('oml:quality'))
181+
# Minimalistic check if the XML is useful
182+
if 'oml:data_qualities_list' not in qualities:
183+
raise ValueError('Error in return XML, does not contain '
184+
'"oml:data_qualities_list"')
185+
if not isinstance(qualities['oml:data_qualities_list']['oml:quality'], list):
186+
raise TypeError('Error in return XML, does not contain '
187+
'"oml:quality" as a list')
188+
qualities = qualities['oml:data_qualities_list']['oml:quality']
189+
return qualities
190+
191+
168192
def list_datasets(
169193
offset: Optional[int] = None,
170194
size: Optional[int] = None,

openml/runs/functions.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -78,22 +78,22 @@ def run_model_on_task(
7878
Flow generated from the model.
7979
"""
8080

81-
extension = get_extension_by_model(model, raise_if_no_extension=True)
82-
if extension is None:
83-
# This should never happen and is only here to please mypy; it will be gone soon, once the
84-
# whole function is removed.
85-
raise TypeError(extension)
86-
8781
# TODO: At some point in the future do not allow for arguments in old order (6-2018).
8882
# Flexibility currently still allowed due to code-snippet in OpenML100 paper (3-2019).
8983
# When removing this please also remove the method `is_estimator` from the extension
9084
# interface as it is only used here (MF, 3-2019)
91-
if isinstance(model, OpenMLTask) and extension.is_estimator(model):
85+
if isinstance(model, OpenMLTask):
9286
warnings.warn("The old argument order (task, model) is deprecated and "
9387
"will not be supported in the future. Please use the "
9488
"order (model, task).", DeprecationWarning)
9589
task, model = model, task
9690

91+
extension = get_extension_by_model(model, raise_if_no_extension=True)
92+
if extension is None:
93+
# This should never happen and is only here to please mypy; it will be gone soon, once the
94+
# whole function is removed.
95+
raise TypeError(extension)
96+
9797
flow = extension.model_to_flow(model)
9898

9999
run = run_flow_on_task(
@@ -159,9 +159,6 @@ def run_flow_on_task(
159159
if flow_tags is not None and not isinstance(flow_tags, list):
160160
raise ValueError("flow_tags should be a list")
161161

162-
if task.task_id is None:
163-
raise ValueError("The task should be published at OpenML")
164-
165162
# TODO: At some point in the future do not allow for arguments in old order (changed 6-2018).
166163
# Flexibility currently still allowed due to code-snippet in OpenML100 paper (3-2019).
167164
if isinstance(flow, OpenMLTask) and isinstance(task, OpenMLFlow):
@@ -171,6 +168,9 @@ def run_flow_on_task(
171168
"order (model, Flow).", DeprecationWarning)
172169
task, flow = flow, task
173170

171+
if task.task_id is None:
172+
raise ValueError("The task should be published at OpenML")
173+
174174
flow.model = flow.extension.seed_model(flow.model, seed=seed)
175175

176176
# We only need to sync with the server right now if we want to upload the flow,

tests/test_datasets/test_dataset_functions.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1190,3 +1190,8 @@ def test_create_dataset_attributes_auto_without_df(self):
11901190
original_data_url=original_data_url,
11911191
paper_url=paper_url
11921192
)
1193+
1194+
def test_list_qualities(self):
1195+
qualities = openml.datasets.list_qualities()
1196+
self.assertEqual(isinstance(qualities, list), True)
1197+
self.assertEqual(all([isinstance(q, str) for q in qualities]), True)

tests/test_runs/test_run_functions.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -725,7 +725,7 @@ def _test_local_evaluations(self, run):
725725
self.assertGreaterEqual(alt_scores[idx], 0)
726726
self.assertLessEqual(alt_scores[idx], 1)
727727

728-
def test_local_run_metric_score_swapped_parameter_order_model(self):
728+
def test_local_run_swapped_parameter_order_model(self):
729729

730730
# construct scikit-learn classifier
731731
clf = Pipeline(steps=[('imputer', Imputer(strategy='median')),
@@ -736,15 +736,14 @@ def test_local_run_metric_score_swapped_parameter_order_model(self):
736736

737737
# invoke OpenML run
738738
run = openml.runs.run_model_on_task(
739-
model=clf,
740-
task=task,
739+
task, clf,
741740
avoid_duplicate_runs=False,
742741
upload_flow=False,
743742
)
744743

745744
self._test_local_evaluations(run)
746745

747-
def test_local_run_metric_score_swapped_parameter_order_flow(self):
746+
def test_local_run_swapped_parameter_order_flow(self):
748747

749748
# construct scikit-learn classifier
750749
clf = Pipeline(steps=[('imputer', Imputer(strategy='median')),
@@ -756,8 +755,7 @@ def test_local_run_metric_score_swapped_parameter_order_flow(self):
756755

757756
# invoke OpenML run
758757
run = openml.runs.run_flow_on_task(
759-
flow=flow,
760-
task=task,
758+
task, flow,
761759
avoid_duplicate_runs=False,
762760
upload_flow=False,
763761
)

0 commit comments

Comments
 (0)