Skip to content

Commit 59f3ff6

Browse files
authored
Merge pull request #711 from openml/fix_695
Function to list all data qualities
2 parents 3fab583 + 9aac35d commit 59f3ff6

File tree

5 files changed

+36
-0
lines changed

5 files changed

+36
-0
lines changed

doc/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ Modules
7272
get_dataset
7373
get_datasets
7474
list_datasets
75+
list_qualities
7576
status_update
7677

7778
:mod:`openml.evaluations`: Evaluation Functions

doc/progress.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@
66
Changelog
77
=========
88

9+
0.10.0
10+
~~~~~~
11+
* ADD #695: A function to retrieve all the data quality measures available.
12+
913
0.9.0
1014
~~~~~
1115
* ADD #560: OpenML-Python can now handle regression tasks as well.

openml/datasets/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
get_datasets,
77
list_datasets,
88
status_update,
9+
list_qualities
910
)
1011
from .dataset import OpenMLDataset
1112
from .data_feature import OpenMLDataFeature
@@ -20,4 +21,5 @@
2021
'OpenMLDataset',
2122
'OpenMLDataFeature',
2223
'status_update',
24+
'list_qualities'
2325
]

openml/datasets/functions.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,30 @@ def _get_cache_directory(dataset: OpenMLDataset) -> str:
165165
return _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, dataset.dataset_id)
166166

167167

168+
def list_qualities() -> List[str]:
    """Return the list of data qualities available.

    The function performs an API call to retrieve the entire list of
    data qualities that are computed on the datasets uploaded.

    Returns
    -------
    list
        The entries parsed from the ``oml:quality`` elements of the
        server response.

    Raises
    ------
    ValueError
        If the returned XML does not contain ``oml:data_qualities_list``.
    TypeError
        If the returned XML does not contain ``oml:quality`` as a list.
    """
    api_call = "data/qualities/list"
    xml_string = openml._api_calls._perform_api_call(api_call, 'get')
    # ``force_list`` expects a collection of keys. Without the trailing comma
    # the parentheses yield a plain string, and the membership test done by
    # xmltodict degrades to a substring check that matches unintended keys.
    parsed = xmltodict.parse(xml_string, force_list=('oml:quality',))
    # Minimalistic check if the XML is useful
    if 'oml:data_qualities_list' not in parsed:
        raise ValueError('Error in return XML, does not contain '
                         '"oml:data_qualities_list"')
    container = parsed['oml:data_qualities_list']
    # An empty container element parses to None and a container without any
    # quality entries has no such key; report both with the intended error
    # instead of an unrelated KeyError / "NoneType is not subscriptable".
    qualities = container.get('oml:quality') if isinstance(container, dict) else None
    if not isinstance(qualities, list):
        raise TypeError('Error in return XML, does not contain '
                        '"oml:quality" as a list')
    return qualities
190+
191+
168192
def list_datasets(
169193
offset: Optional[int] = None,
170194
size: Optional[int] = None,

tests/test_datasets/test_dataset_functions.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1190,3 +1190,8 @@ def test_create_dataset_attributes_auto_without_df(self):
11901190
original_data_url=original_data_url,
11911191
paper_url=paper_url
11921192
)
1193+
1194+
def test_list_qualities(self):
    """Check that ``list_qualities`` returns a list made up of strings."""
    qualities = openml.datasets.list_qualities()
    # assertIsInstance reports the offending object on failure, whereas
    # comparing a boolean to True only reports "False != True".
    self.assertIsInstance(qualities, list)
    for quality in qualities:
        self.assertIsInstance(quality, str)

0 commit comments

Comments
 (0)