Merge branch 'develop' into fix_687

PGijsbers · web-flow · commit 9d004b9304bb · 2019-06-20T02:51:49.000-07:00
diff --git a/doc/api.rst b/doc/api.rst
@@ -72,6 +72,7 @@ Modules
     get_dataset
     get_datasets
     list_datasets
+    list_qualities
     status_update
 
 :mod:`openml.evaluations`: Evaluation Functions
diff --git a/doc/progress.rst b/doc/progress.rst
@@ -8,7 +8,9 @@ Changelog
 
 0.10.0
 ~~~~~~
+
 * ADD #687: Adds a function to retrieve the list of evaluation measures available.
+* ADD #695: Adds a function to retrieve all the data quality measures available.
 
 0.9.0
 ~~~~~
diff --git a/openml/datasets/__init__.py b/openml/datasets/__init__.py
@@ -6,6 +6,7 @@
     get_datasets,
     list_datasets,
     status_update,
+    list_qualities
 )
 from .dataset import OpenMLDataset
 from .data_feature import OpenMLDataFeature
@@ -20,4 +21,5 @@
     'OpenMLDataset',
     'OpenMLDataFeature',
     'status_update',
+    'list_qualities'
 ]
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
@@ -165,6 +165,30 @@ def _get_cache_directory(dataset: OpenMLDataset) -> str:
     return _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, dataset.dataset_id)
 
 
+def list_qualities() -> List[str]:
+    """ Return list of data qualities available.
+
+    The function performs an API call to retrieve the entire list of
+    data qualities that are computed on the datasets uploaded.
+
+    Returns
+    -------
+    list
+        The ``oml:quality`` entries of the server response.
+
+    Raises
+    ------
+    ValueError
+        If the response does not contain ``oml:data_qualities_list``.
+    TypeError
+        If the response does not contain ``oml:quality`` as a list.
+    """
+    api_call = "data/qualities/list"
+    xml_string = openml._api_calls._perform_api_call(api_call, 'get')
+    # ``force_list`` has to be a real tuple: ``('oml:quality')`` is a plain string, and a
+    # membership test against a string matches substrings instead of whole tag names.
+    parsed = xmltodict.parse(xml_string, force_list=('oml:quality',))
+    # Minimalistic check if the XML is useful
+    if 'oml:data_qualities_list' not in parsed:
+        raise ValueError('Error in return XML, does not contain '
+                         '"oml:data_qualities_list"')
+    quality_list = parsed['oml:data_qualities_list']
+    # A list element without any 'oml:quality' child is reported through the same
+    # TypeError as a non-list value, rather than leaking a KeyError to the caller.
+    qualities = quality_list.get('oml:quality') if isinstance(quality_list, dict) else None
+    if not isinstance(qualities, list):
+        raise TypeError('Error in return XML, does not contain '
+                        '"oml:quality" as a list')
+    return qualities
+
+
 def list_datasets(
     offset: Optional[int] = None,
     size: Optional[int] = None,
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
@@ -78,22 +78,22 @@ def run_model_on_task(
         Flow generated from the model.
     """
 
-    extension = get_extension_by_model(model, raise_if_no_extension=True)
-    if extension is None:
-        # This should never happen and is only here to please mypy will be gone soon once the
-        # whole function is removed
-        raise TypeError(extension)
-
     # TODO: At some point in the future do not allow for arguments in old order (6-2018).
     # Flexibility currently still allowed due to code-snippet in OpenML100 paper (3-2019).
     # When removing this please also remove the method `is_estimator` from the extension
     # interface as it is only used here (MF, 3-2019)
-    if isinstance(model, OpenMLTask) and extension.is_estimator(model):
+    if isinstance(model, OpenMLTask):
         warnings.warn("The old argument order (task, model) is deprecated and "
                       "will not be supported in the future. Please use the "
                       "order (model, task).", DeprecationWarning)
         task, model = model, task
 
+    extension = get_extension_by_model(model, raise_if_no_extension=True)
+    if extension is None:
+        # This should never happen; the check is only here to please mypy and will be
+        # gone once the whole function is removed.
+        raise TypeError(extension)
+
     flow = extension.model_to_flow(model)
 
     run = run_flow_on_task(
@@ -159,9 +159,6 @@ def run_flow_on_task(
     if flow_tags is not None and not isinstance(flow_tags, list):
         raise ValueError("flow_tags should be a list")
 
-    if task.task_id is None:
-        raise ValueError("The task should be published at OpenML")
-
     # TODO: At some point in the future do not allow for arguments in old order (changed 6-2018).
     # Flexibility currently still allowed due to code-snippet in OpenML100 paper (3-2019).
     if isinstance(flow, OpenMLTask) and isinstance(task, OpenMLFlow):
@@ -171,6 +168,9 @@ def run_flow_on_task(
                       "order (model, Flow).", DeprecationWarning)
         task, flow = flow, task
 
+    if task.task_id is None:
+        raise ValueError("The task should be published at OpenML")
+
     flow.model = flow.extension.seed_model(flow.model, seed=seed)
 
     # We only need to sync with the server right now if we want to upload the flow,
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
@@ -1190,3 +1190,8 @@ def test_create_dataset_attributes_auto_without_df(self):
                 original_data_url=original_data_url,
                 paper_url=paper_url
             )
+
+    def test_list_qualities(self):
+        """The listing of data qualities is a list that contains only strings."""
+        qualities = openml.datasets.list_qualities()
+        # assertIsInstance reports the offending object on failure, unlike comparing
+        # the boolean result of isinstance() against True.
+        self.assertIsInstance(qualities, list)
+        for quality in qualities:
+            self.assertIsInstance(quality, str)
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
@@ -725,7 +725,7 @@ def _test_local_evaluations(self, run):
                 self.assertGreaterEqual(alt_scores[idx], 0)
                 self.assertLessEqual(alt_scores[idx], 1)
 
-    def test_local_run_metric_score_swapped_parameter_order_model(self):
+    def test_local_run_swapped_parameter_order_model(self):
 
         # construct sci-kit learn classifier
         clf = Pipeline(steps=[('imputer', Imputer(strategy='median')),
@@ -736,15 +736,14 @@ def test_local_run_metric_score_swapped_parameter_order_model(self):
 
         # invoke OpenML run
         run = openml.runs.run_model_on_task(
-            model=clf,
-            task=task,
+            task, clf,
             avoid_duplicate_runs=False,
             upload_flow=False,
         )
 
         self._test_local_evaluations(run)
 
-    def test_local_run_metric_score_swapped_parameter_order_flow(self):
+    def test_local_run_swapped_parameter_order_flow(self):
 
         # construct sci-kit learn classifier
         clf = Pipeline(steps=[('imputer', Imputer(strategy='median')),
@@ -756,8 +755,7 @@ def test_local_run_metric_score_swapped_parameter_order_flow(self):
 
         # invoke OpenML run
         run = openml.runs.run_flow_on_task(
-            flow=flow,
-            task=task,
+            task, flow,
             avoid_duplicate_runs=False,
             upload_flow=False,
         )

Original file line number	Diff line number	Diff line change
`@@ -6,6 +6,7 @@`
`6`	`6`	`get_datasets,`
`7`	`7`	`list_datasets,`
`8`	`8`	`status_update,`
	`9`	`+ list_qualities`
`9`	`10`	`)`
`10`	`11`	`from .dataset import OpenMLDataset`
`11`	`12`	`from .data_feature import OpenMLDataFeature`
`@@ -20,4 +21,5 @@`
`20`	`21`	`'OpenMLDataset',`
`21`	`22`	`'OpenMLDataFeature',`
`22`	`23`	`'status_update',`
	`24`	`+ 'list_qualities'`
`23`	`25`	`]`
Original file line number	Diff line number	Diff line change
`@@ -1190,3 +1190,8 @@ def test_create_dataset_attributes_auto_without_df(self):`
`1190`	`1190`	`original_data_url=original_data_url,`
`1191`	`1191`	`paper_url=paper_url`
`1192`	`1192`	`)`
	`1193`	`+`
	`1194`	`+ def test_list_qualities(self):`
	`1195`	`+ qualities = openml.datasets.list_qualities()`
	`1196`	`+ self.assertEqual(isinstance(qualities, list), True)`
	`1197`	`+ self.assertEqual(all([isinstance(q, str) for q in qualities]), True)`