@@ -173,6 +173,36 @@ def __init__(self, name, description, format=None,
173173 else :
174174 self .data_pickle_file = None
175175
def __str__(self):
    """Return a human-readable, multi-line summary of the dataset.

    Builds an underlined "OpenML Dataset" header followed by one
    dot-padded ``name: value`` line per known field, in a fixed order.
    Optional fields (upload date, OpenML URL, instance count) are only
    included when the corresponding attribute/quality is available.

    Returns
    -------
    str
        The formatted summary.
    """
    header = "OpenML Dataset"
    header = '{}\n{}\n'.format(header, '=' * len(header))

    # Strip the API suffix from the configured server to get the site base URL.
    base_url = "{}".format(openml.config.server[:-len('api/v1/xml')])
    # NOTE: keys here must match the entries of ``order`` below exactly
    # (case-sensitive), otherwise the field is silently dropped from the
    # output.  "Data File" / "Pickle File" previously used lowercase
    # "file" and were never printed — fixed.
    fields = {"Name": self.name,
              "Version": self.version,
              "Format": self.format,
              "Licence": self.licence,
              "Download URL": self.url,
              "Data File": self.data_file,
              "Pickle File": self.data_pickle_file,
              "# of features": len(self.features)}
    if self.upload_date is not None:
        fields["Upload Date"] = self.upload_date.replace('T', ' ')
    if self.dataset_id is not None:
        fields["OpenML URL"] = "{}d/{}".format(base_url, self.dataset_id)
    # ``.get`` instead of ``[]``: a dataset without this quality computed
    # should simply omit the line, not raise KeyError.
    if self.qualities.get('NumberOfInstances') is not None:
        fields["# of instances"] = int(self.qualities['NumberOfInstances'])

    # determines the order in which the information will be printed
    order = ["Name", "Version", "Format", "Upload Date", "Licence",
             "Download URL", "OpenML URL", "Data File", "Pickle File",
             "# of features", "# of instances"]
    fields = [(key, fields[key]) for key in order if key in fields]

    # Pad every field name with dots so the values line up in a column.
    longest_field_name_length = max(len(name) for name, value in fields)
    field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)
    body = '\n'.join(field_line_format.format(name, value)
                     for name, value in fields)
    return header + body
205+
176206 def _data_arff_to_pickle (self , data_file ):
177207 data_pickle_file = data_file .replace ('.arff' , '.pkl.py3' )
178208 if os .path .exists (data_pickle_file ):
@@ -368,9 +398,25 @@ def decode_arff(fh):
368398 def _convert_array_format (data , array_format , attribute_names ):
369399 """Convert a dataset to a given array format.
370400
371- By default, the data are stored as a sparse matrix or a pandas
372- dataframe. One might be interested to get a pandas SparseDataFrame or a
373- NumPy array instead, respectively.
401+ Converts to numpy array if data is non-sparse.
402+ Converts to a sparse dataframe if data is sparse.
403+
404+ Parameters
405+ ----------
406+ array_format : str {'array', 'dataframe'}
407+ Desired data type of the output
408+ - If array_format='array'
409+ If data is non-sparse
410+ Converts to numpy-array
411+ Enforces numeric encoding of categorical columns
412+ Missing values are represented as NaN in the numpy-array
413+ else returns data as is
414+ - If array_format='dataframe'
415+ If data is sparse
416+ Works only on sparse data
417+ Converts sparse data to sparse dataframe
418+ else returns data as is
419+
374420 """
375421 if array_format == "array" and not scipy .sparse .issparse (data ):
376422 # We encode the categories such that they are integer to be able
@@ -396,8 +442,11 @@ def _encode_if_category(column):
396442 'PyOpenML cannot handle string when returning numpy'
397443 ' arrays. Use dataset_format="dataframe".'
398444 )
399- if array_format == "dataframe" and scipy .sparse .issparse (data ):
445+ elif array_format == "dataframe" and scipy .sparse .issparse (data ):
400446 return pd .SparseDataFrame (data , columns = attribute_names )
447+ else :
448+ data_type = "sparse-data" if scipy .sparse .issparse (data ) else "non-sparse data"
449+ warn ("Cannot convert {} to '{}'. Returning input data." .format (data_type , array_format ))
401450 return data
402451
403452 @staticmethod
0 commit comments