Skip to content

Commit 40521b5

Browse files
authored
Merge pull request #296 from openml/develop
Develop
2 parents c5c1dc3 + 3b37dfc commit 40521b5

31 files changed

+1092
-217
lines changed

openml/__init__.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,13 @@
2222
from . import runs
2323
from . import flows
2424
from . import setups
25+
from . import study
26+
from . import evaluations
2527
from .runs import OpenMLRun
2628
from .tasks import OpenMLTask, OpenMLSplit
2729
from .flows import OpenMLFlow
2830

29-
__version__ = "0.4.0dev"
31+
from .__version__ import __version__
3032

3133

3234
def populate_cache(task_ids=None, dataset_ids=None, flow_ids=None,
@@ -66,5 +68,6 @@ def populate_cache(task_ids=None, dataset_ids=None, flow_ids=None,
6668

6769

6870
__all__ = ['OpenMLDataset', 'OpenMLDataFeature', 'OpenMLRun',
69-
'OpenMLSplit', 'datasets', 'OpenMLTask', 'OpenMLFlow',
71+
'OpenMLSplit', 'OpenMLEvaluation', 'OpenMLSetup',
72+
'OpenMLTask', 'OpenMLFlow', 'datasets', 'evaluations',
7073
'config', 'runs', 'flows', 'tasks', 'setups']

openml/__version__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"""Version information."""
2+
3+
# The following line *must* be the last in the module, exactly as formatted:
4+
__version__ = "0.5.0dev"

openml/_api_calls.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,16 @@ def _read_url_files(url, data=None, file_dictionary=None, file_elements=None):
104104
def _read_url(url, data=None):
105105

106106
data = {} if data is None else data
107-
data['api_key'] = config.apikey
107+
if config.apikey is not None:
108+
data['api_key'] = config.apikey
108109

109-
# Using requests.post sets header 'Accept-encoding' automatically to
110-
# 'gzip,deflate'
111-
response = requests.post(url, data=data)
110+
if len(data) == 0 or (len(data) == 1 and 'api_key' in data):
111+
# do a GET
112+
response = requests.get(url, params=data)
113+
else: # an actual post request
114+
# Using requests.post sets header 'Accept-encoding' automatically to
115+
# 'gzip,deflate'
116+
response = requests.post(url, data=data)
112117

113118
if response.status_code != 200:
114119
raise _parse_server_exception(response)
@@ -117,6 +122,7 @@ def _read_url(url, data=None):
117122
warnings.warn('Received uncompressed content from OpenML for %s.' % url)
118123
return response.text
119124

125+
120126
def _parse_server_exception(response):
121127
# OpenML has a sophisticated error system
122128
# where information about failures is provided. try to parse this

openml/datasets/dataset.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None,
3939
row_id_attribute=None, ignore_attribute=None,
4040
version_label=None, citation=None, tag=None, visibility=None,
4141
original_data_url=None, paper_url=None, update_comment=None,
42-
md5_checksum=None, data_file=None, features=None):
42+
md5_checksum=None, data_file=None, features=None, qualities=None):
4343
# Attributes received by querying the RESTful API
4444
self.dataset_id = int(dataset_id) if dataset_id is not None else None
4545
self.name = name
@@ -74,6 +74,7 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None,
7474
self.md5_cheksum = md5_checksum
7575
self.data_file = data_file
7676
self.features = None
77+
self.qualities = None
7778

7879
if features is not None:
7980
self.features = {}
@@ -87,6 +88,12 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None,
8788
raise ValueError('Data features not provided in right order')
8889
self.features[feature.index] = feature
8990

91+
if qualities is not None:
92+
self.qualities = {}
93+
for idx, xmlquality in enumerate(qualities['oml:quality']):
94+
name = xmlquality['oml:name']
95+
value = xmlquality['oml:value']
96+
self.qualities[name] = value
9097

9198
if data_file is not None:
9299
if self._data_features_supported():

openml/datasets/functions.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ def _get_cached_dataset(dataset_id):
7575
description = _get_cached_dataset_description(dataset_id)
7676
arff_file = _get_cached_dataset_arff(dataset_id)
7777
features = _get_cached_dataset_features(dataset_id)
78-
dataset = _create_dataset_from_description(description, features, arff_file)
78+
qualities = _get_cached_dataset_qualities(dataset_id)
79+
dataset = _create_dataset_from_description(description, features, qualities, arff_file)
7980

8081
return dataset
8182

@@ -107,6 +108,19 @@ def _get_cached_dataset_features(dataset_id):
107108
"cached" % dataset_id)
108109

109110

111+
def _get_cached_dataset_qualities(dataset_id):
    """Read the cached ``qualities.xml`` of a dataset from disk.

    Parameters
    ----------
    dataset_id : int
        Id of the dataset whose qualities should be loaded.

    Returns
    -------
    dict
        The parsed ``oml:data_qualities`` element of the cached XML.

    Raises
    ------
    OpenMLCacheException
        If no qualities file for this dataset exists in the cache.
    """
    qualities_file = os.path.join(config.get_cache_directory(), "datasets",
                                  str(dataset_id), "qualities.xml")
    try:
        with io.open(qualities_file, encoding='utf8') as fh:
            qualities_xml = fh.read()
    except (IOError, OSError):
        raise OpenMLCacheException("Dataset qualities for dataset id %d not "
                                   "cached" % dataset_id)
    # Parsing happens outside the try block: a malformed file should surface
    # as a parse error, not be misreported as "not cached".
    return xmltodict.parse(qualities_xml)["oml:data_qualities"]
122+
123+
110124
def _get_cached_dataset_arff(dataset_id):
111125
cache_dir = config.get_cache_directory()
112126
did_cache_dir = os.path.join(cache_dir, "datasets", str(dataset_id))
@@ -272,7 +286,7 @@ def get_dataset(dataset_id):
272286
_remove_dataset_cache_dir(did_cache_dir)
273287
raise e
274288

275-
dataset = _create_dataset_from_description(description, features, arff_file)
289+
dataset = _create_dataset_from_description(description, features, qualities, arff_file)
276290
return dataset
277291

278292

@@ -470,7 +484,7 @@ def _remove_dataset_cache_dir(did_cache_dir):
470484
'Please do this manually!' % did_cache_dir)
471485

472486

473-
def _create_dataset_from_description(description, features, arff_file):
487+
def _create_dataset_from_description(description, features, qualities, arff_file):
474488
"""Create a dataset object from a description dict.
475489
476490
Parameters
@@ -510,5 +524,6 @@ def _create_dataset_from_description(description, features, arff_file):
510524
description.get("oml:update_comment"),
511525
description.get("oml:md5_checksum"),
512526
data_file=arff_file,
513-
features=features)
527+
features=features,
528+
qualities=qualities)
514529
return dataset

openml/evaluations/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from .evaluation import OpenMLEvaluation
2+
from .functions import list_evaluations

openml/evaluations/evaluation.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
2+
class OpenMLEvaluation(object):
    """Meta-information for one run / evaluation-measure combination.

    Mirrors a single entry as returned by the ``evaluation/list`` endpoint.

    Parameters
    ----------
    run_id : int
        Id of the run that was evaluated.
    task_id : int
        Id of the task the run was executed on.
    setup_id : int
        Id of the setup (flow plus hyperparameter settings) that was used.
    flow_id : int
        Id of the flow that was used.
    flow_name : str
        Name of the flow that was used.
    data_id : int
        Id of the dataset underlying the task.
    data_name : str
        The name of the dataset.
    function : str
        The evaluation function of this item (e.g., accuracy).
    upload_time : str
        The time of evaluation.
    value : float
        The value of this evaluation.
    array_data : str, optional
        List of information per class (e.g., in case of precision, auroc,
        recall).
    """

    def __init__(self, run_id, task_id, setup_id, flow_id, flow_name,
                 data_id, data_name, function, upload_time, value,
                 array_data=None):
        # Store every constructor argument verbatim under an attribute of
        # the same name.
        fields = (('run_id', run_id), ('task_id', task_id),
                  ('setup_id', setup_id), ('flow_id', flow_id),
                  ('flow_name', flow_name), ('data_id', data_id),
                  ('data_name', data_name), ('function', function),
                  ('upload_time', upload_time), ('value', value),
                  ('array_data', array_data))
        for attr_name, attr_value in fields:
            setattr(self, attr_name, attr_value)
40+

openml/evaluations/functions.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
import xmltodict
2+
3+
from .._api_calls import _perform_api_call
4+
from ..evaluations import OpenMLEvaluation
5+
6+
def list_evaluations(function, offset=None, size=None, id=None, task=None,
                     setup=None, flow=None, uploader=None, tag=None):
    """List all run-evaluation pairs matching all of the given filters.

    Performs the API call ``/evaluation/list/function/{function}/{filters}``.

    Parameters
    ----------
    function : str
        the evaluation function. e.g., predictive_accuracy
    offset : int, optional
        the number of runs to skip, starting from the first
    size : int, optional
        the maximum number of runs to show
    id : list, optional
        run ids to filter on
    task : list, optional
        task ids to filter on
    setup : list, optional
        setup ids to filter on
    flow : list, optional
        flow ids to filter on
    uploader : list, optional
        uploader ids to filter on
    tag : str, optional
        tag to filter on

    Returns
    -------
    dict
        The evaluations found, keyed by run id.
    """
    uri = "evaluation/list/function/%s" % function
    if offset is not None:
        uri += "/offset/%d" % int(offset)
    if size is not None:
        uri += "/limit/%d" % int(size)
    # Each id-list filter becomes a path segment with comma-separated ids;
    # segment order matches the server's documented filter order.
    id_filters = (("run", id), ("task", task), ("setup", setup),
                  ("flow", flow), ("uploader", uploader))
    for segment, ids in id_filters:
        if ids is not None:
            uri += "/%s/%s" % (segment, ",".join(str(int(i)) for i in ids))
    if tag is not None:
        uri += "/tag/%s" % tag

    return _list_evaluations(uri)
59+
60+
def _list_evaluations(api_call):
    """Perform an ``evaluation/list`` API call and parse the response.

    Parameters
    ----------
    api_call : str
        The (relative) API call to perform.

    Returns
    -------
    dict
        Mapping from run id to :class:`OpenMLEvaluation`.

    Raises
    ------
    ValueError
        If the server response does not contain an ``oml:evaluations``
        element.
    TypeError
        If ``oml:evaluation`` is neither a list nor a dict.
    """

    xml_string = _perform_api_call(api_call)

    evals_dict = xmltodict.parse(xml_string)
    # Minimalistic check if the XML is useful
    if 'oml:evaluations' not in evals_dict:
        raise ValueError('Error in return XML, does not contain "oml:evaluations": %s'
                         % str(evals_dict))

    # xmltodict parses a single child element as a dict instead of a
    # one-element list; normalize to a list either way.
    evaluations = evals_dict['oml:evaluations']['oml:evaluation']
    if isinstance(evaluations, list):
        evals_list = evaluations
    elif isinstance(evaluations, dict):
        evals_list = [evaluations]
    else:
        raise TypeError('Expected list or dict for "oml:evaluation", '
                        'received %s' % type(evaluations))

    evals = dict()
    for eval_ in evals_list:
        run_id = int(eval_['oml:run_id'])
        # Bug fix: the looked-up value was previously discarded instead of
        # being assigned, so array_data always ended up None.
        array_data = eval_.get('oml:array_data')

        evaluation = OpenMLEvaluation(run_id,
                                      int(eval_['oml:task_id']),
                                      int(eval_['oml:setup_id']),
                                      int(eval_['oml:flow_id']),
                                      eval_['oml:flow_name'],
                                      # cast to int for consistency with the
                                      # other ids (documented as int)
                                      int(eval_['oml:data_id']),
                                      eval_['oml:data_name'],
                                      eval_['oml:function'],
                                      eval_['oml:upload_time'],
                                      float(eval_['oml:value']),
                                      array_data)
        evals[run_id] = evaluation
    return evals
93+

openml/flows/sklearn_converter.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -586,10 +586,13 @@ def check(param_dict, disallow_parameter=False):
586586
elif isinstance(model, sklearn.model_selection.RandomizedSearchCV):
587587
param_distributions = model.param_distributions
588588
else:
589+
if hasattr(model, 'param_distributions'):
590+
param_distributions = model.param_distributions
591+
else:
592+
raise AttributeError('Using subclass BaseSearchCV other than {GridSearchCV, RandomizedSearchCV}. Could not find attribute param_distributions. ')
589593
print('Warning! Using subclass BaseSearchCV other than ' \
590594
'{GridSearchCV, RandomizedSearchCV}. Should implement param check. ')
591-
pass
592-
595+
593596
if not check(param_distributions, True):
594597
raise PyOpenMLError('openml-python should not be used to '
595598
'optimize the n_jobs parameter.')

0 commit comments

Comments
 (0)