
Commit 993dbea

Merge branch 'develop' into learningcurves
2 parents 62eff10 + de66af0 commit 993dbea

File tree

11 files changed: +1672, -1565 lines

examples/OpenMLDemo.ipynb

Lines changed: 0 additions & 703 deletions
This file was deleted.

examples/OpenML_Tutorial.ipynb

Lines changed: 1344 additions & 0 deletions
Large diffs are not rendered by default.

examples/PyOpenML.ipynb

Lines changed: 0 additions & 862 deletions
This file was deleted.

openml/evaluations/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
from .evaluation import OpenMLEvaluation
from .functions import list_evaluations

openml/evaluations/evaluation.py

Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@

class OpenMLEvaluation(object):
    '''
    Contains all meta-information about a run / evaluation combination,
    according to the evaluation/list function.

    Parameters
    ----------
    run_id : int
    task_id : int
    setup_id : int
    flow_id : int
    flow_name : str
    data_id : int
    data_name : str
        the name of the dataset
    function : str
        the evaluation function of this item (e.g., accuracy)
    upload_time : str
        the time of evaluation
    value : float
        the value of this evaluation
    array_data : str
        list of information per class (e.g., in case of precision, auroc, recall)
    '''
    def __init__(self, run_id, task_id, setup_id, flow_id, flow_name,
                 data_id, data_name, function, upload_time, value,
                 array_data=None):
        self.run_id = run_id
        self.task_id = task_id
        self.setup_id = setup_id
        self.flow_id = flow_id
        self.flow_name = flow_name
        self.data_id = data_id
        self.data_name = data_name
        self.function = function
        self.upload_time = upload_time
        self.value = value
        self.array_data = array_data
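
For illustration, a minimal sketch of constructing an OpenMLEvaluation by hand; every value below is hypothetical, and in normal use instances are created by list_evaluations rather than directly:

from openml.evaluations import OpenMLEvaluation

# All ids and values here are made up for the example.
evaluation = OpenMLEvaluation(
    run_id=1, task_id=7312, setup_id=10, flow_id=100,
    flow_name='weka.J48', data_id=6, data_name='letter',
    function='predictive_accuracy',
    upload_time='2017-01-01 12:00:00', value=0.95)
print(evaluation.function, evaluation.value)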

openml/evaluations/functions.py

Lines changed: 93 additions & 0 deletions
@@ -0,0 +1,93 @@
import xmltodict

from .._api_calls import _perform_api_call
from ..evaluations import OpenMLEvaluation


def list_evaluations(function, offset=None, size=None, id=None, task=None,
                     setup=None, flow=None, uploader=None, tag=None):
    """List all run-evaluation pairs matching all of the given filters.

    Performs the API call evaluation/list/function/{function}/{filters}.

    Parameters
    ----------
    function : str
        the evaluation function, e.g., predictive_accuracy
    offset : int, optional
        the number of runs to skip, starting from the first
    size : int, optional
        the maximum number of runs to show
    id : list, optional
        a list of run ids to filter on
    task : list, optional
        a list of task ids to filter on
    setup : list, optional
        a list of setup ids to filter on
    flow : list, optional
        a list of flow ids to filter on
    uploader : list, optional
        a list of uploader ids to filter on
    tag : str, optional
        a tag to filter on

    Returns
    -------
    dict
        Dict of found evaluations, keyed by run id.
    """

    api_call = "evaluation/list/function/%s" % function
    if offset is not None:
        api_call += "/offset/%d" % int(offset)
    if size is not None:
        api_call += "/limit/%d" % int(size)
    if id is not None:
        api_call += "/run/%s" % ','.join([str(int(i)) for i in id])
    if task is not None:
        api_call += "/task/%s" % ','.join([str(int(i)) for i in task])
    if setup is not None:
        api_call += "/setup/%s" % ','.join([str(int(i)) for i in setup])
    if flow is not None:
        api_call += "/flow/%s" % ','.join([str(int(i)) for i in flow])
    if uploader is not None:
        api_call += "/uploader/%s" % ','.join([str(int(i)) for i in uploader])
    if tag is not None:
        api_call += "/tag/%s" % tag

    return _list_evaluations(api_call)


def _list_evaluations(api_call):
    """Helper function to parse API calls which return lists of runs."""

    xml_string = _perform_api_call(api_call)

    evals_dict = xmltodict.parse(xml_string)
    # Minimalistic check whether the XML is useful
    if 'oml:evaluations' not in evals_dict:
        raise ValueError('Error in return XML, does not contain "oml:evaluations": %s'
                         % str(evals_dict))

    if isinstance(evals_dict['oml:evaluations']['oml:evaluation'], list):
        evals_list = evals_dict['oml:evaluations']['oml:evaluation']
    elif isinstance(evals_dict['oml:evaluations']['oml:evaluation'], dict):
        evals_list = [evals_dict['oml:evaluations']['oml:evaluation']]
    else:
        raise TypeError()

    evals = dict()
    for eval_ in evals_list:
        run_id = int(eval_['oml:run_id'])
        array_data = None
        if 'oml:array_data' in eval_:
            array_data = eval_['oml:array_data']

        evaluation = OpenMLEvaluation(int(eval_['oml:run_id']), int(eval_['oml:task_id']),
                                      int(eval_['oml:setup_id']), int(eval_['oml:flow_id']),
                                      eval_['oml:flow_name'], int(eval_['oml:data_id']),
                                      eval_['oml:data_name'], eval_['oml:function'],
                                      eval_['oml:upload_time'], float(eval_['oml:value']),
                                      array_data)
        evals[run_id] = evaluation
    return evals
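
A usage sketch of the new listing function; the task id 7312 comes from the tests added in this commit, and the size filter simply bounds the result:

import openml

# Fetch at most 10 predictive-accuracy evaluations for task 7312.
evaluations = openml.evaluations.list_evaluations(
    'predictive_accuracy', task=[7312], size=10)

# The result is a dict keyed by run id.
for run_id, evaluation in evaluations.items():
    print(run_id, evaluation.value)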

openml/study/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
from .study import OpenMLStudy
from .functions import get_study

openml/study/functions.py

Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
import xmltodict

from openml.study import OpenMLStudy
from .._api_calls import _perform_api_call


def _multitag_to_list(result_dict, tag):
    if isinstance(result_dict[tag], list):
        return result_dict[tag]
    elif isinstance(result_dict[tag], dict):
        return [result_dict[tag]]
    else:
        raise TypeError()


def get_study(study_id):
    '''
    Retrieves all relevant information of an OpenML study from the server.
    Note that some of the fields (data, tasks, flows, setups) can be empty,
    depending on the information available on the server.
    '''
    xml_string = _perform_api_call("study/%d" % study_id)
    result_dict = xmltodict.parse(xml_string)['oml:study']
    id = int(result_dict['oml:id'])
    name = result_dict['oml:name']
    description = result_dict['oml:description']
    creation_date = result_dict['oml:creation_date']
    creator = result_dict['oml:creator']
    tags = []
    for tag in _multitag_to_list(result_dict, 'oml:tag'):
        tags.append({'name': tag['oml:name'],
                     'window_start': tag['oml:window_start'],
                     'write_access': tag['oml:write_access']})

    datasets = None
    tasks = None
    flows = None
    setups = None

    if 'oml:data' in result_dict:
        datasets = [int(x) for x in result_dict['oml:data']['oml:data_id']]

    if 'oml:tasks' in result_dict:
        tasks = [int(x) for x in result_dict['oml:tasks']['oml:task_id']]

    if 'oml:flows' in result_dict:
        flows = [int(x) for x in result_dict['oml:flows']['oml:flow_id']]

    if 'oml:setups' in result_dict:
        setups = [int(x) for x in result_dict['oml:setups']['oml:setup_id']]

    study = OpenMLStudy(id, name, description, creation_date, creator, tags,
                        datasets, tasks, flows, setups)
    return study
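
A usage sketch for get_study; the study id below is a placeholder chosen for illustration, not one referenced by this commit:

import openml

study = openml.study.get_study(1)  # hypothetical study id
print(study.name)
print(study.tasks)  # may be None if the study has no associated tasks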

openml/study/study.py

Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@

class OpenMLStudy(object):
    '''
    An OpenMLStudy represents the OpenML concept of a study. It contains
    the following information: name, id, description, creation date,
    creator id and a set of tags.

    According to this list of tags, the study object receives a list of
    OpenML object ids (datasets, flows, tasks and setups).

    Can be used to obtain all relevant information from a study at once.

    Parameters
    ----------
    id : int
        the study id
    name : str
        the name of the study (meta-info)
    description : str
        brief description (meta-info)
    creation_date : str
        date of creation (meta-info)
    creator : int
        openml user id of the owner / creator
    tag : list(dict)
        The list of tags shows which tags are associated with the study.
        Each tag is a dict of (tag) name, window_start and write_access.
    data : list
        a list of data ids associated with this study
    tasks : list
        a list of task ids associated with this study
    flows : list
        a list of flow ids associated with this study
    setups : list
        a list of setup ids associated with this study
    '''

    def __init__(self, id, name, description, creation_date, creator,
                 tag, data, tasks, flows, setups):
        self.id = id
        self.name = name
        self.description = description
        self.creation_date = creation_date
        self.creator = creator
        self.tag = tag
        self.data = data
        self.tasks = tasks
        self.flows = flows
        self.setups = setups
Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
import openml
import openml.evaluations
from openml.testing import TestBase


class TestEvaluationFunctions(TestBase):

    def test_evaluation_list_filter_task(self):
        openml.config.server = self.production_server

        task_id = 7312

        evaluations = openml.evaluations.list_evaluations("predictive_accuracy", task=[task_id])

        self.assertGreater(len(evaluations), 100)
        for run_id in evaluations.keys():
            self.assertEqual(evaluations[run_id].task_id, task_id)

    def test_evaluation_list_filter_uploader(self):
        openml.config.server = self.production_server

        uploader_id = 16

        evaluations = openml.evaluations.list_evaluations("predictive_accuracy", uploader=[uploader_id])

        self.assertGreater(len(evaluations), 100)
        # for run_id in evaluations.keys():
        #     self.assertEqual(evaluations[run_id].uploader, uploader_id)

    def test_evaluation_list_filter_setup(self):
        openml.config.server = self.production_server

        setup_id = 10

        evaluations = openml.evaluations.list_evaluations("predictive_accuracy", setup=[setup_id])

        self.assertGreater(len(evaluations), 100)
        for run_id in evaluations.keys():
            self.assertEqual(evaluations[run_id].setup_id, setup_id)

    def test_evaluation_list_filter_flow(self):
        openml.config.server = self.production_server

        flow_id = 100

        evaluations = openml.evaluations.list_evaluations("predictive_accuracy", flow=[flow_id])

        self.assertGreater(len(evaluations), 2)
        for run_id in evaluations.keys():
            self.assertEqual(evaluations[run_id].flow_id, flow_id)

    def test_evaluation_list_filter_run(self):
        openml.config.server = self.production_server

        run_id = 1

        evaluations = openml.evaluations.list_evaluations("predictive_accuracy", id=[run_id])

        self.assertEqual(len(evaluations), 1)
        for key in evaluations.keys():
            self.assertEqual(evaluations[key].run_id, run_id)

    def test_evaluation_list_limit(self):
        openml.config.server = self.production_server

        evaluations = openml.evaluations.list_evaluations("predictive_accuracy", size=100, offset=100)
        self.assertEqual(len(evaluations), 100)
