Commit ce82fd5 (1 parent: c0a75bd)
Authored by: LennartPurucker, mfeurer, dependabot[bot], v-parmar, PGijsbers

Add summary of locally computed metrics to representation of run (#1214)

* added additional task-agnostic local result to the print of a run
* add PR to progress.rst
* fix comment typo
* Update openml/runs/run.py (Co-authored-by: Matthias Feurer <[email protected]>)
* add a function to list available estimation procedures
* refactor print to only work for supported task types and local measures
* add test for the print-out and update progress
* Fix CI Python 3.6 (#1218): try Ubuntu 20.04 for Python 3.6; use old Ubuntu for Python 3.6
* Bump docker/setup-buildx-action from 1 to 2 (#1221)
  Bumps [docker/setup-buildx-action](https://github.com/docker/setup-buildx-action) from 1 to 2.
  - [Release notes](https://github.com/docker/setup-buildx-action/releases)
  - [Commits](docker/setup-buildx-action@v1...v2)
  updated-dependencies: docker/setup-buildx-action (direct:production, version-update:semver-major)
* Update run.py (#1194): updated description to not contain duplicate information; add type hint for new function; update add description
* Refactor if-statements (#1219): add explicit names to conditional expressions
* Add 'dependencies' to better mimic OpenMLFlow
* CI Python 3.8 (#1220): install custom numpy version for the specific combination of Python 3.8 and numpy; move to coverage action v3
* fix run docstring

Signed-off-by: dependabot[bot] <[email protected]>
Co-authored-by: Matthias Feurer <[email protected]>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Vishal Parmar <[email protected]>
Co-authored-by: Pieter Gijsbers <[email protected]>

File tree: 4 files changed, +110 -14 lines

- doc/progress.rst
- openml/evaluations/functions.py
- openml/runs/run.py
- tests/test_runs/test_run_functions.py

doc/progress.rst
Lines changed: 3 additions & 0 deletions

@@ -9,8 +9,11 @@ Changelog
 0.13.1
 ~~~~~~
 
+* Add new contributions here.
+* ADD #1144: Add locally computed results to the ``OpenMLRun`` object's representation.
 * FIX #1197 #559 #1131: Fix the order of ground truth and predictions in the ``OpenMLRun`` object and in ``format_prediction``.
 * FIX #1198: Support numpy 1.24 and higher.
+* ADD #1144: Add locally computed results to the ``OpenMLRun`` object's representation if the run was created locally and not downloaded from the server.
 
 0.13.0
 ~~~~~~

openml/evaluations/functions.py
Lines changed: 33 additions & 0 deletions

@@ -275,6 +275,39 @@ def list_evaluation_measures() -> List[str]:
     return qualities
 
 
+def list_estimation_procedures() -> List[str]:
+    """Return list of evaluation procedures available.
+
+    The function performs an API call to retrieve the entire list of
+    evaluation procedures' names that are available.
+
+    Returns
+    -------
+    list
+    """
+
+    api_call = "estimationprocedure/list"
+    xml_string = openml._api_calls._perform_api_call(api_call, "get")
+    api_results = xmltodict.parse(xml_string)
+
+    # Minimalistic check if the XML is useful
+    if "oml:estimationprocedures" not in api_results:
+        raise ValueError("Error in return XML, does not contain " '"oml:estimationprocedures"')
+    if "oml:estimationprocedure" not in api_results["oml:estimationprocedures"]:
+        raise ValueError("Error in return XML, does not contain " '"oml:estimationprocedure"')
+
+    if not isinstance(api_results["oml:estimationprocedures"]["oml:estimationprocedure"], list):
+        raise TypeError(
+            "Error in return XML, does not contain " '"oml:estimationprocedure" as a list'
+        )
+
+    prods = [
+        prod["oml:name"]
+        for prod in api_results["oml:estimationprocedures"]["oml:estimationprocedure"]
+    ]
+    return prods
+
+
 def list_evaluations_setups(
     function: str,
     offset: Optional[int] = None,
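
For reference, a minimal usage sketch of the new helper (a sketch, not part of the diff). It assumes list_estimation_procedures is re-exported under openml.evaluations like its sibling list_evaluation_measures; if it is not, import it from openml.evaluations.functions instead. The returned names are whatever the server reports in the "oml:name" fields.

    import openml

    # Fetch the names of all estimation procedures known to the server,
    # e.g. cross-validation and holdout variants (exact names vary by server).
    procedure_names = openml.evaluations.list_estimation_procedures()
    for name in procedure_names:
        print(name)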

openml/runs/run.py
Lines changed: 66 additions & 14 deletions

@@ -26,7 +26,7 @@
 
 
 class OpenMLRun(OpenMLBase):
-    """OpenML Run: result of running a model on an openml dataset.
+    """OpenML Run: result of running a model on an OpenML dataset.
 
     Parameters
     ----------
@@ -39,13 +39,13 @@ class OpenMLRun(OpenMLBase):
     setup_string: str
         The setup string of the run.
     output_files: Dict[str, str]
-        Specifies where each related file can be found.
+        Specifies where each related file can be found.
     setup_id: int
         An integer representing the ID of the setup used for the run.
     tags: List[str]
         Representing the tags associated with the run.
     uploader: int
-        User ID of the uploader.
+        User ID of the uploader.
     uploader_name: str
         The name of the person who uploaded the run.
     evaluations: Dict
@@ -71,15 +71,18 @@ class OpenMLRun(OpenMLBase):
     predictions_url: str
         The URL of the predictions file.
     task: OpenMLTask
-        An instance of the OpenMLTask class, representing the OpenML task associated with the run.
+        An instance of the OpenMLTask class, representing the OpenML task associated
+        with the run.
     flow: OpenMLFlow
-        An instance of the OpenMLFlow class, representing the OpenML flow associated with the run.
+        An instance of the OpenMLFlow class, representing the OpenML flow associated
+        with the run.
     run_id: int
         The ID of the run.
     description_text: str, optional
-        Description text to add to the predictions file. If left None, is set to the time the arff file is generated.
+        Description text to add to the predictions file. If left None, is set to the
+        time the arff file is generated.
     run_details: str, optional (default=None)
-        Description of the run stored in the run meta-data.
+        Description of the run stored in the run meta-data.
     """
 
     def __init__(
@@ -158,8 +161,37 @@ def predictions(self) -> pd.DataFrame:
     def id(self) -> Optional[int]:
         return self.run_id
 
+    def _evaluation_summary(self, metric: str) -> str:
+        """Summarizes the evaluation of a metric over all folds.
+
+        The fold scores for the metric must exist already. During run creation,
+        by default, the MAE for OpenMLRegressionTask and the accuracy for
+        OpenMLClassificationTask/OpenMLLearningCurveTask tasks are computed.
+
+        If repetitions exist, we take the mean over all repetitions.
+
+        Parameters
+        ----------
+        metric: str
+            Name of an evaluation metric that was used to compute fold scores.
+
+        Returns
+        -------
+        metric_summary: str
+            A formatted string that displays the metric's evaluation summary.
+            The summary consists of the mean and std.
+        """
+        fold_score_lists = self.fold_evaluations[metric].values()
+
+        # Get the mean and std over all repetitions
+        rep_means = [np.mean(list(x.values())) for x in fold_score_lists]
+        rep_stds = [np.std(list(x.values())) for x in fold_score_lists]
+
+        return "{:.4f} +- {:.4f}".format(np.mean(rep_means), np.mean(rep_stds))
+
     def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]:
         """Collect all information to display in the __repr__ body."""
+        # Set up fields
         fields = {
             "Uploader Name": self.uploader_name,
             "Metric": self.task_evaluation_measure,
@@ -175,6 +207,10 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]:
             "Dataset ID": self.dataset_id,
             "Dataset URL": openml.datasets.OpenMLDataset.url_for_id(self.dataset_id),
         }
+
+        # determines the order in which the initial fields will be printed
+        order = ["Uploader Name", "Uploader Profile", "Metric", "Result"]
+
         if self.uploader is not None:
             fields["Uploader Profile"] = "{}/u/{}".format(
                 openml.config.get_server_base_url(), self.uploader
@@ -183,13 +219,29 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]:
             fields["Run URL"] = self.openml_url
         if self.evaluations is not None and self.task_evaluation_measure in self.evaluations:
             fields["Result"] = self.evaluations[self.task_evaluation_measure]
-
-        # determines the order in which the information will be printed
-        order = [
-            "Uploader Name",
-            "Uploader Profile",
-            "Metric",
-            "Result",
+        elif self.fold_evaluations is not None:
+            # -- Add locally computed summary values if possible
+            if "predictive_accuracy" in self.fold_evaluations:
+                # OpenMLClassificationTask; OpenMLLearningCurveTask
+                # default: predictive_accuracy
+                result_field = "Local Result - Accuracy (+- STD)"
+                fields[result_field] = self._evaluation_summary("predictive_accuracy")
+                order.append(result_field)
+            elif "mean_absolute_error" in self.fold_evaluations:
+                # OpenMLRegressionTask
+                # default: mean_absolute_error
+                result_field = "Local Result - MAE (+- STD)"
+                fields[result_field] = self._evaluation_summary("mean_absolute_error")
+                order.append(result_field)
+
+            if "usercpu_time_millis" in self.fold_evaluations:
+                # Runtime should be available for most task types
+                rt_field = "Local Runtime - ms (+- STD)"
+                fields[rt_field] = self._evaluation_summary("usercpu_time_millis")
+                order.append(rt_field)
+
+        # determines the remaining order
+        order += [
             "Run ID",
             "Run URL",
             "Task ID",
tests/test_runs/test_run_functions.py
Lines changed: 8 additions & 0 deletions

@@ -531,6 +531,14 @@ def determine_grid_size(param_grid):
 
         # todo: check if runtime is present
         self._check_fold_timing_evaluations(run.fold_evaluations, 1, num_folds, task_type=task_type)
+
+        # Check that the run's string and print representations do not run into an error.
+        # The above check already verifies that all columns needed for supported
+        # representations are present.
+        # Supported: SUPERVISED_CLASSIFICATION, LEARNING_CURVE, SUPERVISED_REGRESSION
+        str(run)
+        self.logger.info(run)
+
         return run
 
     def _run_and_upload_classification(
