Commit ce82fd5 (1 parent: c0a75bd)
Authored by: LennartPurucker, mfeurer, dependabot[bot], v-parmar, PGijsbers

Add summary of locally computed metrics to representation of run (#1214)

* added additional task-agnostic local result to the print of a run
* add PR to progress.rst
* fix comment typo
* Update openml/runs/run.py (Co-authored-by: Matthias Feurer <[email protected]>)
* add a function to list available estimation procedures
* refactor print to only work for supported task types and local measures
* add test for the print-out and update progress
* Fix CI Python 3.6 (#1218): try Ubuntu 20.04 for Python 3.6; use old Ubuntu for Python 3.6
* Bump docker/setup-buildx-action from 1 to 2 (#1221)
  Bumps [docker/setup-buildx-action](https://github.com/docker/setup-buildx-action) from 1 to 2.
  - [Release notes](https://github.com/docker/setup-buildx-action/releases)
  - [Commits](docker/setup-buildx-action@v1...v2)
  updated-dependencies: docker/setup-buildx-action (direct:production, version-update:semver-major)
* Update run.py (#1194): updated description to not contain duplicate information; add type hint for new function; update add description
* Refactor if-statements (#1219): add explicit names to conditional expressions
* Add 'dependencies' to better mimic OpenMLFlow
* CI Python 3.8 (#1220): install custom numpy version for the specific combination of Python 3.8 and numpy; move to coverage action v3
* fix run docstring

Signed-off-by: dependabot[bot] <[email protected]>
Co-authored-by: Matthias Feurer <[email protected]>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Vishal Parmar <[email protected]>
Co-authored-by: Pieter Gijsbers <[email protected]>

File tree: 4 files changed, +110 -14 lines

- doc/progress.rst
- openml/evaluations/functions.py
- openml/runs/run.py
- tests/test_runs/test_run_functions.py

doc/progress.rst
Lines changed: 3 additions & 0 deletions

@@ -9,8 +9,11 @@ Changelog
 0.13.1
 ~~~~~~
 
+* Add new contributions here.
+* ADD #1144: Add locally computed results to the ``OpenMLRun`` object's representation.
 * FIX #1197 #559 #1131: Fix the order of ground truth and predictions in the ``OpenMLRun`` object and in ``format_prediction``.
 * FIX #1198: Support numpy 1.24 and higher.
+* ADD #1144: Add locally computed results to the ``OpenMLRun`` object's representation if the run was created locally and not downloaded from the server.
 
 0.13.0
 ~~~~~~

openml/evaluations/functions.py
Lines changed: 33 additions & 0 deletions

@@ -275,6 +275,39 @@ def list_evaluation_measures() -> List[str]:
     return qualities
 
 
+def list_estimation_procedures() -> List[str]:
+    """Return list of evaluation procedures available.
+
+    The function performs an API call to retrieve the entire list of
+    evaluation procedures' names that are available.
+
+    Returns
+    -------
+    list
+    """
+
+    api_call = "estimationprocedure/list"
+    xml_string = openml._api_calls._perform_api_call(api_call, "get")
+    api_results = xmltodict.parse(xml_string)
+
+    # Minimalistic check if the XML is useful
+    if "oml:estimationprocedures" not in api_results:
+        raise ValueError("Error in return XML, does not contain " '"oml:estimationprocedures"')
+    if "oml:estimationprocedure" not in api_results["oml:estimationprocedures"]:
+        raise ValueError("Error in return XML, does not contain " '"oml:estimationprocedure"')
+
+    if not isinstance(api_results["oml:estimationprocedures"]["oml:estimationprocedure"], list):
+        raise TypeError(
+            "Error in return XML, does not contain " '"oml:estimationprocedure" as a list'
+        )
+
+    prods = [
+        prod["oml:name"]
+        for prod in api_results["oml:estimationprocedures"]["oml:estimationprocedure"]
+    ]
+    return prods
+
+
 def list_evaluations_setups(
     function: str,
     offset: Optional[int] = None,
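
For reference, a minimal usage sketch of the new helper (a sketch, not part of the diff). It assumes list_estimation_procedures is re-exported under openml.evaluations like its sibling list_evaluation_measures; if it is not, import it from openml.evaluations.functions instead. The returned names are whatever the server reports in the "oml:name" fields.

    import openml

    # Fetch the names of all estimation procedures known to the server,
    # e.g. cross-validation and holdout variants (exact names vary by server).
    procedure_names = openml.evaluations.list_estimation_procedures()
    for name in procedure_names:
        print(name)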

openml/runs/run.py
Lines changed: 66 additions & 14 deletions

@@ -26,7 +26,7 @@
 
 
 class OpenMLRun(OpenMLBase):
-    """OpenML Run: result of running a model on an openml dataset.
+    """OpenML Run: result of running a model on an OpenML dataset.
 
     Parameters
     ----------
@@ -39,13 +39,13 @@ class OpenMLRun(OpenMLBase):
     setup_string: str
         The setup string of the run.
     output_files: Dict[str, str]
-        Specifies where each related file can be found.
+        Specifies where each related file can be found.
     setup_id: int
         An integer representing the ID of the setup used for the run.
     tags: List[str]
         Representing the tags associated with the run.
     uploader: int
-        User ID of the uploader.
+        User ID of the uploader.
     uploader_name: str
         The name of the person who uploaded the run.
     evaluations: Dict
@@ -71,15 +71,18 @@ class OpenMLRun(OpenMLBase):
     predictions_url: str
         The URL of the predictions file.
     task: OpenMLTask
-        An instance of the OpenMLTask class, representing the OpenML task associated with the run.
+        An instance of the OpenMLTask class, representing the OpenML task associated
+        with the run.
     flow: OpenMLFlow
-        An instance of the OpenMLFlow class, representing the OpenML flow associated with the run.
+        An instance of the OpenMLFlow class, representing the OpenML flow associated
+        with the run.
     run_id: int
         The ID of the run.
     description_text: str, optional
-        Description text to add to the predictions file. If left None, is set to the time the arff file is generated.
+        Description text to add to the predictions file. If left None, is set to the
+        time the arff file is generated.
     run_details: str, optional (default=None)
-        Description of the run stored in the run meta-data.
+        Description of the run stored in the run meta-data.
     """
 
     def __init__(
@@ -158,8 +161,37 @@ def predictions(self) -> pd.DataFrame:
     def id(self) -> Optional[int]:
         return self.run_id
 
+    def _evaluation_summary(self, metric: str) -> str:
+        """Summarizes the evaluation of a metric over all folds.
+
+        The fold scores for the metric must exist already. During run creation,
+        by default, the MAE for OpenMLRegressionTask and the accuracy for
+        OpenMLClassificationTask/OpenMLLearningCurveTask tasks are computed.
+
+        If repetitions exist, we take the mean over all repetitions.
+
+        Parameters
+        ----------
+        metric: str
+            Name of an evaluation metric that was used to compute fold scores.
+
+        Returns
+        -------
+        metric_summary: str
+            A formatted string that displays the metric's evaluation summary.
+            The summary consists of the mean and std.
+        """
+        fold_score_lists = self.fold_evaluations[metric].values()
+
+        # Get the mean and std over all repetitions
+        rep_means = [np.mean(list(x.values())) for x in fold_score_lists]
+        rep_stds = [np.std(list(x.values())) for x in fold_score_lists]
+
+        return "{:.4f} +- {:.4f}".format(np.mean(rep_means), np.mean(rep_stds))
+
     def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]:
         """Collect all information to display in the __repr__ body."""
+        # Set up fields
         fields = {
             "Uploader Name": self.uploader_name,
             "Metric": self.task_evaluation_measure,
@@ -175,6 +207,10 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]:
             "Dataset ID": self.dataset_id,
             "Dataset URL": openml.datasets.OpenMLDataset.url_for_id(self.dataset_id),
         }
+
+        # determines the order in which the initial fields will be printed
+        order = ["Uploader Name", "Uploader Profile", "Metric", "Result"]
+
         if self.uploader is not None:
             fields["Uploader Profile"] = "{}/u/{}".format(
                 openml.config.get_server_base_url(), self.uploader
@@ -183,13 +219,29 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]:
             fields["Run URL"] = self.openml_url
         if self.evaluations is not None and self.task_evaluation_measure in self.evaluations:
             fields["Result"] = self.evaluations[self.task_evaluation_measure]
-
-        # determines the order in which the information will be printed
-        order = [
-            "Uploader Name",
-            "Uploader Profile",
-            "Metric",
-            "Result",
+        elif self.fold_evaluations is not None:
+            # -- Add locally computed summary values if possible
+            if "predictive_accuracy" in self.fold_evaluations:
+                # OpenMLClassificationTask; OpenMLLearningCurveTask
+                # default: predictive_accuracy
+                result_field = "Local Result - Accuracy (+- STD)"
+                fields[result_field] = self._evaluation_summary("predictive_accuracy")
+                order.append(result_field)
+            elif "mean_absolute_error" in self.fold_evaluations:
+                # OpenMLRegressionTask
+                # default: mean_absolute_error
+                result_field = "Local Result - MAE (+- STD)"
+                fields[result_field] = self._evaluation_summary("mean_absolute_error")
+                order.append(result_field)
+
+            if "usercpu_time_millis" in self.fold_evaluations:
+                # Runtime should be available for most task types
+                rt_field = "Local Runtime - ms (+- STD)"
+                fields[rt_field] = self._evaluation_summary("usercpu_time_millis")
+                order.append(rt_field)
+
+        # determines the remaining order
+        order += [
             "Run ID",
             "Run URL",
             "Task ID",
tests/test_runs/test_run_functions.py
Lines changed: 8 additions & 0 deletions

@@ -531,6 +531,14 @@ def determine_grid_size(param_grid):
 
         # todo: check if runtime is present
         self._check_fold_timing_evaluations(run.fold_evaluations, 1, num_folds, task_type=task_type)
+
+        # Check that the run's string and print representations do not run into an error.
+        # The above check already verifies that all columns needed for supported
+        # representations are present.
+        # Supported: SUPERVISED_CLASSIFICATION, LEARNING_CURVE, SUPERVISED_REGRESSION
+        str(run)
+        self.logger.info(run)
+
         return run
 
     def _run_and_upload_classification(
