
Commit a442688

PGijsbers, Bilgecelik, marcoslbueno, sahithyaravi, and Neeratyoy authored
Feature #753 (#932)
* Create first section: Creating Custom Flow
* Add Section: Using the Flow. It is incomplete, as while trying to explain how to format the predictions, I realized a utility function is required.
* Allow run description text to be custom. Previously the description text that accompanies the prediction file was auto-generated with the assumption that the corresponding flow had an extension. To support custom flows (with no extension), this behavior had to be changed. The description can now be passed on initialization. The auto-generated description is now correctly added only if the run was generated through run_flow_on_task.
* Draft for Custom Flow tutorial
* Add minimal docstring to OpenMLRun. I am not sure what the specifications are for each field.
* Process code review feedback. In particular: text changes; fetch the true labels from the dataset instead.
* Use the format utility function in automatic runs to format the predictions.
* Process @mfeurer feedback
* Rename arguments of list_evaluations (#933)
* list evals name change
* list evals - update
* adding config file to user guide (#931)
* adding config file to user guide
* finished requested changes
* Edit api (#935)
* version1
* minor fixes
* tests
* reformat code
* check new version
* remove get data
* code format
* review comments
* fix duplicate
* type annotate
* example
* tests for exceptions
* fix pep8
* black format
* Adding support for scikit-learn > 0.22 (#936)
* Preliminary changes
* Updating unit tests for sklearn 0.22 and above
* Triggering sklearn tests + fixes
* Refactoring to inspect.signature in extensions
* Add flake8-print in pre-commit (#939)
* Add flake8-print in pre-commit config
* Replace print statements with logging
* Fix edit api (#940)
* fix edit api
* Update subflow paragraph
* Check the ClassificationTask has class labels set
* Test task is of supported type
* Add tests for format_prediction
* Adding Python 3.8 support (#916)
* Adding Python 3.8 support
* Fixing indentation
* Execute test cases for 3.8
* Testing
* Making install script fail
* Process feedback Neeratyoy
* Test Exception with Regex. Also throw NotImplementedError instead of TypeError for unsupported task types. Added links in the example.
* change edit_api to reflect server (#941)
* change edit_api to reflect server
* change test and example to reflect rest API changes
* tutorial comments
* Update datasets_tutorial.py

Co-authored-by: Bilgecelik <[email protected]>
Co-authored-by: marcoslbueno <[email protected]>
Co-authored-by: Sahithya Ravi <[email protected]>
Co-authored-by: Neeratyoy Mallik <[email protected]>
Co-authored-by: zikun <[email protected]>
1 parent f70c720 commit a442688

File tree

4 files changed: +375 −31 lines
Lines changed: 205 additions & 0 deletions
@@ -0,0 +1,205 @@
"""
================================
Creating and Using a Custom Flow
================================

The most convenient way to create a flow for your machine learning workflow is to generate it
automatically as described in the `Obtain Flow IDs <https://openml.github.io/openml-python/master/examples/30_extended/flow_id_tutorial.html#sphx-glr-examples-30-extended-flow-id-tutorial-py>`_ tutorial.  # noqa E501
However, there are scenarios where this is not possible, such
as when the flow uses a framework without an extension or when the flow is described by a script.

In those cases you can still create a custom flow by following the steps of this tutorial.
As an example we will use the flows generated for the `AutoML Benchmark <https://openml.github.io/automlbenchmark/>`_,
and also show how to link runs to the custom flow.
"""

####################################################################################################

# License: BSD 3-Clause
# .. warning:: This example uploads data. For that reason, this example
#   connects to the test server at test.openml.org. This prevents the main
#   server from being crowded with example datasets, tasks, runs, and so on.
from collections import OrderedDict
import numpy as np

import openml
from openml import OpenMLClassificationTask
from openml.runs.functions import format_prediction

openml.config.start_using_configuration_for_example()

####################################################################################################
# 1. Defining the flow
# ====================
# The first step is to define all the hyperparameters of your flow.
# The API pages feature a description of each variable of the `OpenMLFlow <https://openml.github.io/openml-python/master/generated/openml.OpenMLFlow.html#openml.OpenMLFlow>`_.  # noqa E501
# Note that `external version` and `name` together uniquely identify a flow.
#
# The AutoML Benchmark runs AutoML systems across a range of tasks.
# OpenML stores Flows for each AutoML system. However, the AutoML benchmark adds
# preprocessing to the flow, so it should be described in a new flow.
#
# For this tutorial, we will break down the flow arguments into several groups.
# First we define the name and version information.
# Make sure to include enough information so others can determine exactly which
# version of the package/script is used. Use tags so users can find your flow easily.

general = dict(
    name="automlbenchmark_autosklearn",
    description=(
        "Auto-sklearn as set up by the AutoML Benchmark. "
        "Source: https://github.com/openml/automlbenchmark/releases/tag/v0.9"
    ),
    external_version="amlb==0.9",
    language="English",
    tags=["amlb", "benchmark", "study_218"],
    dependencies="amlb==0.9",
)
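
# A hedged aside (not part of the original tutorial): because `name` and
# `external_version` together uniquely identify a flow, you can check whether
# this flow was already published before creating a duplicate.
# `openml.flows.flow_exists` returns the flow id of a match, or False if there is none.
existing_id = openml.flows.flow_exists(general["name"], general["external_version"])
if existing_id:
    print(f"A flow with this name and external version already exists: {existing_id}")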

####################################################################################################
# Next we define the flow hyperparameters. We define their name and default value in `parameters`,
# and provide meta-data for each hyperparameter through `parameters_meta_info`.
# Note that even though the argument name is `parameters`, they describe the hyperparameters.
# The use of ordered dicts is required.

flow_hyperparameters = dict(
    parameters=OrderedDict(time="240", memory="32", cores="8"),
    parameters_meta_info=OrderedDict(
        cores=OrderedDict(description="number of available cores", data_type="int"),
        memory=OrderedDict(description="memory in gigabytes", data_type="int"),
        time=OrderedDict(description="time in minutes", data_type="int"),
    ),
)

####################################################################################################
# It is possible to build a flow which uses other flows.
# For example, the Random Forest Classifier is a flow, but you could also construct a flow
# which uses a Random Forest Classifier in an ML pipeline. When constructing the pipeline flow,
# you can use the Random Forest Classifier flow as a *subflow*. This allows
# all hyperparameters of the Random Forest Classifier flow to also be specified in your pipeline flow.
#
# In this example, the auto-sklearn flow is a subflow: the auto-sklearn flow is entirely executed as part of this flow.
# This allows people to specify the auto-sklearn hyperparameters used in this flow.
# In general, using a subflow is not required.
#
# Note: flow 15275 is not actually the right flow on the test server,
# but that does not matter for this demonstration.

autosklearn_flow = openml.flows.get_flow(15275)  # auto-sklearn 0.5.1
subflow = dict(components=OrderedDict(automl_tool=autosklearn_flow))

####################################################################################################
# With all parameters of the flow defined, we can now initialize the OpenMLFlow and publish it.
# Because we provided all the details already, we do not need to provide a `model` to the flow.
#
# In our case, we don't even have a model. It is also possible to have a model but still need
# to follow these steps, namely when the model (Python object) does not have an extension from
# which to automatically extract the hyperparameters.
# So whether you have a model with no extension or no model at all, explicitly set
# the model of the flow to `None`.

autosklearn_amlb_flow = openml.flows.OpenMLFlow(
    **general, **flow_hyperparameters, **subflow, model=None,
)
autosklearn_amlb_flow.publish()
print(f"autosklearn flow created: {autosklearn_amlb_flow.flow_id}")

####################################################################################################
# 2. Using the flow
# ====================
# This section shows how to upload run data for your custom flow.
# Take care to change the values of the parameters as well as the task id,
# to reflect the actual run.
# Task and parameter values in the example are fictional.

flow_id = autosklearn_amlb_flow.flow_id

parameters = [
    OrderedDict([("oml:name", "cores"), ("oml:value", 4), ("oml:component", flow_id)]),
    OrderedDict([("oml:name", "memory"), ("oml:value", 16), ("oml:component", flow_id)]),
    OrderedDict([("oml:name", "time"), ("oml:value", 120), ("oml:component", flow_id)]),
]

task_id = 1408  # Iris Task
task = openml.tasks.get_task(task_id)
dataset_id = task.get_dataset().dataset_id


####################################################################################################
# The last bit of information we need for the run is the predicted values.
# The exact format of the predictions depends on the task.
#
# The predictions should always be a list of lists; each inner list should contain:
# - the repeat number: for repeated evaluation strategies (e.g. repeated cross-validation).
# - the fold number: for cross-validation (for holdout, this is 0).
# - 0: this field is for backward compatibility.
# - index: the row (of the original dataset) for which the prediction was made.
# - p_1, ..., p_c: for each class the predicted probability of the sample
#   belonging to that class (no elements for regression tasks).
#   Make sure the order of these elements follows the order of `task.class_labels`.
# - the predicted class/value for the sample
# - the true class/value for the sample
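#
# For illustration (a hypothetical row, not taken from a real run), a single
# prediction for this Iris task could look as follows: repeat 0, fold 2, the
# backward-compatibility 0, dataset row 14, three probabilities ordered as in
# `task.class_labels`, and a prediction that here equals the true label:
# [0, 2, 0, 14, 0.8, 0.1, 0.1, "Iris-setosa", "Iris-setosa"]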
#
# When using openml-python extensions (such as through `run_model_on_task`),
# all of this formatting is automatic.
# Unfortunately we can not automate this procedure for custom flows,
# which means a little additional effort is required.
#
# Here we generate some random predictions in place.
# You can ignore this code, or use it to better understand the formatting of the predictions.
#
# Find the repeats/folds for this task:
n_repeats, n_folds, _ = task.get_split_dimensions()
all_test_indices = [
    (repeat, fold, index)
    for repeat in range(n_repeats)
    for fold in range(n_folds)
    for index in task.get_train_test_split_indices(fold, repeat)[1]
]

# random class probabilities (Iris has 150 samples and 3 classes):
r = np.random.rand(150 * n_repeats, 3)
# scale the random values so that the probabilities of each sample sum to 1:
y_proba = r / r.sum(axis=1).reshape(-1, 1)
y_pred = y_proba.argmax(axis=1)
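
# (Illustrative sanity check, not in the original example:) after the scaling
# above, every row of `y_proba` is a valid probability distribution.
assert np.allclose(y_proba.sum(axis=1), 1.0)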

class_map = dict(zip(range(3), task.class_labels))
_, y_true = task.get_X_and_y()
y_true = [class_map[y] for y in y_true]

# We format the predictions with the utility function `format_prediction`.
# It will organize the relevant data in the expected format/order.
predictions = []
for where, y, yp, proba in zip(all_test_indices, y_true, y_pred, y_proba):
    repeat, fold, index = where

    prediction = format_prediction(
        task=task,
        repeat=repeat,
        fold=fold,
        index=index,
        prediction=class_map[yp],
        truth=y,
        proba={c: pb for (c, pb) in zip(task.class_labels, proba)},
    )
    predictions.append(prediction)

####################################################################################################
# Finally we can create the OpenMLRun object and upload it.
# We use the argument `setup_string` because the flow we used was a script.

benchmark_command = "python3 runbenchmark.py auto-sklearn medium -m aws -t 119"
my_run = openml.runs.OpenMLRun(
    task_id=task_id,
    flow_id=flow_id,
    dataset_id=dataset_id,
    parameter_settings=parameters,
    setup_string=benchmark_command,
    data_content=predictions,
    tags=["study_218"],
    description_text="Run generated by the Custom Flow tutorial.",
)
my_run.publish()
print("run created:", my_run.run_id)

openml.config.stop_using_configuration_for_example()

openml/runs/functions.py

Lines changed: 86 additions & 11 deletions
@@ -4,6 +4,7 @@
 import io
 import itertools
 import os
+import time
 from typing import Any, List, Dict, Optional, Set, Tuple, Union, TYPE_CHECKING  # noqa F401
 import warnings

@@ -250,7 +251,8 @@ def run_flow_on_task(
     )

     data_content, trace, fold_evaluations, sample_evaluations = res
-
+    fields = [*run_environment, time.strftime("%c"), "Created by run_flow_on_task"]
+    generated_description = "\n".join(fields)
     run = OpenMLRun(
         task_id=task.task_id,
         flow_id=flow_id,
@@ -262,6 +264,7 @@
         data_content=data_content,
         flow=flow,
         setup_string=flow.extension.create_setup_string(flow.model),
+        description_text=generated_description,
     )

     if (upload_flow or avoid_duplicate_runs) and flow.flow_id is not None:
@@ -478,13 +481,17 @@ def _calculate_local_measure(sklearn_fn, openml_name):

         for i, tst_idx in enumerate(test_indices):

-            arff_line = [rep_no, fold_no, sample_no, tst_idx]  # type: List[Any]
             if task.class_labels is not None:
-                for j, class_label in enumerate(task.class_labels):
-                    arff_line.append(proba_y[i][j])
-
-                arff_line.append(task.class_labels[pred_y[i]])
-                arff_line.append(task.class_labels[test_y[i]])
+                arff_line = format_prediction(
+                    task=task,
+                    repeat=rep_no,
+                    fold=fold_no,
+                    sample=sample_no,
+                    index=tst_idx,
+                    prediction=task.class_labels[pred_y[i]],
+                    truth=task.class_labels[test_y[i]],
+                    proba=dict(zip(task.class_labels, proba_y[i])),
+                )
             else:
                 raise ValueError("The task has no class labels")

@@ -498,7 +505,15 @@ def _calculate_local_measure(sklearn_fn, openml_name):
     elif isinstance(task, OpenMLRegressionTask):

         for i in range(0, len(test_indices)):
-            arff_line = [rep_no, fold_no, test_indices[i], pred_y[i], test_y[i]]
+            arff_line = format_prediction(
+                task=task,
+                repeat=rep_no,
+                fold=fold_no,
+                index=test_indices[i],
+                prediction=pred_y[i],
+                truth=test_y[i],
+            )
+
             arff_datacontent.append(arff_line)

     if add_local_measures:
@@ -815,7 +830,7 @@ def list_runs(
     study: Optional[int] = None,
     display_errors: bool = False,
     output_format: str = "dict",
-    **kwargs
+    **kwargs,
 ) -> Union[Dict, pd.DataFrame]:
     """
     List all runs matching all of the given filters.
@@ -887,7 +902,7 @@
         tag=tag,
         study=study,
         display_errors=display_errors,
-        **kwargs
+        **kwargs,
     )

@@ -900,7 +915,7 @@ def _list_runs(
     study: Optional[int] = None,
     display_errors: bool = False,
     output_format: str = "dict",
-    **kwargs
+    **kwargs,
 ) -> Union[Dict, pd.DataFrame]:
     """
     Perform API call `/run/list/{filters}'
@@ -1004,3 +1019,63 @@ def __list_runs(api_call, output_format="dict"):
         runs = pd.DataFrame.from_dict(runs, orient="index")

     return runs
+
+
+def format_prediction(
+    task: OpenMLSupervisedTask,
+    repeat: int,
+    fold: int,
+    index: int,
+    prediction: Union[str, int, float],
+    truth: Union[str, int, float],
+    sample: Optional[int] = None,
+    proba: Optional[Dict[str, float]] = None,
+) -> List[Union[str, int, float]]:
+    """ Format the predictions in the specific order as required for the run results.
+
+    Parameters
+    ----------
+    task: OpenMLSupervisedTask
+        Task for which to format the predictions.
+    repeat: int
+        From which repeat this prediction is made.
+    fold: int
+        From which fold this prediction is made.
+    index: int
+        For which index this prediction is made.
+    prediction: str, int or float
+        The predicted class label or value.
+    truth: str, int or float
+        The true class label or value.
+    sample: int, optional (default=None)
+        From which sample set this prediction is made.
+        Required only for LearningCurve tasks.
+    proba: Dict[str, float], optional (default=None)
+        For classification tasks only.
+        A mapping from each class label to their predicted probability.
+        The dictionary should contain an entry for each of the `task.class_labels`.
+        E.g.: {"Iris-Setosa": 0.2, "Iris-Versicolor": 0.7, "Iris-Virginica": 0.1}
+
+    Returns
+    -------
+    A list with elements for the prediction results of a run.
+
+    """
+    if isinstance(task, OpenMLClassificationTask):
+        if proba is None:
+            raise ValueError("`proba` is required for classification task")
+        if task.class_labels is None:
+            raise ValueError("The classification task must have class labels set")
+        if not set(task.class_labels) == set(proba):
+            raise ValueError("Each class should have a predicted probability")
+        if sample is None:
+            if isinstance(task, OpenMLLearningCurveTask):
+                raise ValueError("`sample` can not be none for LearningCurveTask")
+            else:
+                sample = 0
+        probabilities = [proba[c] for c in task.class_labels]
+        return [repeat, fold, sample, index, *probabilities, truth, prediction]
+    elif isinstance(task, OpenMLRegressionTask):
+        return [repeat, fold, index, truth, prediction]
+    else:
+        raise NotImplementedError(f"Formatting for {type(task)} is not supported.")
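
A quick sketch of how the new `format_prediction` helper behaves (hypothetical task objects and values, for illustration only; not part of the commit):

    # For a classification task `clf_task` with class_labels ["A", "B"]:
    # format_prediction(task=clf_task, repeat=0, fold=1, index=42,
    #                   prediction="A", truth="B", proba={"A": 0.7, "B": 0.3})
    # returns [repeat, fold, sample, index, p_A, p_B, truth, prediction],
    # i.e. [0, 1, 0, 42, 0.7, 0.3, "B", "A"]  (sample defaults to 0 for non-learning-curve tasks).

    # For a regression task `reg_task` there are no probabilities and no sample field:
    # format_prediction(task=reg_task, repeat=0, fold=1, index=42, prediction=3.14, truth=2.71)
    # returns [0, 1, 42, 2.71, 3.14].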
