|
7 | 7 |
|
8 | 8 | import openml |
9 | 9 | import pandas as pd |
10 | | -from pprint import pprint |
11 | 10 |
|
12 | 11 | ############################################################################ |
13 | 12 | # |
|
40 | 39 | tasks = pd.DataFrame.from_dict(tasks, orient='index') |
41 | 40 | print(tasks.columns) |
42 | 41 | print("First 5 of %s tasks:" % len(tasks)) |
43 | | -pprint(tasks.head()) |
| 42 | +print(tasks.head()) |
44 | 43 |
|
45 | 44 | # The same can be obtained through fewer lines of code |
46 | 45 | tasks_df = openml.tasks.list_tasks(task_type_id=1, output_format='dataframe') |
47 | | -pprint(tasks_df.head()) |
| 46 | +print(tasks_df.head()) |
48 | 47 |
|
49 | 48 | ############################################################################ |
50 | 49 | # We can filter the list of tasks to only contain datasets with more than |
|
78 | 77 | tasks = openml.tasks.list_tasks(tag='OpenML100') |
79 | 78 | tasks = pd.DataFrame.from_dict(tasks, orient='index') |
80 | 79 | print("First 5 of %s tasks:" % len(tasks)) |
81 | | -pprint(tasks.head()) |
| 80 | +print(tasks.head()) |
82 | 81 |
|
83 | 82 | ############################################################################ |
84 | 83 | # Furthermore, we can list tasks based on the dataset id: |
85 | 84 |
|
86 | 85 | tasks = openml.tasks.list_tasks(data_id=1471) |
87 | 86 | tasks = pd.DataFrame.from_dict(tasks, orient='index') |
88 | 87 | print("First 5 of %s tasks:" % len(tasks)) |
89 | | -pprint(tasks.head()) |
| 88 | +print(tasks.head()) |
90 | 89 |
|
91 | 90 | ############################################################################ |
92 | 91 | # In addition, a size limit and an offset can be applied both separately and simultaneously: |
93 | 92 |
|
94 | 93 | tasks = openml.tasks.list_tasks(size=10, offset=50) |
95 | 94 | tasks = pd.DataFrame.from_dict(tasks, orient='index') |
96 | | -pprint(tasks) |
| 95 | +print(tasks) |
97 | 96 |
|
98 | 97 | ############################################################################ |
99 | 98 | # |
|
134 | 133 | ############################################################################ |
135 | 134 | # Properties of the task are stored as member variables: |
136 | 135 |
|
137 | | -pprint(vars(task)) |
| 136 | +print(vars(task)) |
138 | 137 |
|
139 | 138 | ############################################################################ |
140 | 139 | # And: |
141 | 140 |
|
142 | 141 | ids = [2, 1891, 31, 9983] |
143 | 142 | tasks = openml.tasks.get_tasks(ids) |
144 | | -pprint(tasks[0]) |
| 143 | +print(tasks[0]) |
| 144 | + |
| 145 | +############################################################################ |
| 146 | +# Creating tasks |
| 147 | +# ^^^^^^^^^^^^^^ |
| 148 | +# |
| 149 | +# You can also create new tasks. Take the following into account: |
| 150 | +# |
| 151 | +# * You can only create tasks on _active_ datasets |
| 152 | +# * For now, only the following tasks are supported: classification, regression, |
| 153 | +# clustering, and learning curve analysis. |
| 154 | +# * For now, tasks can only be created on a single dataset. |
| 155 | +# * The exact same task must not already exist. |
| 156 | +# |
| 157 | +# Creating a task requires the following input: |
| 158 | +# |
| 159 | +# * task_type_id: The task type ID (see below). Required. |
| 160 | +# * dataset_id: The dataset ID. Required. |
| 161 | +# * target_name: The name of the attribute you aim to predict. |
| 162 | +# Optional. |
| 163 | +# * estimation_procedure_id: The ID of the estimation procedure used to create train-test |
| 164 | +# splits. Optional. |
| 165 | +# * evaluation_measure: The name of the evaluation measure. Optional. |
| 166 | +# * Any additional inputs for specific tasks |
| 167 | +# |
| 168 | +# It is best to leave the evaluation measure open if there is no strong prerequisite for a |
| 169 | +# specific measure. OpenML will always compute all appropriate measures and you can filter |
| 170 | +# or sort results on your favourite measure afterwards. Only add an evaluation measure if |
| 171 | +# necessary (e.g. when other measures make no sense), since it will create a new task, which |
| 172 | +# scatters results across tasks. |
| 173 | + |
| 174 | + |
| 175 | +############################################################################ |
| 176 | +# Example |
| 177 | +# ####### |
| 178 | +# |
| 179 | +# Let's create a classification task on a dataset. In this example we will do this on the |
| 180 | +# Iris dataset (ID=128 (on test server)). We'll use 10-fold cross-validation (ID=1), |
| 181 | +# and _predictive accuracy_ as the predefined measure (this can also be left open). |
| 182 | +# If a task with these parameters exists, we will get an appropriate exception. |
| 183 | +# If such a task doesn't exist, a task will be created and the corresponding task_id |
| 184 | +# will be returned. |
| 185 | + |
| 186 | + |
| 187 | +# using test server for example uploads |
| 188 | +openml.config.start_using_configuration_for_example() |
| 189 | + |
| 190 | +try: |
| 191 | + tasktypes = openml.tasks.TaskTypeEnum |
| 192 | + my_task = openml.tasks.create_task( |
| 193 | + task_type_id=tasktypes.SUPERVISED_CLASSIFICATION, |
| 194 | + dataset_id=128, |
| 195 | + target_name="class", |
| 196 | + evaluation_measure="predictive_accuracy", |
| 197 | + estimation_procedure_id=1) |
| 198 | + my_task.publish() |
| 199 | +except openml.exceptions.OpenMLServerException as e: |
| 200 | + # Error code 614 means 'task already exists'; any other server error is ignored here |
| 201 | + if e.code == 614: |
| 202 | + # Look up the existing task: positional columns 4, 6 and 19 are matched against the task type, estimation procedure and evaluation measure; the ID is read from column 0 of the first match |
| 203 | + tasks = openml.tasks.list_tasks(data_id=128, output_format='dataframe').to_numpy() |
| 204 | + tasks = tasks[tasks[:, 4] == "Supervised Classification"] |
| 205 | + tasks = tasks[tasks[:, 6] == "10-fold Crossvalidation"] |
| 206 | + tasks = tasks[tasks[:, 19] == "predictive_accuracy"] |
| 207 | + task_id = tasks[0][0] |
| 208 | + print("Task already exists. Task ID is", task_id) |
| 209 | + |
| 210 | +# reverting to prod server |
| 211 | +openml.config.stop_using_configuration_for_example() |
| 212 | + |
| 213 | + |
| 214 | +############################################################################ |
| 215 | +# [Complete list of task types](https://www.openml.org/search?type=task_type) |
| 216 | +# [Complete list of model estimation procedures]( |
| 217 | +# https://www.openml.org/search?q=%2520measure_type%3Aestimation_procedure&type=measure) |
| 218 | +# [Complete list of evaluation measures]( |
| 219 | +# https://www.openml.org/search?q=measure_type%3Aevaluation_measure&type=measure) |
0 commit comments