66
77import openml
88import pandas as pd
9+ from openml import OpenMLTask , OpenMLDataset
910
1011from amlb .utils import Namespace , str_sanitize
1112
@@ -20,7 +21,13 @@ def is_openml_benchmark(benchmark: str) -> bool:
2021
2122def load_oml_benchmark (benchmark : str ) -> tuple [str , str | None , list [Namespace ]]:
2223 """Loads benchmark defined by openml suite or task, from openml/s/X or openml/t/Y."""
23- domain , oml_type , oml_id = benchmark .split ("/" )
24+ domain , oml_type , oml_id_str = benchmark .split ("/" )
25+ try :
26+ oml_id = int (oml_id_str )
27+ except ValueError :
28+ raise ValueError (
29+ f"Could not convert OpenML id { oml_id_str !r} in { benchmark !r} to integer."
30+ )
2431
2532 if domain == "test.openml" :
2633 log .debug ("Setting openml server to the test server." )
@@ -34,7 +41,7 @@ def load_oml_benchmark(benchmark: str) -> tuple[str, str | None, list[Namespace]
3441 openml .config .set_retry_policy ("robot" )
3542
3643 if oml_type == "t" :
37- tasks = load_openml_task (domain , oml_id )
44+ tasks = load_openml_task_as_definition (domain , oml_id )
3845 elif oml_type == "s" :
3946 tasks = load_openml_tasks_from_suite (domain , oml_id )
4047 else :
@@ -44,7 +51,7 @@ def load_oml_benchmark(benchmark: str) -> tuple[str, str | None, list[Namespace]
4451 return benchmark , None , tasks
4552
4653
47- def load_openml_tasks_from_suite (domain : str , oml_id : str ) -> list [Namespace ]:
54+ def load_openml_tasks_from_suite (domain : str , oml_id : int ) -> list [Namespace ]:
4855 log .info ("Loading openml suite %s." , oml_id )
4956 suite = openml .study .get_suite (oml_id )
5057 # Here we know the (task, dataset) pairs so only download dataset meta-data is sufficient
@@ -66,18 +73,22 @@ def load_openml_tasks_from_suite(domain: str, oml_id: str) -> list[Namespace]:
6673 return tasks
6774
6875
def load_openml_task_as_definition(domain: str, oml_id: int) -> list[Namespace]:
    """Build the benchmark task definition for a single OpenML task.

    Args:
        domain: Domain part of the benchmark string; it is used as the prefix
            of the generated definition id (``"<domain>.org/t/<task id>"``).
        oml_id: Identifier of the OpenML task to load.

    Returns:
        A one-element list holding the ``Namespace`` that describes the task:
        its sanitized dataset name, the dataset description, the OpenML task
        id and the generated definition id.
    """
    log.info("Loading openml task %s.", oml_id)
    task, data = load_openml_task_and_data(oml_id)
    definition = Namespace(
        name=str_sanitize(data.name),
        description=data.description,
        openml_task_id=task.id,
        id=f"{domain}.org/t/{task.id}",
    )
    # Callers expect a list of definitions, even for a single task.
    return [definition]
87+
88+
def load_openml_task_and_data(task_id: int) -> tuple[OpenMLTask, OpenMLDataset]:
    """Fetch an OpenML task together with the dataset it refers to.

    Both objects are requested with ``download_data=False`` and
    ``download_qualities=False``.

    Args:
        task_id: Identifier of the OpenML task.

    Returns:
        A ``(task, dataset)`` tuple.
    """
    # The task has to be retrieved first because the dataset id is only
    # known once the task has been loaded.
    task = openml.tasks.get_task(
        task_id, download_data=False, download_qualities=False
    )
    data = openml.datasets.get_dataset(
        task.dataset_id, download_data=False, download_qualities=False
    )
    return task, data
0 commit comments