@@ -1,12 +1,14 @@
 import json
 import xmltodict
 import pandas as pd
+import numpy as np
 from typing import Union, List, Optional, Dict
 import collections

 import openml.utils
 import openml._api_calls
 from ..evaluations import OpenMLEvaluation
+import openml


 def list_evaluations(
@@ -209,8 +211,8 @@ def __list_evaluations(api_call, output_format='object'):
                              'array_data': array_data}

     if output_format == 'dataframe':
-        evals = pd.DataFrame.from_dict(evals, orient='index')
-
+        rows = list(evals.values())
+        evals = pd.DataFrame.from_records(rows, columns=rows[0].keys())
     return evals

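The conversion above builds the dataframe with from_records over the row dicts instead of from_dict(evals, orient='index'). A minimal sketch of the behavioural difference, assuming evals is a dict of row dicts keyed by run id as built earlier in __list_evaluations: from_records yields a plain positional index, and passing columns=rows[0].keys() pins the column order to the key order of the rows, neither of which orient='index' guarantees.

    import pandas as pd

    # Toy stand-in for the evals dict built by __list_evaluations.
    evals = {10: {'run_id': 10, 'task_id': 3, 'value': 0.97},
             11: {'run_id': 11, 'task_id': 3, 'value': 0.95}}

    rows = list(evals.values())
    df = pd.DataFrame.from_records(rows, columns=rows[0].keys())
    print(df)
    #    run_id  task_id  value
    # 0      10        3   0.97
    # 1      11        3   0.95
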
@@ -238,3 +240,95 @@ def list_evaluation_measures() -> List[str]:
                          '"oml:measure" as a list')
     qualities = qualities['oml:evaluation_measures']['oml:measures'][0]['oml:measure']
     return qualities
+
+
+def list_evaluations_setups(
+    function: str,
+    offset: Optional[int] = None,
+    size: Optional[int] = None,
+    id: Optional[List[int]] = None,
+    task: Optional[List[int]] = None,
+    setup: Optional[List[int]] = None,
+    flow: Optional[List[int]] = None,
+    uploader: Optional[List[int]] = None,
+    tag: Optional[str] = None,
+    per_fold: Optional[bool] = None,
+    sort_order: Optional[str] = None,
+    output_format: str = 'dataframe'
+) -> Union[Dict, pd.DataFrame]:
+    """
+    List all run-evaluation pairs matching all of the given filters
+    and their hyperparameter settings.
+
+    Parameters
+    ----------
+    function : str
+        the evaluation function, e.g., predictive_accuracy
+    offset : int, optional
+        the number of runs to skip, starting from the first
+    size : int, optional
+        the maximum number of runs to show
+    id : list[int], optional
+        the list of evaluation IDs
+    task : list[int], optional
+        the list of task IDs
+    setup : list[int], optional
+        the list of setup IDs
+    flow : list[int], optional
+        the list of flow IDs
+    uploader : list[int], optional
+        the list of uploader IDs
+    tag : str, optional
+        filter evaluations based on the given tag
+    per_fold : bool, optional
+        whether to return the evaluations per fold rather than aggregated over folds
+    sort_order : str, optional
+        order of sorting evaluations, ascending ("asc") or descending ("desc")
+    output_format : str, optional (default='dataframe')
+        The parameter decides the format of the output.
+        - If 'dict' the output is a dict of dicts
+        - If 'dataframe' the output is a pandas DataFrame
+
+    Returns
+    -------
+    dict or dataframe with hyperparameter settings as a list of tuples
+    """
+    # List evaluations matching the given filters
+    evals = list_evaluations(function=function, offset=offset, size=size, id=id, task=task,
+                             setup=setup, flow=flow, uploader=uploader, tag=tag,
+                             per_fold=per_fold, sort_order=sort_order, output_format='dataframe')
+
+    # List setups: split the setup ids from evals into chunks of N setups,
+    # as list_setups does not support listing a large number of setups at once
+    df = pd.DataFrame()
+    if len(evals) != 0:
+        N = 100  # size of each chunk of setup ids
+        # np.array_split, unlike np.split, allows chunks of unequal size
+        setup_chunks = np.array_split(evals['setup_id'].unique(),
+                                      ((len(evals['setup_id'].unique()) - 1) // N) + 1)
+        setups = pd.DataFrame()
+        for setup_chunk in setup_chunks:
+            result = pd.DataFrame(openml.setups.list_setups(setup=setup_chunk,
+                                                            output_format='dataframe'))
+            # drop flow_id, which evals already carries, to avoid duplicate columns in the merge
+            result.drop('flow_id', axis=1, inplace=True)
+            # concatenate the resulting setup chunks into a single dataframe
+            setups = pd.concat([setups, result], ignore_index=True)
+
+        # Convert the parameters of each setup into a list of (hyperparameter, value) tuples
+        parameters = []
+        for parameter_dict in setups['parameters']:
+            if parameter_dict is not None:
+                parameters.append([(param['parameter_name'], param['value'])
+                                   for param in parameter_dict.values()])
+            else:
+                parameters.append([])
+        setups['parameters'] = parameters
+
+        # Merge setups with evaluations
+        df = pd.merge(evals, setups, on='setup_id', how='left')
+
+    if output_format == 'dataframe':
+        return df
+    else:
+        return df.to_dict(orient='index')
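
For reference, a hedged usage sketch of the new helper as it would be called through the public API, assuming it is exported via openml.evaluations like its sibling functions; the task id and size are purely illustrative:

    import openml

    # Ten predictive-accuracy evaluations for one (illustrative) task, each
    # joined with the hyperparameter settings of the run's setup.
    df = openml.evaluations.list_evaluations_setups(
        function='predictive_accuracy',
        task=[6],  # illustrative task id
        size=10,
        output_format='dataframe',
    )
    # The 'parameters' column holds each setup's hyperparameters as a
    # list of (hyperparameter, value) tuples, per the docstring above.
    print(df[['setup_id', 'parameters']].head())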