Commit c89c239

Merge branch 'develop' into bump_version
2 parents b38cad5 + bef46b7 commit c89c239

File tree

18 files changed (+1962 lines, -1732 lines)

examples/OpenMLDemo.ipynb

Lines changed: 0 additions & 703 deletions
This file was deleted.

examples/OpenML_Tutorial.ipynb

Lines changed: 1344 additions & 0 deletions
Large diffs are not rendered by default.

examples/PyOpenML.ipynb

Lines changed: 0 additions & 862 deletions
This file was deleted.

openml/evaluations/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+from .evaluation import OpenMLEvaluation
+from .functions import list_evaluations
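
These two re-exports define the public surface of the new subpackage, so both names are importable directly from openml.evaluations (a minimal sketch):

from openml.evaluations import OpenMLEvaluation, list_evaluations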

openml/evaluations/evaluation.py

Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
+
+class OpenMLEvaluation(object):
+    '''
+    Contains all meta-information about a run / evaluation combination,
+    according to the evaluation/list function.
+
+    Parameters
+    ----------
+    run_id : int
+    task_id : int
+    setup_id : int
+    flow_id : int
+    flow_name : str
+    data_id : int
+    data_name : str
+        the name of the dataset
+    function : str
+        the evaluation function of this item (e.g., accuracy)
+    upload_time : str
+        the time of evaluation
+    value : float
+        the value of this evaluation
+    array_data : str
+        list of information per class (e.g., in case of precision, auroc, recall)
+    '''
+    def __init__(self, run_id, task_id, setup_id, flow_id, flow_name,
+                 data_id, data_name, function, upload_time, value,
+                 array_data=None):
+        self.run_id = run_id
+        self.task_id = task_id
+        self.setup_id = setup_id
+        self.flow_id = flow_id
+        self.flow_name = flow_name
+        self.data_id = data_id
+        self.data_name = data_name
+        self.function = function
+        self.upload_time = upload_time
+        self.value = value
+        self.array_data = array_data
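
For orientation, a minimal sketch of constructing such a container by hand; every value below is invented for illustration (in practice these objects are produced by list_evaluations, not built manually):

from openml.evaluations import OpenMLEvaluation

evaluation = OpenMLEvaluation(
    run_id=1, task_id=2, setup_id=3, flow_id=4,
    flow_name='sklearn.tree.DecisionTreeClassifier',
    data_id=61, data_name='iris',
    function='predictive_accuracy',
    upload_time='2017-01-01 12:00:00',
    value=0.96)
print(evaluation.data_name, evaluation.function, evaluation.value)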

openml/evaluations/functions.py

Lines changed: 93 additions & 0 deletions
@@ -0,0 +1,93 @@
+import xmltodict
+
+from .._api_calls import _perform_api_call
+from ..evaluations import OpenMLEvaluation
+
+
+def list_evaluations(function, offset=None, size=None, id=None, task=None, setup=None,
+                     flow=None, uploader=None, tag=None):
+    """List all run-evaluation pairs matching all of the given filters.
+
+    Performs the API call `evaluation/list/function/{function}/{filters}`.
+
+    Parameters
+    ----------
+    function : str
+        the evaluation function, e.g., predictive_accuracy
+    offset : int, optional
+        the number of runs to skip, starting from the first
+    size : int, optional
+        the maximum number of runs to show
+
+    id : list, optional
+
+    task : list, optional
+
+    setup : list, optional
+
+    flow : list, optional
+
+    uploader : list, optional
+
+    tag : str, optional
+
+    Returns
+    -------
+    dict
+        Dict of found evaluations, keyed by run id.
+    """
+
+    api_call = "evaluation/list/function/%s" % function
+    if offset is not None:
+        api_call += "/offset/%d" % int(offset)
+    if size is not None:
+        api_call += "/limit/%d" % int(size)
+    if id is not None:
+        api_call += "/run/%s" % ','.join([str(int(i)) for i in id])
+    if task is not None:
+        api_call += "/task/%s" % ','.join([str(int(i)) for i in task])
+    if setup is not None:
+        api_call += "/setup/%s" % ','.join([str(int(i)) for i in setup])
+    if flow is not None:
+        api_call += "/flow/%s" % ','.join([str(int(i)) for i in flow])
+    if uploader is not None:
+        api_call += "/uploader/%s" % ','.join([str(int(i)) for i in uploader])
+    if tag is not None:
+        api_call += "/tag/%s" % tag
+
+    return _list_evaluations(api_call)
+
+
+def _list_evaluations(api_call):
+    """Helper function to parse API calls which return lists of runs."""
+
+    xml_string = _perform_api_call(api_call)
+
+    evals_dict = xmltodict.parse(xml_string)
+    # Minimalistic check whether the XML is useful
+    if 'oml:evaluations' not in evals_dict:
+        raise ValueError('Error in return XML, does not contain "oml:evaluations": %s'
+                         % str(evals_dict))
+
+    if isinstance(evals_dict['oml:evaluations']['oml:evaluation'], list):
+        evals_list = evals_dict['oml:evaluations']['oml:evaluation']
+    elif isinstance(evals_dict['oml:evaluations']['oml:evaluation'], dict):
+        evals_list = [evals_dict['oml:evaluations']['oml:evaluation']]
+    else:
+        raise TypeError()
+
+    evals = dict()
+    for eval_ in evals_list:
+        run_id = int(eval_['oml:run_id'])
+        # per-class array data is optional in the response
+        array_data = None
+        if 'oml:array_data' in eval_:
+            array_data = eval_['oml:array_data']
+
+        evaluation = OpenMLEvaluation(int(eval_['oml:run_id']), int(eval_['oml:task_id']),
+                                      int(eval_['oml:setup_id']), int(eval_['oml:flow_id']),
+                                      eval_['oml:flow_name'], eval_['oml:data_id'],
+                                      eval_['oml:data_name'], eval_['oml:function'],
+                                      eval_['oml:upload_time'], float(eval_['oml:value']),
+                                      array_data)
+        evals[run_id] = evaluation
+    return evals
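
A short usage sketch of the new listing call; it assumes a configured API key and a reachable OpenML server, and the filter values are arbitrary:

import openml

# at most 10 predictive-accuracy evaluations, restricted to two tasks
evals = openml.evaluations.list_evaluations(
    'predictive_accuracy', size=10, task=[1, 2])
for run_id, evaluation in evals.items():
    print(run_id, evaluation.flow_name, evaluation.value)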

openml/runs/functions.py

Lines changed: 107 additions & 67 deletions
@@ -106,7 +106,13 @@ def run_flow_on_task(task, flow, avoid_duplicate_runs=True, flow_tags=None,
                     dataset_id=dataset.dataset_id, model=flow.model, tags=tags)
     run.parameter_settings = OpenMLRun._parse_parameters(flow)

-    run.data_content, run.trace_content, run.trace_attributes, run.detailed_evaluations = res
+    run.data_content, run.trace_content, run.trace_attributes, fold_evaluations, sample_evaluations = res
+    # now we need to attach the detailed evaluations
+    if task.task_type_id == 3:
+        run.sample_evaluations = sample_evaluations
+    else:
+        run.fold_evaluations = fold_evaluations
+
     config.logger.info('Executed Task %d with Flow id: %d' % (task.task_id, run.flow_id))
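
The branch on task.task_type_id == 3 (the learning-curve task type, judging by the sample handling in this commit) decides which level of detail ends up on the run object. A rough caller-side sketch; the sklearn_to_flow converter and the task id are assumptions for illustration:

import openml
from sklearn.tree import DecisionTreeClassifier

task = openml.tasks.get_task(59)  # a plain supervised classification task
flow = openml.flows.sklearn_to_flow(DecisionTreeClassifier())
run = openml.runs.run_flow_on_task(task, flow)

# non-learning-curve tasks expose per-fold measures as
# run.fold_evaluations[measure][repeat][fold], if runtime could be measured:
print(run.fold_evaluations['usercpu_time_millis_training'][0][0])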

@@ -299,15 +305,20 @@ def _seed_current_object(current_value):
     return model


-def _prediction_to_row(rep_no, fold_no, row_id, correct_label, predicted_label,
-                       predicted_probabilities, class_labels, model_classes_mapping):
+def _prediction_to_row(rep_no, fold_no, sample_no, row_id, correct_label,
+                       predicted_label, predicted_probabilities, class_labels,
+                       model_classes_mapping):
     """Util function that turns probability estimates of a classifier for a given
     instance into the right arff format to upload to openml.

     Parameters
     ----------
     rep_no : int
+        The repeat of the experiment (0-based; in case of 1-time CV, always 0)
     fold_no : int
+        The fold of the experiment (0-based; in case of holdout, always 0)
+    sample_no : int
+        In case of learning curves, the index of the subsample (0-based; in
+        case of no learning curve, always 0)
     row_id : int
         row id in the initial dataset
     correct_label : str
@@ -328,11 +339,12 @@ def _prediction_to_row(rep_no, fold_no, row_id, correct_label, predicted_label,
     """
     if not isinstance(rep_no, (int, np.integer)): raise ValueError('rep_no should be int')
     if not isinstance(fold_no, (int, np.integer)): raise ValueError('fold_no should be int')
+    if not isinstance(sample_no, (int, np.integer)): raise ValueError('sample_no should be int')
     if not isinstance(row_id, (int, np.integer)): raise ValueError('row_id should be int')
     if not len(predicted_probabilities) == len(model_classes_mapping):
         raise ValueError('len(predicted_probabilities) != len(class_labels)')

-    arff_line = [rep_no, fold_no, row_id]
+    arff_line = [rep_no, fold_no, sample_no, row_id]
     for class_label_idx in range(len(class_labels)):
         if class_label_idx in model_classes_mapping:
             index = np.where(model_classes_mapping == class_label_idx)[0][0]  # TODO: WHY IS THIS 2D???
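
To make the widened prefix concrete (example values invented; the per-class confidences and label columns that the surrounding code appends afterwards are unchanged):

# old prefix: [rep_no, fold_no, row_id]             e.g. [0, 2, 1523]
# new prefix: [rep_no, fold_no, sample_no, row_id]  e.g. [0, 2, 0, 1523]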
@@ -349,82 +361,100 @@ def _run_task_get_arffcontent(model, task, class_labels):
     X, Y = task.get_X_and_y()
     arff_datacontent = []
     arff_tracecontent = []
-    user_defined_measures = defaultdict(lambda: defaultdict(dict))
+    # stores fold-based evaluation measures. In case of a sample-based task,
+    # this information is overwritten multiple times, but due to the ordering
+    # of the loops it eventually contains the information based on the full
+    # dataset size
+    user_defined_measures_fold = defaultdict(lambda: defaultdict(dict))
+    # stores sample-based evaluation measures (sublevel of fold-based);
+    # also filled on a non-sample-based task, but then the information is
+    # the same as the fold-based measures, and disregarded in that case
+    user_defined_measures_sample = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))

-    rep_no = 0
     # sys.version_info returns a tuple, the following line compares the entry of tuples
     # https://docs.python.org/3.6/reference/expressions.html#value-comparisons
     can_measure_runtime = sys.version_info[:2] >= (3, 3) and _check_n_jobs(model)
     # TODO use different iterator to only provide a single iterator (less
     # methods, less maintenance, less confusion)
-    for rep in task.iterate_repeats():
-        fold_no = 0
-        for fold in rep:
-            model_fold = sklearn.base.clone(model, safe=True)
-            train_indices, test_indices = fold
-            trainX = X[train_indices]
-            trainY = Y[train_indices]
-            testX = X[test_indices]
-            testY = Y[test_indices]
-
-            try:
-                # for measuring runtime. Only available since Python 3.3
-                if can_measure_runtime:
-                    modelfit_starttime = time.process_time()
-                model_fold.fit(trainX, trainY)
-
-                if can_measure_runtime:
-                    modelfit_duration = (time.process_time() - modelfit_starttime) * 1000
-                    user_defined_measures['usercpu_time_millis_training'][rep_no][fold_no] = modelfit_duration
-            except AttributeError as e:
-                # typically happens when training a regressor on a classification task
-                raise PyOpenMLError(str(e))
-
-            # extract trace, if applicable
-            if isinstance(model_fold, sklearn.model_selection._search.BaseSearchCV):
-                arff_tracecontent.extend(_extract_arfftrace(model_fold, rep_no, fold_no))
-
-            # search for model classes_ (might differ depending on modeltype)
-            # first, pipelines are a special case (these don't have a classes_
-            # object, but rather borrow it from the last step. We do this manually,
-            # because of the BaseSearch check)
-            if isinstance(model_fold, sklearn.pipeline.Pipeline):
-                used_estimator = model_fold.steps[-1][-1]
-            else:
-                used_estimator = model_fold
+    num_reps, num_folds, num_samples = task.get_split_dimensions()
+
+    for rep_no in range(num_reps):
+        for fold_no in range(num_folds):
+            for sample_no in range(num_samples):
+                model_fold = sklearn.base.clone(model, safe=True)
+                train_indices, test_indices = task.get_train_test_split_indices(repeat=rep_no,
+                                                                                fold=fold_no,
+                                                                                sample=sample_no)
+                trainX = X[train_indices]
+                trainY = Y[train_indices]
+                testX = X[test_indices]
+                testY = Y[test_indices]
+
+                try:
+                    # for measuring runtime. Only available since Python 3.3
+                    if can_measure_runtime:
+                        modelfit_starttime = time.process_time()
+                    model_fold.fit(trainX, trainY)
+
+                    if can_measure_runtime:
+                        modelfit_duration = (time.process_time() - modelfit_starttime) * 1000
+                        user_defined_measures_sample['usercpu_time_millis_training'][rep_no][fold_no][sample_no] = modelfit_duration
+                        user_defined_measures_fold['usercpu_time_millis_training'][rep_no][fold_no] = modelfit_duration
+                except AttributeError as e:
+                    # typically happens when training a regressor on a classification task
+                    raise PyOpenMLError(str(e))
+
+                # extract trace, if applicable
+                if isinstance(model_fold, sklearn.model_selection._search.BaseSearchCV):
+                    arff_tracecontent.extend(_extract_arfftrace(model_fold, rep_no, fold_no))
+
+                # search for model classes_ (might differ depending on modeltype)
+                # first, pipelines are a special case (these don't have a classes_
+                # object, but rather borrow it from the last step. We do this manually,
+                # because of the BaseSearch check)
+                if isinstance(model_fold, sklearn.pipeline.Pipeline):
+                    used_estimator = model_fold.steps[-1][-1]
+                else:
+                    used_estimator = model_fold

-            if isinstance(used_estimator, sklearn.model_selection._search.BaseSearchCV):
-                model_classes = used_estimator.best_estimator_.classes_
-            else:
-                model_classes = used_estimator.classes_
+                if isinstance(used_estimator, sklearn.model_selection._search.BaseSearchCV):
+                    model_classes = used_estimator.best_estimator_.classes_
+                else:
+                    model_classes = used_estimator.classes_

-            if can_measure_runtime:
-                modelpredict_starttime = time.process_time()
-
-            ProbaY = model_fold.predict_proba(testX)
-            PredY = model_fold.predict(testX)
-            if can_measure_runtime:
-                modelpredict_duration = (time.process_time() - modelpredict_starttime) * 1000
-                user_defined_measures['usercpu_time_millis_testing'][rep_no][fold_no] = modelpredict_duration
-                user_defined_measures['usercpu_time_millis'][rep_no][fold_no] = modelfit_duration + modelpredict_duration
+                if can_measure_runtime:
+                    modelpredict_starttime = time.process_time()

-            if ProbaY.shape[1] != len(class_labels):
-                warnings.warn("Repeat %d Fold %d: estimator only predicted for %d/%d classes!" % (rep_no, fold_no, ProbaY.shape[1], len(class_labels)))
+                ProbaY = model_fold.predict_proba(testX)
+                PredY = model_fold.predict(testX)
+                if can_measure_runtime:
+                    modelpredict_duration = (time.process_time() - modelpredict_starttime) * 1000
+                    user_defined_measures_fold['usercpu_time_millis_testing'][rep_no][fold_no] = modelpredict_duration
+                    user_defined_measures_fold['usercpu_time_millis'][rep_no][fold_no] = modelfit_duration + modelpredict_duration
+                    user_defined_measures_sample['usercpu_time_millis_testing'][rep_no][fold_no][sample_no] = modelpredict_duration
+                    user_defined_measures_sample['usercpu_time_millis'][rep_no][fold_no][sample_no] = modelfit_duration + modelpredict_duration

-            for i in range(0, len(test_indices)):
-                arff_line = _prediction_to_row(rep_no, fold_no, test_indices[i], class_labels[testY[i]], PredY[i], ProbaY[i], class_labels, model_classes)
-                arff_datacontent.append(arff_line)
+                if ProbaY.shape[1] != len(class_labels):
+                    warnings.warn("Repeat %d Fold %d: estimator only predicted for %d/%d classes!" % (rep_no, fold_no, ProbaY.shape[1], len(class_labels)))

-            fold_no = fold_no + 1
-        rep_no = rep_no + 1
+                for i in range(0, len(test_indices)):
+                    arff_line = _prediction_to_row(rep_no, fold_no, sample_no,
+                                                   test_indices[i], class_labels[testY[i]],
+                                                   PredY[i], ProbaY[i], class_labels, model_classes)
+                    arff_datacontent.append(arff_line)

     if isinstance(model_fold, sklearn.model_selection._search.BaseSearchCV):
         # arff_tracecontent is already set
         arff_trace_attributes = _extract_arfftrace_attributes(model_fold)
     else:
         arff_tracecontent = None
         arff_trace_attributes = None
-    return arff_datacontent, arff_tracecontent, arff_trace_attributes, user_defined_measures
+
+    return arff_datacontent, \
+        arff_tracecontent, \
+        arff_trace_attributes, \
+        user_defined_measures_fold, \
+        user_defined_measures_sample


 def _extract_arfftrace(model, rep_no, fold_no):
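
The rewrite swaps the iterate_repeats() iterator for explicit indices over the split dimensions. Driving the same task API directly looks roughly like this (task id 59 is an arbitrary example; for tasks without learning curves num_samples is 1, so the sample loop runs once):

import openml

task = openml.tasks.get_task(59)
num_reps, num_folds, num_samples = task.get_split_dimensions()
for rep_no in range(num_reps):
    for fold_no in range(num_folds):
        for sample_no in range(num_samples):
            train_indices, test_indices = task.get_train_test_split_indices(
                repeat=rep_no, fold=fold_no, sample=sample_no)
            print(rep_no, fold_no, sample_no,
                  len(train_indices), len(test_indices))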
@@ -571,7 +601,8 @@ def _create_run_from_xml(xml):

     files = dict()
     evaluations = dict()
-    detailed_evaluations = defaultdict(lambda: defaultdict(dict))
+    fold_evaluations = defaultdict(lambda: defaultdict(dict))
+    sample_evaluations = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
     if 'oml:output_data' not in run:
         raise ValueError('Run does not contain output_data (OpenML server error?)')
     else:
@@ -598,11 +629,18 @@ def _create_run_from_xml(xml):
             else:
                 raise ValueError('Could not find keys "value" or "array_data" '
                                  'in %s' % str(evaluation_dict.keys()))
-
-            if '@repeat' in evaluation_dict and '@fold' in evaluation_dict:
+            if '@repeat' in evaluation_dict and '@fold' in evaluation_dict and '@sample' in evaluation_dict:
+                repeat = int(evaluation_dict['@repeat'])
+                fold = int(evaluation_dict['@fold'])
+                sample = int(evaluation_dict['@sample'])
+                repeat_dict = sample_evaluations[key]
+                fold_dict = repeat_dict[repeat]
+                sample_dict = fold_dict[fold]
+                sample_dict[sample] = value
+            elif '@repeat' in evaluation_dict and '@fold' in evaluation_dict:
                 repeat = int(evaluation_dict['@repeat'])
                 fold = int(evaluation_dict['@fold'])
-                repeat_dict = detailed_evaluations[key]
+                repeat_dict = fold_evaluations[key]
                 fold_dict = repeat_dict[repeat]
                 fold_dict[fold] = value
             else:
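
The two accumulators differ only in nesting depth; a self-contained toy of the shapes being filled here (values invented):

from collections import defaultdict

fold_evaluations = defaultdict(lambda: defaultdict(dict))
sample_evaluations = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))

# fold level:   measure -> repeat -> fold -> value
fold_evaluations['predictive_accuracy'][0][1] = 0.93
# sample level: measure -> repeat -> fold -> sample -> value
sample_evaluations['predictive_accuracy'][0][1][3] = 0.88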
@@ -629,7 +667,9 @@ def _create_run_from_xml(xml):
                      parameter_settings=parameters,
                      dataset_id=dataset_id, output_files=files,
                      evaluations=evaluations,
-                     detailed_evaluations=detailed_evaluations, tags=tags)
+                     fold_evaluations=fold_evaluations,
+                     sample_evaluations=sample_evaluations,
+                     tags=tags)


 def _create_trace_from_description(xml):
     result_dict = xmltodict.parse(xml)['oml:trace']
