@@ -106,7 +106,13 @@ def run_flow_on_task(task, flow, avoid_duplicate_runs=True, flow_tags=None,
106106 dataset_id = dataset .dataset_id , model = flow .model , tags = tags )
107107 run .parameter_settings = OpenMLRun ._parse_parameters (flow )
108108
109- run .data_content , run .trace_content , run .trace_attributes , run .detailed_evaluations = res
109+ run .data_content , run .trace_content , run .trace_attributes , fold_evaluations , sample_evaluations = res
110+ # now we need to attach the detailed evaluations
111+ if task .task_type_id == 3 :
112+ run .sample_evaluations = sample_evaluations
113+ else :
114+ run .fold_evaluations = fold_evaluations
115+
110116
111117 config .logger .info ('Executed Task %d with Flow id: %d' % (task .task_id , run .flow_id ))
112118
@@ -299,15 +305,20 @@ def _seed_current_object(current_value):
299305 return model
300306
301307
302- def _prediction_to_row (rep_no , fold_no , row_id , correct_label , predicted_label ,
303- predicted_probabilities , class_labels , model_classes_mapping ):
308+ def _prediction_to_row (rep_no , fold_no , sample_no , row_id , correct_label ,
309+ predicted_label , predicted_probabilities , class_labels ,
310+ model_classes_mapping ):
304311 """Util function that turns probability estimates of a classifier for a given
305312 instance into the right arff format to upload to openml.
306313
307314 Parameters
308315 ----------
309316 rep_no : int
317+ The repeat of the experiment (0-based; in case of 1 time CV, always 0)
310318 fold_no : int
319+ The fold number of the experiment (0-based; in case of holdout, always 0)
320+ sample_no : int
321+ In case of learning curves, the index of the subsample (0-based; in case of no learning curve, always 0)
311322 row_id : int
312323 row id in the initial dataset
313324 correct_label : str
@@ -328,11 +339,12 @@ def _prediction_to_row(rep_no, fold_no, row_id, correct_label, predicted_label,
328339 """
329340 if not isinstance (rep_no , (int , np .integer )): raise ValueError ('rep_no should be int' )
330341 if not isinstance (fold_no , (int , np .integer )): raise ValueError ('fold_no should be int' )
342+ if not isinstance (sample_no , (int , np .integer )): raise ValueError ('sample_no should be int' )
331343 if not isinstance (row_id , (int , np .integer )): raise ValueError ('row_id should be int' )
332344 if not len (predicted_probabilities ) == len (model_classes_mapping ):
333345 raise ValueError ('len(predicted_probabilities) != len(class_labels)' )
334346
335- arff_line = [rep_no , fold_no , row_id ]
347+ arff_line = [rep_no , fold_no , sample_no , row_id ]
336348 for class_label_idx in range (len (class_labels )):
337349 if class_label_idx in model_classes_mapping :
338350 index = np .where (model_classes_mapping == class_label_idx )[0 ][0 ] # TODO: WHY IS THIS 2D???
@@ -349,82 +361,100 @@ def _run_task_get_arffcontent(model, task, class_labels):
349361 X , Y = task .get_X_and_y ()
350362 arff_datacontent = []
351363 arff_tracecontent = []
352- user_defined_measures = defaultdict (lambda : defaultdict (dict ))
364+ # stores fold-based evaluation measures. In case of a sample based task,
365+ # this information is overwritten multiple times, but due to the ordering
366+ # of the loops, eventually it contains the information based on the full
367+ # dataset size
368+ user_defined_measures_fold = defaultdict (lambda : defaultdict (dict ))
369+ # stores sample-based evaluation measures (sublevel of fold-based)
370+ # will also be filled on a non sample-based task, but the information
371+ # is the same as the fold-based measures, and disregarded in that case
372+ user_defined_measures_sample = defaultdict (lambda : defaultdict (lambda : defaultdict (dict )))
353373
354- rep_no = 0
355374 # sys.version_info returns a tuple, the following line compares the entry of tuples
356375 # https://docs.python.org/3.6/reference/expressions.html#value-comparisons
357376 can_measure_runtime = sys .version_info [:2 ] >= (3 , 3 ) and _check_n_jobs (model )
358377 # TODO use different iterator to only provide a single iterator (less
359378 # methods, less maintenance, less confusion)
360- for rep in task .iterate_repeats ():
361- fold_no = 0
362- for fold in rep :
363- model_fold = sklearn .base .clone (model , safe = True )
364- train_indices , test_indices = fold
365- trainX = X [train_indices ]
366- trainY = Y [train_indices ]
367- testX = X [test_indices ]
368- testY = Y [test_indices ]
369-
370- try :
371- # for measuring runtime. Only available since Python 3.3
372- if can_measure_runtime :
373- modelfit_starttime = time .process_time ()
374- model_fold .fit (trainX , trainY )
375-
376- if can_measure_runtime :
377- modelfit_duration = (time .process_time () - modelfit_starttime ) * 1000
378- user_defined_measures ['usercpu_time_millis_training' ][rep_no ][fold_no ] = modelfit_duration
379- except AttributeError as e :
380- # typically happens when training a regressor on classification task
381- raise PyOpenMLError (str (e ))
382-
383- # extract trace, if applicable
384- if isinstance (model_fold , sklearn .model_selection ._search .BaseSearchCV ):
385- arff_tracecontent .extend (_extract_arfftrace (model_fold , rep_no , fold_no ))
386-
387- # search for model classes_ (might differ depending on modeltype)
388- # first, pipelines are a special case (these don't have a classes_
389- # object, but rather borrows it from the last step. We do this manually,
390- # because of the BaseSearch check)
391- if isinstance (model_fold , sklearn .pipeline .Pipeline ):
392- used_estimator = model_fold .steps [- 1 ][- 1 ]
393- else :
394- used_estimator = model_fold
379+ num_reps , num_folds , num_samples = task .get_split_dimensions ()
380+
381+ for rep_no in range (num_reps ):
382+ for fold_no in range (num_folds ):
383+ for sample_no in range (num_samples ):
384+ model_fold = sklearn .base .clone (model , safe = True )
385+ train_indices , test_indices = task .get_train_test_split_indices (repeat = rep_no ,
386+ fold = fold_no ,
387+ sample = sample_no )
388+ trainX = X [train_indices ]
389+ trainY = Y [train_indices ]
390+ testX = X [test_indices ]
391+ testY = Y [test_indices ]
392+
393+ try :
394+ # for measuring runtime. Only available since Python 3.3
395+ if can_measure_runtime :
396+ modelfit_starttime = time .process_time ()
397+ model_fold .fit (trainX , trainY )
398+
399+ if can_measure_runtime :
400+ modelfit_duration = (time .process_time () - modelfit_starttime ) * 1000
401+ user_defined_measures_sample ['usercpu_time_millis_training' ][rep_no ][fold_no ][sample_no ] = modelfit_duration
402+ user_defined_measures_fold ['usercpu_time_millis_training' ][rep_no ][fold_no ] = modelfit_duration
403+ except AttributeError as e :
404+ # typically happens when training a regressor on classification task
405+ raise PyOpenMLError (str (e ))
406+
407+ # extract trace, if applicable
408+ if isinstance (model_fold , sklearn .model_selection ._search .BaseSearchCV ):
409+ arff_tracecontent .extend (_extract_arfftrace (model_fold , rep_no , fold_no ))
410+
411+ # search for model classes_ (might differ depending on modeltype)
412+ # first, pipelines are a special case (these don't have a classes_
413+ # object, but rather borrows it from the last step. We do this manually,
414+ # because of the BaseSearch check)
415+ if isinstance (model_fold , sklearn .pipeline .Pipeline ):
416+ used_estimator = model_fold .steps [- 1 ][- 1 ]
417+ else :
418+ used_estimator = model_fold
395419
396- if isinstance (used_estimator , sklearn .model_selection ._search .BaseSearchCV ):
397- model_classes = used_estimator .best_estimator_ .classes_
398- else :
399- model_classes = used_estimator .classes_
420+ if isinstance (used_estimator , sklearn .model_selection ._search .BaseSearchCV ):
421+ model_classes = used_estimator .best_estimator_ .classes_
422+ else :
423+ model_classes = used_estimator .classes_
400424
401- if can_measure_runtime :
402- modelpredict_starttime = time .process_time ()
403-
404- ProbaY = model_fold .predict_proba (testX )
405- PredY = model_fold .predict (testX )
406- if can_measure_runtime :
407- modelpredict_duration = (time .process_time () - modelpredict_starttime ) * 1000
408- user_defined_measures ['usercpu_time_millis_testing' ][rep_no ][fold_no ] = modelpredict_duration
409- user_defined_measures ['usercpu_time_millis' ][rep_no ][fold_no ] = modelfit_duration + modelpredict_duration
425+ if can_measure_runtime :
426+ modelpredict_starttime = time .process_time ()
410427
411- if ProbaY .shape [1 ] != len (class_labels ):
412- warnings .warn ("Repeat %d Fold %d: estimator only predicted for %d/%d classes!" % (rep_no , fold_no , ProbaY .shape [1 ], len (class_labels )))
428+ ProbaY = model_fold .predict_proba (testX )
429+ PredY = model_fold .predict (testX )
430+ if can_measure_runtime :
431+ modelpredict_duration = (time .process_time () - modelpredict_starttime ) * 1000
432+ user_defined_measures_fold ['usercpu_time_millis_testing' ][rep_no ][fold_no ] = modelpredict_duration
433+ user_defined_measures_fold ['usercpu_time_millis' ][rep_no ][fold_no ] = modelfit_duration + modelpredict_duration
434+ user_defined_measures_sample ['usercpu_time_millis_testing' ][rep_no ][fold_no ][sample_no ] = modelpredict_duration
435+ user_defined_measures_sample ['usercpu_time_millis' ][rep_no ][fold_no ][sample_no ] = modelfit_duration + modelpredict_duration
413436
414- for i in range (0 , len (test_indices )):
415- arff_line = _prediction_to_row (rep_no , fold_no , test_indices [i ], class_labels [testY [i ]], PredY [i ], ProbaY [i ], class_labels , model_classes )
416- arff_datacontent .append (arff_line )
437+ if ProbaY .shape [1 ] != len (class_labels ):
438+ warnings .warn ("Repeat %d Fold %d: estimator only predicted for %d/%d classes!" % (rep_no , fold_no , ProbaY .shape [1 ], len (class_labels )))
417439
418- fold_no = fold_no + 1
419- rep_no = rep_no + 1
440+ for i in range (0 , len (test_indices )):
441+ arff_line = _prediction_to_row (rep_no , fold_no , sample_no ,
442+ test_indices [i ], class_labels [testY [i ]],
443+ PredY [i ], ProbaY [i ], class_labels , model_classes )
444+ arff_datacontent .append (arff_line )
420445
421446 if isinstance (model_fold , sklearn .model_selection ._search .BaseSearchCV ):
422447 # arff_tracecontent is already set
423448 arff_trace_attributes = _extract_arfftrace_attributes (model_fold )
424449 else :
425450 arff_tracecontent = None
426451 arff_trace_attributes = None
427- return arff_datacontent , arff_tracecontent , arff_trace_attributes , user_defined_measures
452+
453+ return arff_datacontent , \
454+ arff_tracecontent , \
455+ arff_trace_attributes , \
456+ user_defined_measures_fold , \
457+ user_defined_measures_sample
428458
429459
430460def _extract_arfftrace (model , rep_no , fold_no ):
@@ -571,7 +601,8 @@ def _create_run_from_xml(xml):
571601
572602 files = dict ()
573603 evaluations = dict ()
574- detailed_evaluations = defaultdict (lambda : defaultdict (dict ))
604+ fold_evaluations = defaultdict (lambda : defaultdict (dict ))
605+ sample_evaluations = defaultdict (lambda : defaultdict (lambda : defaultdict (dict )))
575606 if 'oml:output_data' not in run :
576607 raise ValueError ('Run does not contain output_data (OpenML server error?)' )
577608 else :
@@ -598,11 +629,18 @@ def _create_run_from_xml(xml):
598629 else :
599630 raise ValueError ('Could not find keys "value" or "array_data" '
600631 'in %s' % str (evaluation_dict .keys ()))
601-
602- if '@repeat' in evaluation_dict and '@fold' in evaluation_dict :
632+ if '@repeat' in evaluation_dict and '@fold' in evaluation_dict and '@sample' in evaluation_dict :
633+ repeat = int (evaluation_dict ['@repeat' ])
634+ fold = int (evaluation_dict ['@fold' ])
635+ sample = int (evaluation_dict ['@sample' ])
636+ repeat_dict = sample_evaluations [key ]
637+ fold_dict = repeat_dict [repeat ]
638+ sample_dict = fold_dict [fold ]
639+ sample_dict [sample ] = value
640+ elif '@repeat' in evaluation_dict and '@fold' in evaluation_dict :
603641 repeat = int (evaluation_dict ['@repeat' ])
604642 fold = int (evaluation_dict ['@fold' ])
605- repeat_dict = detailed_evaluations [key ]
643+ repeat_dict = fold_evaluations [key ]
606644 fold_dict = repeat_dict [repeat ]
607645 fold_dict [fold ] = value
608646 else :
@@ -629,7 +667,9 @@ def _create_run_from_xml(xml):
629667 parameter_settings = parameters ,
630668 dataset_id = dataset_id , output_files = files ,
631669 evaluations = evaluations ,
632- detailed_evaluations = detailed_evaluations , tags = tags )
670+ fold_evaluations = fold_evaluations ,
671+ sample_evaluations = sample_evaluations ,
672+ tags = tags )
633673
634674def _create_trace_from_description (xml ):
635675 result_dict = xmltodict .parse (xml )['oml:trace' ]
0 commit comments