@@ -75,7 +75,8 @@ def _get_cached_dataset(dataset_id):
7575 description = _get_cached_dataset_description (dataset_id )
7676 arff_file = _get_cached_dataset_arff (dataset_id )
7777 features = _get_cached_dataset_features (dataset_id )
78- dataset = _create_dataset_from_description (description , features , arff_file )
78+ qualities = _get_cached_dataset_qualities (dataset_id )
79+ dataset = _create_dataset_from_description (description , features , qualities , arff_file )
7980
8081 return dataset
8182
@@ -107,6 +108,19 @@ def _get_cached_dataset_features(dataset_id):
107108 "cached" % dataset_id )
108109
109110
def _get_cached_dataset_qualities(dataset_id):
    """Load the cached qualities document of a dataset.

    Reads ``qualities.xml`` from ``<cache directory>/datasets/<dataset_id>/``
    and parses it with ``xmltodict``.

    Parameters
    ----------
    dataset_id
        Dataset identifier; used (via ``str``) as the name of the cache
        sub-directory and formatted with ``%d`` in the error message.

    Returns
    -------
    The value stored under the ``"oml:data_qualities"`` key of the parsed
    XML document.

    Raises
    ------
    OpenMLCacheException
        If opening or reading the qualities file raises ``IOError`` or
        ``OSError``.
    """
    qualities_path = os.path.join(
        config.get_cache_directory(),
        "datasets",
        str(dataset_id),
        "qualities.xml",
    )
    try:
        with io.open(qualities_path, encoding='utf8') as xml_file:
            parsed = xmltodict.parse(xml_file.read())
        # Still inside the ``try`` so the set of guarded statements is the
        # same as before; a missing key propagates as ``KeyError``.
        return parsed["oml:data_qualities"]
    except (IOError, OSError):
        message = ("Dataset qualities for dataset id %d not "
                   "cached" % dataset_id)
        raise OpenMLCacheException(message)
122+
123+
110124def _get_cached_dataset_arff (dataset_id ):
111125 cache_dir = config .get_cache_directory ()
112126 did_cache_dir = os .path .join (cache_dir , "datasets" , str (dataset_id ))
@@ -272,7 +286,7 @@ def get_dataset(dataset_id):
272286 _remove_dataset_cache_dir (did_cache_dir )
273287 raise e
274288
275- dataset = _create_dataset_from_description (description , features , arff_file )
289+ dataset = _create_dataset_from_description (description , features , qualities , arff_file )
276290 return dataset
277291
278292
@@ -470,7 +484,7 @@ def _remove_dataset_cache_dir(did_cache_dir):
470484 'Please do this manually!' % did_cache_dir )
471485
472486
473- def _create_dataset_from_description (description , features , arff_file ):
487+ def _create_dataset_from_description (description , features , qualities , arff_file ):
474488 """Create a dataset object from a description dict.
475489
476490 Parameters
@@ -510,5 +524,6 @@ def _create_dataset_from_description(description, features, arff_file):
510524 description .get ("oml:update_comment" ),
511525 description .get ("oml:md5_checksum" ),
512526 data_file = arff_file ,
513- features = features )
527+ features = features ,
528+ qualities = qualities )
514529 return dataset
0 commit comments