
Commit 368700e

Improve error handling and error message when loading datasets (#925)
* MAINT 918: improve error handling and error message
* incorporate feedback from Pieter
1 parent 8f99ff6 commit 368700e

1 file changed: +49 −1 lines changed

openml/datasets/dataset.py

Lines changed: 49 additions & 1 deletion
@@ -456,6 +456,17 @@ def _create_pickle_in_cache(self, data_file: str) -> Tuple[str, str, str]:
             # The file is likely corrupt, see #780.
             # We deal with this when loading the data in `_load_data`.
             return data_pickle_file, data_feather_file, feather_attribute_file
+        except ModuleNotFoundError:
+            # There was some issue loading the file, see #918
+            # We deal with this when loading the data in `_load_data`.
+            return data_pickle_file, data_feather_file, feather_attribute_file
+        except ValueError as e:
+            if "unsupported pickle protocol" in e.args[0]:
+                # There was some issue loading the file, see #898
+                # We deal with this when loading the data in `_load_data`.
+                return data_pickle_file, data_feather_file, feather_attribute_file
+            else:
+                raise

         # Between v0.8 and v0.9 the format of pickled data changed from
         # np.ndarray to pd.DataFrame. This breaks some backwards compatibility,
@@ -473,6 +484,17 @@ def _create_pickle_in_cache(self, data_file: str) -> Tuple[str, str, str]:
             # The file is likely corrupt, see #780.
             # We deal with this when loading the data in `_load_data`.
             return data_pickle_file, data_feather_file, feather_attribute_file
+        except ModuleNotFoundError:
+            # There was some issue loading the file, see #918
+            # We deal with this when loading the data in `_load_data`.
+            return data_pickle_file, data_feather_file, feather_attribute_file
+        except ValueError as e:
+            if "unsupported pickle protocol" in e.args[0]:
+                # There was some issue loading the file, see #898
+                # We deal with this when loading the data in `_load_data`.
+                return data_pickle_file, data_feather_file, feather_attribute_file
+            else:
+                raise

         logger.debug("Data feather file already exists and is up to date.")
         return data_pickle_file, data_feather_file, feather_attribute_file
@@ -529,7 +551,7 @@ def _load_data(self):
                 "Detected a corrupt cache file loading dataset %d: '%s'. "
                 "We will continue loading data from the arff-file, "
                 "but this will be much slower for big datasets. "
-                "Please manually delete the cache file if you want openml-python "
+                "Please manually delete the cache file if you want OpenML-Python "
                 "to attempt to reconstruct it."
                 "" % (self.dataset_id, self.data_pickle_file)
             )
@@ -539,6 +561,32 @@ def _load_data(self):
                 "Cannot find a pickle file for dataset {} at "
                 "location {} ".format(self.name, self.data_pickle_file)
             )
+        except ModuleNotFoundError as e:
+            logger.warning(
+                "Encountered error message when loading cached dataset %d: '%s'. "
+                "Error message was: %s. "
+                "This is most likely due to https://github.com/openml/openml-python/issues/918. "
+                "We will continue loading data from the arff-file, "
+                "but this will be much slower for big datasets. "
+                "Please manually delete the cache file if you want OpenML-Python "
+                "to attempt to reconstruct it."
+                "" % (self.dataset_id, self.data_pickle_file, e.args[0]),
+            )
+            data, categorical, attribute_names = self._parse_data_from_arff(self.data_file)
+        except ValueError as e:
+            if "unsupported pickle protocol" in e.args[0]:
+                logger.warning(
+                    "Encountered unsupported pickle protocol when loading cached dataset %d: '%s'. "
+                    "Error message was: %s. "
+                    "We will continue loading data from the arff-file, "
+                    "but this will be much slower for big datasets. "
+                    "Please manually delete the cache file if you want OpenML-Python "
+                    "to attempt to reconstruct it."
+                    "" % (self.dataset_id, self.data_pickle_file, e.args[0]),
+                )
+                data, categorical, attribute_names = self._parse_data_from_arff(self.data_file)
+            else:
+                raise

         return data, categorical, attribute_names
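In short, the commit treats two more unpickling failures as recoverable: a ModuleNotFoundError raised when the cached pickle references a module that can no longer be imported (issue #918), and a ValueError whose message contains "unsupported pickle protocol", i.e. a cache written under a newer Python (issue #898). In both cases the loader warns the user and falls back to re-parsing the ARFF file instead of crashing. The sketch below illustrates that pattern in isolation; load_cached_or_fallback and parse_arff are hypothetical names standing in for the real _load_data and _parse_data_from_arff, not OpenML-Python's actual API.

import logging
import pickle

logger = logging.getLogger(__name__)


def parse_arff(arff_path):
    """Hypothetical stand-in for the slower ARFF parsing path."""
    raise NotImplementedError("placeholder for _parse_data_from_arff")


def load_cached_or_fallback(pickle_path, arff_path):
    """Return cached data if the pickle is readable, else re-parse the ARFF file."""
    try:
        with open(pickle_path, "rb") as fh:
            return pickle.load(fh)
    except ModuleNotFoundError as e:
        # The pickle references a module that can no longer be imported,
        # e.g. a pandas internal that moved between versions (see #918).
        logger.warning("Could not unpickle cache (%s); re-parsing ARFF.", e)
    except ValueError as e:
        # Caches written by a newer Python may use a pickle protocol that this
        # interpreter cannot read (see #898); any other ValueError is re-raised.
        if "unsupported pickle protocol" not in e.args[0]:
            raise
        logger.warning("Could not unpickle cache (%s); re-parsing ARFF.", e)
    return parse_arff(arff_path)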
