@@ -456,6 +456,17 @@ def _create_pickle_in_cache(self, data_file: str) -> Tuple[str, str, str]:
456456 # The file is likely corrupt, see #780.
457457 # We deal with this when loading the data in `_load_data`.
458458 return data_pickle_file , data_feather_file , feather_attribute_file
459+ except ModuleNotFoundError :
460+ # There was some issue loading the file, see #918
461+ # We deal with this when loading the data in `_load_data`.
462+ return data_pickle_file , data_feather_file , feather_attribute_file
463+ except ValueError as e :
464+ if "unsupported pickle protocol" in e .args [0 ]:
465+ # There was some issue loading the file, see #898
466+ # We deal with this when loading the data in `_load_data`.
467+ return data_pickle_file , data_feather_file , feather_attribute_file
468+ else :
469+ raise
459470
460471 # Between v0.8 and v0.9 the format of pickled data changed from
461472 # np.ndarray to pd.DataFrame. This breaks some backwards compatibility,
@@ -473,6 +484,17 @@ def _create_pickle_in_cache(self, data_file: str) -> Tuple[str, str, str]:
473484 # The file is likely corrupt, see #780.
474485 # We deal with this when loading the data in `_load_data`.
475486 return data_pickle_file , data_feather_file , feather_attribute_file
487+ except ModuleNotFoundError :
488+ # There was some issue loading the file, see #918
489+ # We deal with this when loading the data in `_load_data`.
490+ return data_pickle_file , data_feather_file , feather_attribute_file
491+ except ValueError as e :
492+ if "unsupported pickle protocol" in e .args [0 ]:
493+ # There was some issue loading the file, see #898
494+ # We deal with this when loading the data in `_load_data`.
495+ return data_pickle_file , data_feather_file , feather_attribute_file
496+ else :
497+ raise
476498
477499 logger .debug ("Data feather file already exists and is up to date." )
478500 return data_pickle_file , data_feather_file , feather_attribute_file
@@ -529,7 +551,7 @@ def _load_data(self):
529551 "Detected a corrupt cache file loading dataset %d: '%s'. "
530552 "We will continue loading data from the arff-file, "
531553 "but this will be much slower for big datasets. "
532- "Please manually delete the cache file if you want openml-python "
554+ "Please manually delete the cache file if you want OpenML-Python "
533555 "to attempt to reconstruct it."
534556 "" % (self .dataset_id , self .data_pickle_file )
535557 )
@@ -539,6 +561,32 @@ def _load_data(self):
539561 "Cannot find a pickle file for dataset {} at "
540562 "location {} " .format (self .name , self .data_pickle_file )
541563 )
564+ except ModuleNotFoundError as e :
565+ logger .warning (
566+ "Encountered error message when loading cached dataset %d: '%s'. "
567+ "Error message was: %s. "
568+ "This is most likely due to https://github.com/openml/openml-python/issues/918. "
569+ "We will continue loading data from the arff-file, "
570+ "but this will be much slower for big datasets. "
571+ "Please manually delete the cache file if you want OpenML-Python "
572+ "to attempt to reconstruct it."
573+ "" % (self .dataset_id , self .data_pickle_file , e .args [0 ]),
574+ )
575+ data , categorical , attribute_names = self ._parse_data_from_arff (self .data_file )
576+ except ValueError as e :
577+ if "unsupported pickle protocol" in e .args [0 ]:
578+ logger .warning (
579+ "Encountered unsupported pickle protocol when loading cached dataset %d: '%s'. "
580+ "Error message was: %s. "
581+ "We will continue loading data from the arff-file, "
582+ "but this will be much slower for big datasets. "
583+ "Please manually delete the cache file if you want OpenML-Python "
584+ "to attempt to reconstruct it."
585+ "" % (self .dataset_id , self .data_pickle_file , e .args [0 ]),
586+ )
587+ data , categorical , attribute_names = self ._parse_data_from_arff (self .data_file )
588+ else :
589+ raise
542590
543591 return data , categorical , attribute_names
544592