64
64
from tensorflow_datasets .core .utils import file_utils
65
65
from tensorflow_datasets .core .utils import gcs_utils
66
66
from tensorflow_datasets .core .utils import read_config as read_config_lib
67
+ from tensorflow_datasets .core .utils import retry
67
68
from tensorflow_datasets .core .utils import type_utils
68
69
# pylint: enable=g-import-not-at-top
69
70
@@ -290,7 +291,8 @@ def __init__(
290
291
# Compute the base directory (for download) and dataset/version directory.
291
292
self ._data_dir_root , self ._data_dir = self ._build_data_dir (data_dir )
292
293
# If the dataset info is available, use it.
293
- if dataset_info .dataset_info_path (self .data_path ).exists ():
294
+ dataset_info_path = dataset_info .dataset_info_path (self .data_path )
295
+ if retry .retry (dataset_info_path .exists ):
294
296
self .info .read_from_directory (self ._data_dir )
295
297
else : # Use the code version (do not restore data)
296
298
self .info .initialize_from_bucket ()
@@ -466,8 +468,8 @@ def _checksums_path(cls) -> epath.Path | None:
466
468
# zipfile.Path does not have `.parts`. Additionally, `os.fspath`
467
469
# will extract the file, so use `str`.
468
470
"tensorflow_datasets" in str (new_path )
469
- and legacy_path .exists ( )
470
- and not new_path .exists ( )
471
+ and retry . retry ( legacy_path .exists )
472
+ and not retry . retry ( new_path .exists )
471
473
):
472
474
return legacy_path
473
475
else :
@@ -484,7 +486,7 @@ def url_infos(cls) -> dict[str, download.checksums.UrlInfo] | None:
484
486
# Search for the url_info file.
485
487
checksums_path = cls ._checksums_path
486
488
# If url_info file is found, load the urls
487
- if checksums_path and checksums_path .exists ( ):
489
+ if checksums_path and retry . retry ( checksums_path .exists ):
488
490
return download .checksums .load_url_infos (checksums_path )
489
491
else :
490
492
return None
@@ -624,7 +626,7 @@ def download_and_prepare(
624
626
625
627
download_config = download_config or download .DownloadConfig ()
626
628
data_path = self .data_path
627
- data_exists = data_path .exists ( )
629
+ data_exists = retry . retry ( data_path .exists )
628
630
629
631
# Saving nondeterministic_order in the DatasetInfo for documentation.
630
632
if download_config .nondeterministic_order :
@@ -640,7 +642,7 @@ def download_and_prepare(
640
642
"Deleting pre-existing dataset %s (%s)" , self .name , self .data_dir
641
643
)
642
644
data_path .rmtree () # Delete pre-existing data.
643
- data_exists = data_path .exists ( )
645
+ data_exists = retry . retry ( data_path .exists )
644
646
else :
645
647
logging .info ("Reusing dataset %s (%s)" , self .name , self .data_dir )
646
648
return
@@ -805,7 +807,7 @@ def _post_download_and_prepare_hook(self) -> None:
805
807
def _update_dataset_info (self ) -> None :
806
808
"""Updates the `dataset_info.json` file in the dataset dir."""
807
809
info_file = self .data_path / constants .DATASET_INFO_FILENAME
808
- if not info_file .exists ( ):
810
+ if not retry . retry ( info_file .exists ):
809
811
raise AssertionError (f"To update { info_file } , it must already exist." )
810
812
new_info = self .info
811
813
new_info .read_from_directory (self .data_path )
@@ -1020,7 +1022,7 @@ def as_dataset(
1020
1022
self .assert_is_not_blocked ()
1021
1023
1022
1024
# pylint: enable=line-too-long
1023
- if not self .data_path .exists ( ):
1025
+ if not retry . retry ( self .data_path .exists ):
1024
1026
raise AssertionError (
1025
1027
"Dataset %s: could not find data in %s. Please make sure to call "
1026
1028
"dataset_builder.download_and_prepare(), or pass download=True to "
@@ -1817,7 +1819,7 @@ def read_text_file(
1817
1819
"""Returns the text in the given file and records the lineage."""
1818
1820
filename = epath .Path (filename )
1819
1821
self .info .add_file_data_source_access (filename )
1820
- return filename .read_text ( encoding = encoding )
1822
+ return retry . retry ( filename .read_text , encoding = encoding )
1821
1823
1822
1824
def read_tfrecord_as_dataset (
1823
1825
self ,
@@ -2057,9 +2059,9 @@ def _save_default_config_name(
2057
2059
def load_default_config_name(builder_dir: epath.Path) -> str | None:
  """Returns the default config name recorded in a builder directory.

  Looks for the builder-level metadata file (`.config/<METADATA_FILENAME>`)
  under `builder_dir` and, when present, extracts its
  `default_config_name` entry. Filesystem accesses go through
  `retry.retry` to tolerate transient errors.

  Args:
    builder_dir: Root directory of the prepared dataset builder.

  Returns:
    The default config name, or `None` when the metadata file is absent or
    does not define one.
  """
  metadata_path = builder_dir / ".config" / constants.METADATA_FILENAME
  if not retry.retry(metadata_path.exists):
    return None
  metadata = json.loads(retry.retry(metadata_path.read_text))
  return metadata.get("default_config_name")
2064
2066
2065
2067
0 commit comments