Skip to content

Commit e4c4eb4

Browse files
fineguy authored and
The TensorFlow Datasets Authors committed
Fix validating checksums path.
PiperOrigin-RevId: 670307758
1 parent 4b2da8f commit e4c4eb4

File tree

4 files changed

+23
-9
lines changed

4 files changed

+23
-9
lines changed

tensorflow_datasets/core/dataset_builder.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1280,7 +1280,11 @@ def _make_download_manager(
12801280
if download_config.register_checksums:
12811281
# Note: Error will be raised here if user try to record checksums
12821282
# from a `zipapp`
1283-
register_checksums_path = utils.to_write_path(self._checksums_path)
1283+
try:
1284+
register_checksums_path = utils.to_write_path(self._checksums_path)
1285+
download.validate_checksums_path(register_checksums_path)
1286+
except Exception: # pylint: disable=broad-except
1287+
raise
12841288
else:
12851289
register_checksums_path = None
12861290

tensorflow_datasets/core/download/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
"""`tfds.download.DownloadManager` API."""
1717

1818
from tensorflow_datasets.core.download.checksums import add_checksums_dir
19+
from tensorflow_datasets.core.download.checksums import validate_checksums_path
1920
from tensorflow_datasets.core.download.download_manager import DownloadConfig
2021
from tensorflow_datasets.core.download.download_manager import DownloadManager
2122
from tensorflow_datasets.core.download.extractor import iter_archive

tensorflow_datasets/core/download/checksums.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ def save_url_infos(
216216
f'{url_info.filename or ""}\n'
217217
for url, url_info in sorted(new_data.items())
218218
]
219+
path.parent.mkdir(parents=True, exist_ok=True)
219220
path.write_text(''.join(lines), encoding='UTF-8')
220221

221222

@@ -227,3 +228,19 @@ def _filenames_equal(
227228
return all(
228229
l.filename == r.filename for _, (l, r) in utils.zip_dict(left, right)
229230
)
231+
232+
233+
def validate_checksums_path(checksums_path: epath.PathLike):
  """Validates that the checksums file can be written to.

  This is meant to be called before any download starts, so that a missing
  write permission is reported up front rather than after the files have been
  fetched.

  If the file does not exist, it is created (together with any missing parent
  directories). If it exists, its content is read and written back unchanged.

  Args:
    checksums_path: Path to the checksums file.

  Raises:
    Whatever error the underlying filesystem raises when the path cannot be
    created, read, or written (e.g. a permission error). The error is not
    caught here.
  """
  checksums_path = epath.Path(checksums_path)
  if not checksums_path.exists():
    # `save_url_infos` creates the parent directory before writing, so do the
    # same here; otherwise `touch()` would fail on a path that the later save
    # would have accepted.
    checksums_path.parent.mkdir(parents=True, exist_ok=True)
    # Create the file now to make sure the user has write access.
    checksums_path.touch()
  else:
    # Rewrite the file with its own content to check write access. The
    # encoding is explicit and matches the one used when saving checksums, so
    # the round-trip does not depend on the platform default encoding.
    content = checksums_path.read_text(encoding='UTF-8')
    checksums_path.write_text(content, encoding='UTF-8')

tensorflow_datasets/core/download/download_manager.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -239,14 +239,6 @@ def __init__(
239239
' set.'
240240
)
241241
register_checksums_path = epath.Path(register_checksums_path)
242-
if not register_checksums_path.exists():
243-
# Create the file here to make sure user has write access before
244-
# starting downloads.
245-
register_checksums_path.touch()
246-
else:
247-
# Make sure the user has write access before downloading any files.
248-
# (e.g. TFDS installed by admin)
249-
register_checksums_path.write_text(register_checksums_path.read_text())
250242

251243
download_dir = epath.Path(download_dir).expanduser()
252244
if extract_dir:

0 commit comments

Comments
 (0)