Skip to content

Commit 94ef43d

Browse files
tomvdw authored and The TensorFlow Datasets Authors committed
Add option to download_and_prepare to set the permissions of newly created folders and files
Moved `incomplete_dir` to file_utils because I think it fits there better.

PiperOrigin-RevId: 652453020
1 parent e51d412 commit 94ef43d

File tree

5 files changed

+51
-31
lines changed

5 files changed

+51
-31
lines changed

tensorflow_datasets/core/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
from tensorflow_datasets.core.utils.benchmark import BenchmarkResult
5050
from tensorflow_datasets.core.utils.file_utils import add_data_dir
5151
from tensorflow_datasets.core.utils.file_utils import as_path
52+
from tensorflow_datasets.core.utils.file_utils import Permissions
5253
from tensorflow_datasets.core.writer import ExampleWriter
5354

5455

@@ -79,6 +80,7 @@ def benchmark(*args, **kwargs):
7980
"Metadata",
8081
"MetadataDict",
8182
"Path",
83+
"Permissions",
8284
"ReadInstruction",
8385
"SequentialWriter",
8486
"ShardedFileTemplate",

tensorflow_datasets/core/dataset_builder.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -561,9 +561,10 @@ def is_prepared(self) -> bool:
561561
def download_and_prepare(
562562
self,
563563
*,
564-
download_dir: Optional[epath.PathLike] = None,
565-
download_config: Optional[download.DownloadConfig] = None,
566-
file_format: Optional[Union[str, file_adapters.FileFormat]] = None,
564+
download_dir: epath.PathLike | None = None,
565+
download_config: download.DownloadConfig | None = None,
566+
file_format: str | file_adapters.FileFormat | None = None,
567+
permissions: file_utils.Permissions | None = None,
567568
) -> None:
568569
"""Downloads and prepares dataset for reading.
569570
@@ -574,6 +575,8 @@ def download_and_prepare(
574575
downloading and preparing dataset.
575576
file_format: optional `str` or `file_adapters.FileFormat`, format of the
576577
record files in which the dataset will be written.
578+
permissions: optional permissions to set on the generated folder and
579+
files.
577580
578581
Raises:
579582
IOError: if there is not enough disk space available.
@@ -694,7 +697,9 @@ def download_and_prepare(
694697
self.info.set_file_format(file_format, override=True)
695698

696699
# Create a tmp dir and rename to self.data_dir on successful exit.
697-
with utils.incomplete_dir(self.data_dir) as tmp_data_dir:
700+
with utils.incomplete_dir(
701+
dirname=self.data_dir, permissions=permissions
702+
) as tmp_data_dir:
698703
# Temporarily assign _data_dir to tmp_data_dir to avoid having to forward
699704
# it to every sub function.
700705
with utils.temporary_assignment(self, "_data_dir", tmp_data_dir):

tensorflow_datasets/core/utils/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
from tensorflow_datasets.core.units import Size
1919
from tensorflow_datasets.core.utils import docs
20+
from tensorflow_datasets.core.utils.file_utils import incomplete_dir
2021
from tensorflow_datasets.core.utils.gcs_utils import gcs_path
2122
from tensorflow_datasets.core.utils.image_utils import apply_colormap
2223
from tensorflow_datasets.core.utils.image_utils import create_thumbnail
@@ -40,7 +41,6 @@
4041
from tensorflow_datasets.core.utils.py_utils import get_class_path
4142
from tensorflow_datasets.core.utils.py_utils import get_class_url
4243
from tensorflow_datasets.core.utils.py_utils import has_sufficient_disk_space
43-
from tensorflow_datasets.core.utils.py_utils import incomplete_dir
4444
from tensorflow_datasets.core.utils.py_utils import incomplete_file
4545
from tensorflow_datasets.core.utils.py_utils import indent
4646
from tensorflow_datasets.core.utils.py_utils import is_incomplete_file

tensorflow_datasets/core/utils/file_utils.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,13 @@
1919

2020
import collections
2121
from collections.abc import Iterator, Sequence
22+
import contextlib
23+
import dataclasses
2224
import functools
2325
import os
26+
import random
2427
import re
28+
import string
2529
import time
2630

2731
from absl import logging
@@ -43,6 +47,15 @@
4347
_GLOB_CHARS = ['*', '?', '[']
4448

4549

50+
@dataclasses.dataclass(frozen=True)
51+
class Permissions:
52+
"""Permissions for a file or directory."""
53+
54+
owner: str | None = None
55+
group: str | None = None
56+
mode: int | None = None
57+
58+
4659
@docs.deprecated
4760
def as_path(path: PathLike) -> Path:
4861
"""DEPRECATED. Please use `from etils import epath` with `epath.Path()`."""
@@ -87,6 +100,32 @@ def add_data_dir(data_dir):
87100
_registered_data_dir.add(data_dir)
88101

89102

103+
def _get_incomplete_dir(dir_name: str) -> str:
104+
"""Returns a temporary dir name based on `dir_name`."""
105+
random_suffix = ''.join(
106+
random.choice(string.ascii_uppercase + string.digits) for _ in range(6)
107+
)
108+
dir_name = epath.Path(dir_name)
109+
return f'{dir_name.parent}/{constants.INCOMPLETE_PREFIX}{random_suffix}_{dir_name.name}/'
110+
111+
112+
@contextlib.contextmanager
113+
def incomplete_dir(
114+
dirname: epath.PathLike, permissions: Permissions | None = None
115+
) -> Iterator[str]:
116+
"""Create temporary dir for dirname and rename on exit."""
117+
dirname = os.fspath(dirname)
118+
tmp_dir = _get_incomplete_dir(dirname)
119+
tmp_path = epath.Path(tmp_dir)
120+
tmp_path.mkdir(parents=True, exist_ok=True)
121+
try:
122+
yield tmp_dir
123+
tmp_path.rename(dirname)
124+
finally:
125+
if tmp_path.exists():
126+
tmp_path.rmtree()
127+
128+
90129
def list_data_dirs(
91130
given_data_dir: ListOrElem[PathLike] | None = None,
92131
dataset: str | None = None,

tensorflow_datasets/core/utils/py_utils.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,8 @@
2525
import itertools
2626
import logging
2727
import os
28-
import random
2928
import re
3029
import shutil
31-
import string
3230
import sys
3331
import textwrap
3432
import threading
@@ -312,30 +310,6 @@ def nullcontext(enter_result: T = None) -> Iterator[T]:
312310
yield enter_result
313311

314312

315-
def _get_incomplete_dir(dir_name: str) -> str:
316-
"""Returns a temporary dir name based on `dir_name`."""
317-
random_suffix = ''.join(
318-
random.choice(string.ascii_uppercase + string.digits) for _ in range(6)
319-
)
320-
dir_name = epath.Path(dir_name)
321-
return f'{dir_name.parent}/{constants.INCOMPLETE_PREFIX}{random_suffix}_{dir_name.name}/'
322-
323-
324-
@contextlib.contextmanager
325-
def incomplete_dir(dirname: epath.PathLike) -> Iterator[str]:
326-
"""Create temporary dir for dirname and rename on exit."""
327-
dirname = os.fspath(dirname)
328-
tmp_dir = _get_incomplete_dir(dirname)
329-
tmp_path = epath.Path(tmp_dir)
330-
tmp_path.mkdir(parents=True, exist_ok=True)
331-
try:
332-
yield tmp_dir
333-
tmp_path.rename(dirname)
334-
finally:
335-
if tmp_path.exists():
336-
tmp_path.rmtree()
337-
338-
339313
def _tmp_file_name(path: epath.PathLike) -> epath.Path:
340314
path = epath.Path(path)
341315
return (

0 commit comments

Comments
 (0)