Skip to content

Commit 250d757

Browse files
committed
Set correct permissions on blob file (#1220)
* Set correct permissions on blob file * Hack to get current umask thread-safe
1 parent fb9f637 commit 250d757

File tree

2 files changed

+49
-2
lines changed

2 files changed

+49
-2
lines changed

src/huggingface_hub/file_download.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
import os
66
import re
77
import shutil
8+
import stat
89
import tempfile
10+
import uuid
911
import warnings
1012
from contextlib import contextmanager
1113
from dataclasses import dataclass
@@ -747,7 +749,7 @@ def _resumable_file_manager() -> Generator[io.BufferedWriter, None, None]:
747749
)
748750

749751
logger.info("storing %s in cache at %s", url, cache_path)
750-
os.replace(temp_file.name, cache_path)
752+
_chmod_and_replace(temp_file.name, cache_path)
751753

752754
if force_filename is None:
753755
logger.info("creating metadata file for %s", cache_path)
@@ -1246,7 +1248,7 @@ def _resumable_file_manager() -> Generator[io.BufferedWriter, None, None]:
12461248
)
12471249

12481250
logger.info("storing %s in cache at %s", url, blob_path)
1249-
os.replace(temp_file.name, blob_path)
1251+
_chmod_and_replace(temp_file.name, blob_path)
12501252

12511253
logger.info("creating pointer to %s from %s", blob_path, pointer_path)
12521254
_create_relative_symlink(blob_path, pointer_path, new_blob=True)
@@ -1398,3 +1400,28 @@ def _int_or_none(value: Optional[str]) -> Optional[int]:
13981400
return int(value) # type: ignore
13991401
except (TypeError, ValueError):
14001402
return None
1403+
1404+
1405+
def _chmod_and_replace(src: str, dst: str) -> None:
1406+
"""Set correct permission before moving a blob from tmp directory to cache dir.
1407+
1408+
Do not take into account the `umask` from the process as there is no convenient way
1409+
to get it that is thread-safe.
1410+
1411+
See:
1412+
- About umask: https://docs.python.org/3/library/os.html#os.umask
1413+
- Thread-safety: https://stackoverflow.com/a/70343066
1414+
- About solution: https://github.com/huggingface/huggingface_hub/pull/1220#issuecomment-1326211591
1415+
- Fix issue: https://github.com/huggingface/huggingface_hub/issues/1141
1416+
- Fix issue: https://github.com/huggingface/huggingface_hub/issues/1215
1417+
"""
1418+
# Get umask by creating a temporary file in the cached repo folder.
1419+
tmp_file = Path(dst).parent.parent / f"tmp_{uuid.uuid4()}"
1420+
try:
1421+
tmp_file.touch()
1422+
cache_dir_mode = Path(tmp_file).stat().st_mode
1423+
os.chmod(src, stat.S_IMODE(cache_dir_mode))
1424+
finally:
1425+
tmp_file.unlink()
1426+
1427+
os.replace(src, dst)

tests/test_file_download.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414
import os
1515
import re
16+
import stat
1617
import unittest
1718
from pathlib import Path
1819
from tempfile import TemporaryDirectory
@@ -223,6 +224,25 @@ def test_dataset_lfs_object(self):
223224
(url, '"95aa6a52d5d6a735563366753ca50492a658031da74f301ac5238b03966972c9"'),
224225
)
225226

227+
def test_hf_hub_download_custom_cache_permission(self):
228+
"""Checks `hf_hub_download` respect the cache dir permission.
229+
230+
Regression test for #1141 #1215.
231+
https://github.com/huggingface/huggingface_hub/issues/1141
232+
https://github.com/huggingface/huggingface_hub/issues/1215
233+
"""
234+
with TemporaryDirectory() as tmpdir:
235+
# Equivalent to umask u=rwx,g=r,o=
236+
previous_umask = os.umask(0o037)
237+
try:
238+
filepath = hf_hub_download(
239+
DUMMY_RENAMED_OLD_MODEL_ID, "config.json", cache_dir=tmpdir
240+
)
241+
# Permissions are honored (640: u=rw,g=r,o=)
242+
self.assertEqual(stat.S_IMODE(os.stat(filepath).st_mode), 0o640)
243+
finally:
244+
os.umask(previous_umask)
245+
226246
def test_download_from_a_renamed_repo_with_hf_hub_download(self):
227247
"""Checks `hf_hub_download` works also on a renamed repo.
228248

0 commit comments

Comments
 (0)