Skip to content

Commit 7123262

Browse files
authored
Add timeout to WeakFileLock (#2751)
* Add timeout to WeakFileLock * mypy
1 parent 50c769d commit 7123262

File tree

5 files changed

+69
-17
lines changed

5 files changed

+69
-17
lines changed

src/huggingface_hub/_local_folder.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -413,9 +413,14 @@ def _huggingface_dir(local_dir: Path) -> Path:
413413
gitignore_lock = path / ".gitignore.lock"
414414
if not gitignore.exists():
415415
try:
416-
with WeakFileLock(gitignore_lock):
416+
with WeakFileLock(gitignore_lock, timeout=0.1):
417417
gitignore.write_text("*")
418+
except IndexError:
419+
pass
420+
except OSError: # TimeoutError, FileNotFoundError, PermissionError, etc.
421+
pass
422+
try:
418423
gitignore_lock.unlink()
419-
except OSError: # FileNotFoundError, PermissionError, etc.
424+
except OSError:
420425
pass
421426
return path

src/huggingface_hub/utils/_fixes.py

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import shutil
1414
import stat
1515
import tempfile
16+
import time
1617
from functools import partial
1718
from pathlib import Path
1819
from typing import Callable, Generator, Optional, Union
@@ -83,39 +84,50 @@ def _set_write_permission_and_retry(func, path, excinfo):
8384

8485

8586
@contextlib.contextmanager
86-
def WeakFileLock(lock_file: Union[str, Path]) -> Generator[BaseFileLock, None, None]:
87+
def WeakFileLock(
88+
lock_file: Union[str, Path], *, timeout: Optional[float] = None
89+
) -> Generator[BaseFileLock, None, None]:
8790
"""A filelock with some custom logic.
8891
8992
This filelock is weaker than the default filelock in that:
9093
1. It won't raise an exception if release fails.
9194
2. It will default to a SoftFileLock if the filesystem does not support flock.
9295
9396
An INFO log message is emitted every 10 seconds if the lock is not acquired immediately.
97+
If a timeout is provided, a `filelock.Timeout` exception is raised if the lock is not acquired within the timeout.
9498
"""
95-
lock = FileLock(lock_file, timeout=constants.FILELOCK_LOG_EVERY_SECONDS)
99+
log_interval = constants.FILELOCK_LOG_EVERY_SECONDS
100+
lock = FileLock(lock_file, timeout=log_interval)
101+
start_time = time.time()
102+
96103
while True:
104+
elapsed_time = time.time() - start_time
105+
if timeout is not None and elapsed_time >= timeout:
106+
raise Timeout(str(lock_file))
107+
97108
try:
98-
lock.acquire()
109+
lock.acquire(timeout=min(log_interval, timeout - elapsed_time) if timeout else log_interval)
99110
except Timeout:
100-
logger.info("still waiting to acquire lock on %s", lock_file)
111+
logger.info(
112+
f"Still waiting to acquire lock on {lock_file} (elapsed: {time.time() - start_time:.1f} seconds)"
113+
)
101114
except NotImplementedError as e:
102115
if "use SoftFileLock instead" in str(e):
103-
# It's possible that the system does support flock, expect for one partition or filesystem.
104-
# In this case, let's default to a SoftFileLock.
105116
logger.warning(
106117
"FileSystem does not appear to support flock. Falling back to SoftFileLock for %s", lock_file
107118
)
108-
lock = SoftFileLock(lock_file, timeout=constants.FILELOCK_LOG_EVERY_SECONDS)
119+
lock = SoftFileLock(lock_file, timeout=log_interval)
109120
continue
110121
else:
111122
break
112123

113-
yield lock
114-
115124
try:
116-
return lock.release()
117-
except OSError:
125+
yield lock
126+
finally:
118127
try:
119-
Path(lock_file).unlink()
128+
lock.release()
120129
except OSError:
121-
pass
130+
try:
131+
Path(lock_file).unlink()
132+
except OSError:
133+
pass

tests/test_local_folder.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import logging
2121
import os
22+
import threading
2223
import time
2324
from pathlib import Path, WindowsPath
2425

@@ -49,6 +50,18 @@ def test_creates_huggingface_dir_with_gitignore(tmp_path: Path):
4950
assert (huggingface_dir / ".gitignore").read_text() == "*"
5051

5152

53+
def test_gitignore_lock_timeout_is_ignored(tmp_path: Path):
54+
local_dir = tmp_path / "path" / "to" / "local"
55+
56+
threads = [threading.Thread(target=_huggingface_dir, args=(local_dir,)) for _ in range(10)]
57+
for thread in threads:
58+
thread.start()
59+
for thread in threads:
60+
thread.join()
61+
assert (local_dir / ".cache" / "huggingface" / ".gitignore").exists()
62+
assert not (local_dir / ".cache" / "huggingface" / ".gitignore.lock").exists()
63+
64+
5265
def test_local_download_paths(tmp_path: Path):
5366
"""Test local download paths are valid + usable."""
5467
paths = get_local_download_paths(tmp_path, "path/in/repo.txt")

tests/test_utils_fixes.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
1+
import logging
12
import unittest
23
from pathlib import Path
34

4-
from huggingface_hub.utils import SoftTemporaryDirectory, yaml_dump
5+
import filelock
6+
import pytest
7+
8+
from huggingface_hub.utils import SoftTemporaryDirectory, WeakFileLock, yaml_dump
59

610

711
class TestYamlDump(unittest.TestCase):
@@ -24,3 +28,21 @@ def test_temporary_directory(self) -> None:
2428
self.assertTrue(path.is_dir())
2529
# Tmpdir is deleted
2630
self.assertFalse(path.is_dir())
31+
32+
33+
class TestWeakFileLock:
34+
def test_lock_log_every(
35+
self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture
36+
) -> None:
37+
monkeypatch.setattr("huggingface_hub.constants.FILELOCK_LOG_EVERY_SECONDS", 0.1)
38+
lock_file = tmp_path / ".lock"
39+
40+
with caplog.at_level(logging.INFO, logger="huggingface_hub.utils._fixes"):
41+
with WeakFileLock(lock_file):
42+
with pytest.raises(filelock.Timeout) as exc_info:
43+
with WeakFileLock(lock_file, timeout=0.3):
44+
pass
45+
assert exc_info.value.lock_file == str(lock_file)
46+
47+
assert len(caplog.records) >= 3
48+
assert caplog.records[0].message.startswith(f"Still waiting to acquire lock on {lock_file}")

utils/check_contrib_list.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def check_contrib_list(update: bool) -> NoReturn:
5555

5656
# Check workflow is consistent with list
5757
workflow_content = WORKFLOW_PATH.read_text()
58-
_substitute = "\n".join(f'{" "*10}"{lib}",' for lib in contrib_list)
58+
_substitute = "\n".join(f'{" " * 10}"{lib}",' for lib in contrib_list)
5959
workflow_content_expected = WORKFLOW_REGEX.sub(rf"\g<before>{_substitute}\n\g<after>", workflow_content)
6060

6161
#

0 commit comments

Comments
 (0)