Skip to content

Commit 02c6e52

Browse files
authored
Fix filename too long when downloading to local folder (#2789)
1 parent d2cab33 commit 02c6e52

File tree

2 files changed

+16 -6 lines changed

2 files changed

+16 -6 lines changed

src/huggingface_hub/_local_folder.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@
4949
```
5050
"""
5151

52+
import base64
53+
import hashlib
5254
import logging
5355
import os
5456
import time
@@ -84,7 +86,7 @@ class LocalDownloadFilePaths:
8486

8587
def incomplete_path(self, etag: str) -> Path:
8688
"""Return the path where a file will be temporarily downloaded before being moved to `file_path`."""
87-
return self.metadata_path.with_suffix(f".{etag}.incomplete")
89+
return self.metadata_path.parent / f"{_short_hash(self.metadata_path.name)}.{etag}.incomplete"
8890

8991

9092
@dataclass(frozen=True)
@@ -424,3 +426,7 @@ def _huggingface_dir(local_dir: Path) -> Path:
424426
except OSError:
425427
pass
426428
return path
429+
430+
431+
def _short_hash(filename: str) -> str:
432+
return base64.urlsafe_b64encode(hashlib.sha1(filename.encode()).digest()).decode()

tests/test_local_folder.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,13 +79,17 @@ def test_local_download_paths(tmp_path: Path):
7979
assert paths.metadata_path.parent.is_dir()
8080
assert paths.lock_path.parent.is_dir()
8181

82-
# Incomplete path are etag-based
83-
assert (
84-
paths.incomplete_path("etag123")
85-
== tmp_path / ".cache" / "huggingface" / "download" / "path" / "in" / "repo.txt.etag123.incomplete"
86-
)
82+
# Incomplete paths are etag-based
83+
incomplete_path = paths.incomplete_path("etag123")
84+
assert incomplete_path.parent == tmp_path / ".cache" / "huggingface" / "download" / "path" / "in"
85+
assert incomplete_path.name.endswith(".etag123.incomplete")
8786
assert paths.incomplete_path("etag123").parent.is_dir()
8887

88+
# Incomplete paths are unique per file per etag
89+
other_paths = get_local_download_paths(tmp_path, "path/in/repo_other.txt")
90+
other_incomplete_path = other_paths.incomplete_path("etag123")
91+
assert incomplete_path != other_incomplete_path # different .incomplete files to prevent concurrency issues
92+
8993

9094
def test_local_download_paths_are_recreated_each_time(tmp_path: Path):
9195
paths1 = get_local_download_paths(tmp_path, "path/in/repo.txt")

0 commit comments

Comments (0)