Skip to content

Commit e1b0001

Browse files
committed
Hotfix - use relative symlinks whenever possible (#1399)
* Hotfix - use relative symlinks whenever possible * renaming
1 parent 4bd3ddd commit e1b0001

File tree

2 files changed

+32
-6
lines changed

2 files changed

+32
-6
lines changed

src/huggingface_hub/file_download.py

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -819,14 +819,30 @@ def _normalize_etag(etag: Optional[str]) -> Optional[str]:
819819
return etag.strip('"')
820820

821821

822+
def _create_relative_symlink(src: str, dst: str, new_blob: bool = False) -> None:
823+
"""Alias method used in `transformers` conversion script."""
824+
return _create_symlink(src=src, dst=dst, new_blob=new_blob)
825+
826+
822827
def _create_symlink(src: str, dst: str, new_blob: bool = False) -> None:
823-
"""Create a symbolic link named dst pointing to src as an absolute path.
828+
"""Create a symbolic link named dst pointing to src.
829+
830+
By default, it will try to create a symlink using a relative path. Relative paths have 2 advantages:
831+
- If the cache_folder is moved (example: back-up on a shared drive), relative paths within the cache folder will
832+
not brake.
833+
- Relative paths seems to be better handled on Windows. Issue was reported 3 times in less than a week when
834+
changing from relative to absolute paths. See https://github.com/huggingface/huggingface_hub/issues/1398,
835+
https://github.com/huggingface/diffusers/issues/2729 and https://github.com/huggingface/transformers/pull/22228.
836+
NOTE: The issue with absolute paths doesn't happen on admin mode.
837+
When creating a symlink from the cache to a local folder, it is possible that a relative path cannot be created.
838+
This happens when paths are not on the same volume. In that case, we use absolute paths.
839+
824840
825841
The result layout looks something like
826842
└── [ 128] snapshots
827843
├── [ 128] 2439f60ef33a0d46d85da5001d52aeda5b00ce9f
828-
│ ├── [ 52] README.md -> /path/to/cache/blobs/d7edf6bd2a681fb0175f7735299831ee1b22b812
829-
│ └── [ 76] pytorch_model.bin -> /path/to/cache/blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd
844+
│ ├── [ 52] README.md -> ../../../blobs/d7edf6bd2a681fb0175f7735299831ee1b22b812
845+
│ └── [ 76] pytorch_model.bin -> ../../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd
830846
831847
If symlinks cannot be created on this platform (most likely to be Windows), the workaround is to avoid symlinks by
832848
having the actual file in `dst`. If it is a new file (`new_blob=True`), we move it to `dst`. If it is not a new file
@@ -844,6 +860,15 @@ def _create_symlink(src: str, dst: str, new_blob: bool = False) -> None:
844860
abs_src = os.path.abspath(os.path.expanduser(src))
845861
abs_dst = os.path.abspath(os.path.expanduser(dst))
846862

863+
# Use relative_dst in priority
864+
try:
865+
relative_src = os.path.relpath(abs_src, os.path.dirname(abs_dst))
866+
except ValueError:
867+
# Raised on Windows if src and dst are not on the same volume. This is the case when creating a symlink to a
868+
# local_dir instead of within the cache directory.
869+
# See https://docs.python.org/3/library/os.path.html#os.path.relpath
870+
relative_src = None
871+
847872
try:
848873
_support_symlinks = are_symlinks_supported(os.path.dirname(os.path.commonpath([abs_src, abs_dst])))
849874
except PermissionError:
@@ -852,9 +877,10 @@ def _create_symlink(src: str, dst: str, new_blob: bool = False) -> None:
852877
_support_symlinks = are_symlinks_supported(os.path.dirname(abs_dst))
853878

854879
if _support_symlinks:
855-
logger.info(f"Creating pointer from {abs_src} to {abs_dst}")
880+
src_rel_or_abs = relative_src or abs_src
881+
logger.info(f"Creating pointer from {src_rel_or_abs} to {abs_dst}")
856882
try:
857-
os.symlink(abs_src, abs_dst)
883+
os.symlink(src_rel_or_abs, abs_dst)
858884
except FileExistsError:
859885
if os.path.islink(abs_dst) and os.path.realpath(abs_dst) == os.path.realpath(abs_src):
860886
# `abs_dst` already exists and is a symlink to the `abs_src` blob. It is most likely that the file has

tests/test_file_download.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -530,7 +530,7 @@ def test_with_local_dir_and_symlinks_and_file_not_cached(self) -> None:
530530
self.assertTrue(config_file.is_file())
531531
if os.name != "nt": # File is symlink (except in Windows CI)
532532
self.assertTrue(config_file.is_symlink())
533-
blob_path = Path(os.readlink(str(config_file))) # config_file.readlink() not supported on Python3.7
533+
blob_path = config_file.resolve()
534534
self.assertTrue(self.cache_dir in blob_path.parents) # blob is cached!
535535
else:
536536
self.assertFalse(config_file.is_symlink())

0 commit comments

Comments
 (0)