Skip to content

Commit 0848f80

Browse files
committed
[hot-fix] Malicious repo can overwrite any file on disk (#1429)
* Add regression test * add protections + fix tests * fix windows test * fix windows test * FIX resolving path without following symlinks * increase pause time in test * fix windows test * Update src/huggingface_hub/file_download.py
1 parent 58d8242 commit 0848f80

File tree

4 files changed

+99
-9
lines changed

4 files changed

+99
-9
lines changed

src/huggingface_hub/_login.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -121,15 +121,13 @@ def interpreter_login() -> None:
121121
122122
For more details, see [`login`].
123123
"""
124-
print( # docstyle-ignore
125-
"""
124+
print("""
126125
_| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|
127126
_| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|
128127
_|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_|
129128
_| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|
130129
_| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_|
131-
"""
132-
)
130+
""") # docstyle-ignore
133131
if HfFolder.get_token() is not None:
134132
print(
135133
" A token is already saved on your machine. Run `huggingface-cli"

src/huggingface_hub/file_download.py

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1132,11 +1132,17 @@ def hf_hub_download(
11321132

11331133
# cross platform transcription of filename, to be used as a local file path.
11341134
relative_filename = os.path.join(*filename.split("/"))
1135+
if os.name == "nt":
1136+
if relative_filename.startswith("..\\") or "\\..\\" in relative_filename:
1137+
raise ValueError(
1138+
f"Invalid filename: cannot handle filename '{relative_filename}' on Windows. Please ask the repository"
1139+
" owner to rename this file."
1140+
)
11351141

11361142
# if user provides a commit_hash and they already have the file on disk,
11371143
# shortcut everything.
11381144
if REGEX_COMMIT_HASH.match(revision):
1139-
pointer_path = os.path.join(storage_folder, "snapshots", revision, relative_filename)
1145+
pointer_path = _get_pointer_path(storage_folder, revision, relative_filename)
11401146
if os.path.exists(pointer_path):
11411147
if local_dir is not None:
11421148
return _to_local_dir(pointer_path, local_dir, relative_filename, use_symlinks=local_dir_use_symlinks)
@@ -1231,7 +1237,7 @@ def hf_hub_download(
12311237

12321238
# Return pointer file if exists
12331239
if commit_hash is not None:
1234-
pointer_path = os.path.join(storage_folder, "snapshots", commit_hash, relative_filename)
1240+
pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename)
12351241
if os.path.exists(pointer_path):
12361242
if local_dir is not None:
12371243
return _to_local_dir(
@@ -1260,7 +1266,7 @@ def hf_hub_download(
12601266
assert etag is not None, "etag must have been retrieved from server"
12611267
assert commit_hash is not None, "commit_hash must have been retrieved from server"
12621268
blob_path = os.path.join(storage_folder, "blobs", etag)
1263-
pointer_path = os.path.join(storage_folder, "snapshots", commit_hash, relative_filename)
1269+
pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename)
12641270

12651271
os.makedirs(os.path.dirname(blob_path), exist_ok=True)
12661272
os.makedirs(os.path.dirname(pointer_path), exist_ok=True)
@@ -1549,14 +1555,34 @@ def _chmod_and_replace(src: str, dst: str) -> None:
15491555
os.replace(src, dst)
15501556

15511557

1558+
def _get_pointer_path(storage_folder: str, revision: str, relative_filename: str) -> str:
    """Build the snapshot pointer path of a file and check it stays inside the snapshots folder.

    Args:
        storage_folder: Folder that contains the `snapshots` directory.
        revision: Name of the revision sub-folder under `snapshots`.
        relative_filename: Path of the file relative to the revision folder.

    Returns:
        `storage_folder`, `"snapshots"`, `revision` and `relative_filename` joined with
        `os.path.join`. The returned path is NOT normalized.

    Raises:
        ValueError: If, once normalized, the pointer path is not located under the
            `snapshots` folder (e.g. `relative_filename` holds enough `..` segments to
            escape it).
    """
    # Using `os.path.abspath` instead of `Path.resolve()` to avoid resolving symlinks
    snapshot_path = os.path.join(storage_folder, "snapshots")
    pointer_path = os.path.join(snapshot_path, revision, relative_filename)
    # `abspath` collapses `..` segments lexically; the snapshots folder must remain an
    # ancestor of the resulting path.
    if Path(os.path.abspath(snapshot_path)) not in Path(os.path.abspath(pointer_path)).parents:
        raise ValueError(
            "Invalid pointer path: cannot create pointer path in snapshot folder if"
            f" `storage_folder='{storage_folder}'`, `revision='{revision}'` and"
            f" `relative_filename='{relative_filename}'`."
        )
    return pointer_path
1569+
1570+
15521571
def _to_local_dir(
15531572
path: str, local_dir: str, relative_filename: str, use_symlinks: Union[bool, Literal["auto"]]
15541573
) -> str:
15551574
"""Place a file in a local dir (different than cache_dir).
15561575
15571576
Either symlink to blob file in cache or duplicate file depending on `use_symlinks` and file size.
15581577
"""
1578+
# Using `os.path.abspath` instead of `Path.resolve()` to avoid resolving symlinks
15591579
local_dir_filepath = os.path.join(local_dir, relative_filename)
1580+
if Path(os.path.abspath(local_dir)) not in Path(os.path.abspath(local_dir_filepath)).parents:
1581+
raise ValueError(
1582+
f"Cannot copy file '{relative_filename}' to local dir '{local_dir}': file would not be in the local"
1583+
" directory."
1584+
)
1585+
15601586
os.makedirs(os.path.dirname(local_dir_filepath), exist_ok=True)
15611587
real_blob_path = os.path.realpath(path)
15621588

tests/test_file_download.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
from huggingface_hub.file_download import (
3333
_CACHED_NO_EXIST,
3434
_create_symlink,
35+
_get_pointer_path,
36+
_to_local_dir,
3537
cached_download,
3638
filename_to_url,
3739
get_hf_file_metadata,
@@ -737,6 +739,69 @@ def test_hf_hub_download_on_awful_subfolder_and_filename(self):
737739
self.assertTrue(local_path.endswith(self.filepath))
738740

739741

742+
@pytest.mark.usefixtures("fx_cache_dir")
class TestHfHubDownloadRelativePaths(unittest.TestCase):
    """Regression test for HackerOne report 1928845.

    Issue was that any file outside of the local dir could be overwritten (Windows only).

    In the end, multiple protections have been added to prevent this (..\\ in filename forbidden on Windows, always check
    the filepath is in local_dir/snapshot_dir).
    """

    # Set by the `fx_cache_dir` fixture requested in the class decorator.
    cache_dir: Path

    @classmethod
    def setUpClass(cls):
        # Create a repo holding two files whose names contain `..\` segments.
        cls.api = HfApi(endpoint=ENDPOINT_STAGING, token=TOKEN)
        cls.repo_id = cls.api.create_repo(repo_id=repo_name()).repo_id
        cls.api.upload_file(path_or_fileobj=b"content", path_in_repo="..\\ddd", repo_id=cls.repo_id)
        cls.api.upload_file(path_or_fileobj=b"content", path_in_repo="folder/..\\..\\..\\file", repo_id=cls.repo_id)

    @classmethod
    def tearDownClass(cls) -> None:
        cls.api.delete_repo(repo_id=cls.repo_id)

    # The four download tests below are marked as expected to raise `ValueError` on Windows.
    # On other platforms they must simply run without raising.

    @xfail_on_windows(reason="Windows paths cannot start with '..\\'.", raises=ValueError)
    def test_download_file_in_cache_dir(self) -> None:
        hf_hub_download(self.repo_id, "..\\ddd", cache_dir=self.cache_dir)

    @xfail_on_windows(reason="Windows paths cannot start with '..\\'.", raises=ValueError)
    def test_download_file_to_local_dir(self) -> None:
        with SoftTemporaryDirectory() as local_dir:
            hf_hub_download(self.repo_id, "..\\ddd", cache_dir=self.cache_dir, local_dir=local_dir)

    @xfail_on_windows(reason="Windows paths cannot contain '\\..\\'.", raises=ValueError)
    def test_download_folder_file_in_cache_dir(self) -> None:
        hf_hub_download(self.repo_id, "folder/..\\..\\..\\file", cache_dir=self.cache_dir)

    @xfail_on_windows(reason="Windows paths cannot contain '\\..\\'.", raises=ValueError)
    def test_download_folder_file_to_local_dir(self) -> None:
        with SoftTemporaryDirectory() as local_dir:
            hf_hub_download(self.repo_id, "folder/..\\..\\..\\file", cache_dir=self.cache_dir, local_dir=local_dir)

    def test_get_pointer_path_and_valid_relative_filename(self) -> None:
        # Nominal case: a filename without `..` segments is joined under the snapshot folder as-is.
        self.assertEqual(
            _get_pointer_path("path/to/storage", "abcdef", "path/to/file.txt"),
            os.path.join("path/to/storage", "snapshots", "abcdef", "path/to/file.txt"),
        )

    def test_get_pointer_path_but_invalid_relative_filename(self) -> None:
        # Cannot happen because of other protections, but just in case.
        relative_filename = "folder\\..\\..\\..\\file.txt" if os.name == "nt" else "folder/../../../file.txt"
        with self.assertRaises(ValueError):
            _get_pointer_path("path/to/storage", "abcdef", relative_filename)

    def test_to_local_dir_but_invalid_relative_filename(self) -> None:
        # Cannot happen because of other protections, but just in case.
        relative_filename = "folder\\..\\..\\..\\file.txt" if os.name == "nt" else "folder/../../../file.txt"
        with self.assertRaises(ValueError):
            _to_local_dir(
                "path/to/file_to_copy", "path/to/local/dir", relative_filename=relative_filename, use_symlinks=False
            )
803+
804+
740805
class CreateSymlinkTest(unittest.TestCase):
741806
@unittest.skipIf(os.name == "nt", "No symlinks on Windows")
742807
@patch("huggingface_hub.file_download.are_symlinks_supported")

tests/test_hf_api.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2439,8 +2439,9 @@ def test_space_runtime(self) -> None:
24392439
def test_pause_and_restart_space(self) -> None:
24402440
runtime_after_pause = self.api.pause_space(self.repo_id)
24412441
self.assertEqual(runtime_after_pause.stage, SpaceStage.PAUSED)
2442-
2443-
runtime_after_restart = self.api.restart_space(self.repo_id)
2442+
self.api.restart_space(self.repo_id)
2443+
time.sleep(1.0)
2444+
runtime_after_restart = self.api.get_space_runtime(self.repo_id)
24442445
self.assertIn(runtime_after_restart.stage, (SpaceStage.BUILDING, SpaceStage.RUNNING_BUILDING))
24452446

24462447

0 commit comments

Comments
 (0)