Skip to content

Commit 6cc7606

Browse files
committed
Merge branch 'main' into buckets-api
2 parents 70c5fa9 + 1346ac6 commit 6cc7606

File tree

8 files changed

+87
-7
lines changed

8 files changed

+87
-7
lines changed

src/huggingface_hub/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
from typing import TYPE_CHECKING
4747

4848

49-
__version__ = "1.3.0.dev0"
49+
__version__ = "1.5.0.dev0"
5050

5151
# Alphabetical order of definitions is ensured in tests
5252
# WARNING: any comment added in this dictionary definition will be lost when

src/huggingface_hub/cli/hf.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,11 @@
1414

1515
import sys
1616
import traceback
17+
from typing import Annotated, Optional
1718

18-
from huggingface_hub import constants
19+
import typer
20+
21+
from huggingface_hub import __version__, constants
1922
from huggingface_hub.cli._cli_utils import check_cli_update, typer_factory
2023
from huggingface_hub.cli._errors import format_known_exception
2124
from huggingface_hub.cli.auth import auth_cli
@@ -43,6 +46,21 @@
4346
app = typer_factory(help="Hugging Face Hub CLI")
4447

4548

49+
def _version_callback(value: bool) -> None:
50+
if value:
51+
print(__version__)
52+
raise typer.Exit()
53+
54+
55+
@app.callback(invoke_without_command=True)
56+
def app_callback(
57+
version: Annotated[
58+
Optional[bool], typer.Option("--version", callback=_version_callback, is_eager=True, hidden=True)
59+
] = None,
60+
) -> None:
61+
pass
62+
63+
4664
# top level single commands (defined in their respective files)
4765
app.command()(sync)
4866
app.command(examples=DOWNLOAD_EXAMPLES)(download)

src/huggingface_hub/cli/jobs.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
import multiprocessing
6363
import multiprocessing.pool
6464
import os
65+
import shutil
6566
import time
6667
from dataclasses import asdict
6768
from fnmatch import fnmatch
@@ -1010,7 +1011,7 @@ def _tabulate(rows: list[list[Union[str, int]]], headers: list[str]) -> str:
10101011
- stackoverflow.com/questions/9535954/printing-lists-as-tabular-data
10111012
"""
10121013
col_widths = [max(len(str(x)) for x in col) for col in zip(*rows, headers)]
1013-
terminal_width = max(os.get_terminal_size().columns, len(headers) * 12)
1014+
terminal_width = max(shutil.get_terminal_size().columns, len(headers) * 12)
10141015
while len(headers) + sum(col_widths) > terminal_width:
10151016
col_to_minimize = col_widths.index(max(col_widths))
10161017
col_widths[col_to_minimize] //= 2

src/huggingface_hub/file_download.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,14 @@ def http_get(
418418
retry_on_status_codes=(429,),
419419
) as response:
420420
hf_raise_for_status(response)
421+
422+
# If we requested a Range but got 200 back, the server ignored our Range header
423+
# (e.g. CloudFront with Accept-Encoding: gzip). Reset file to avoid corruption.
424+
if resume_size > 0 and response.status_code == 200:
425+
temp_file.seek(0)
426+
temp_file.truncate()
427+
resume_size = 0
428+
421429
total: Optional[int] = _get_file_length_from_http_response(response)
422430

423431
if displayed_filename is None:

src/huggingface_hub/hf_api.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,8 @@ class RepoFile:
673673
The file's git OID.
674674
lfs (`BlobLfsInfo`, *optional*):
675675
The file's LFS metadata.
676+
xet_hash (`str`, *optional*):
677+
The file's Xet hash.
676678
last_commit (`LastCommitInfo`, *optional*):
677679
The file's last commit metadata. Only defined if [`list_repo_tree`] and [`get_paths_info`]
678680
are called with `expand=True`.
@@ -685,6 +687,7 @@ class RepoFile:
685687
size: int
686688
blob_id: str
687689
lfs: Optional[BlobLfsInfo] = None
690+
xet_hash: Optional[str] = None
688691
last_commit: Optional[LastCommitInfo] = None
689692
security: Optional[BlobSecurityInfo] = None
690693

@@ -696,6 +699,7 @@ def __init__(self, **kwargs):
696699
if lfs is not None:
697700
lfs = BlobLfsInfo(size=lfs["size"], sha256=lfs["oid"], pointer_size=lfs["pointerSize"])
698701
self.lfs = lfs
702+
self.xet_hash = kwargs.pop("xetHash", None)
699703
last_commit = kwargs.pop("lastCommit", None) or kwargs.pop("last_commit", None)
700704
if last_commit is not None:
701705
last_commit = LastCommitInfo(

src/huggingface_hub/hf_file_system.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,7 @@ def _ls_tree(
563563
"type": "file",
564564
"blob_id": path_info.blob_id,
565565
"lfs": path_info.lfs,
566+
"xet_hash": path_info.xet_hash,
566567
"last_commit": path_info.last_commit,
567568
"security": path_info.security,
568569
}
@@ -830,6 +831,7 @@ def info(self, path: str, refresh: bool = False, revision: Optional[str] = None,
830831
"type": "file",
831832
"blob_id": path_info.blob_id,
832833
"lfs": path_info.lfs,
834+
"xet_hash": path_info.xet_hash,
833835
"last_commit": path_info.last_commit,
834836
"security": path_info.security,
835837
}

tests/test_file_download.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1152,6 +1152,47 @@ def _iter_content_4() -> Iterable[bytes]:
11521152
for i, expected_range in enumerate(expected_ranges):
11531153
assert mock_stream_backoff.call_args_list[i].kwargs["headers"] == {"Range": expected_range}
11541154

1155+
def test_http_get_retry_resets_file_when_range_ignored(self, caplog):
1156+
"""Test that http_get resets the file when the server ignores the Range header.
1157+
1158+
When a download is interrupted and retried with a Range header, some servers
1159+
(e.g. CloudFront with Accept-Encoding: gzip) ignore the Range header and return
1160+
200 with the full file instead of 206. In that case, the code must truncate
1161+
the file before writing to avoid appending the full content to partial data.
1162+
"""
1163+
1164+
def _iter_content_1() -> Iterable[bytes]:
1165+
yield b"A" * 30
1166+
raise httpx.TimeoutException("Fake timeout")
1167+
1168+
def _iter_content_2() -> Iterable[bytes]:
1169+
# Server ignores Range, returns full content
1170+
yield b"B" * 100
1171+
1172+
mock_response_1 = Mock()
1173+
mock_response_1.status_code = 200
1174+
mock_response_1.headers = {"Content-Length": "100"}
1175+
mock_response_1.iter_bytes.return_value = _iter_content_1()
1176+
1177+
mock_response_2 = Mock()
1178+
mock_response_2.status_code = 200 # 200, not 206 — Range was ignored
1179+
mock_response_2.headers = {"Content-Length": "100"}
1180+
mock_response_2.iter_bytes.return_value = _iter_content_2()
1181+
1182+
mock_responses = iter([mock_response_1, mock_response_2])
1183+
1184+
@contextmanager
1185+
def _mock_stream(*args, **kwargs):
1186+
yield next(mock_responses)
1187+
1188+
with patch("huggingface_hub.file_download.http_stream_backoff", side_effect=_mock_stream):
1189+
temp_file = io.BytesIO()
1190+
http_get("fake_url", temp_file=temp_file)
1191+
1192+
# File should contain only the full content from retry (100 bytes), not 130
1193+
assert temp_file.tell() == 100
1194+
assert temp_file.getvalue() == b"B" * 100
1195+
11551196

11561197
class CreateSymlinkTest(unittest.TestCase):
11571198
@unittest.skipIf(os.name == "nt", "No symlinks on Windows")

tests/test_hf_api.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1450,12 +1450,12 @@ def tearDownClass(cls):
14501450

14511451
def test_list_tree(self):
14521452
tree = list(self._api.list_repo_tree(repo_id=self.repo_id))
1453-
self.assertEqual(len(tree), 6)
1454-
self.assertEqual({tree_obj.path for tree_obj in tree}, {"file.md", "lfs.bin", "1", "2", "3", ".gitattributes"})
1453+
assert len(tree) == 6
1454+
assert {tree_obj.path for tree_obj in tree} == {"file.md", "lfs.bin", "1", "2", "3", ".gitattributes"}
14551455

14561456
tree = list(self._api.list_repo_tree(repo_id=self.repo_id, path_in_repo="1"))
1457-
self.assertEqual(len(tree), 2)
1458-
self.assertEqual({tree_obj.path for tree_obj in tree}, {"1/file_1.md", "1/2"})
1457+
assert len(tree) == 2
1458+
assert {tree_obj.path for tree_obj in tree} == {"1/file_1.md", "1/2"}
14591459

14601460
def test_list_tree_recursively(self):
14611461
tree = list(self._api.list_repo_tree(repo_id=self.repo_id, recursive=True))
@@ -1530,6 +1530,12 @@ def test_list_files_without_expand(self):
15301530
feature_extractor = next(tree_obj for tree_obj in tree if tree_obj.path == "feature_extractor")
15311531
self.assertIsNone(feature_extractor.last_commit)
15321532

1533+
@with_production_testing
1534+
def test_list_tree_with_xethash(self):
1535+
tree = list(HfApi().list_repo_tree(repo_id="openai-community/gpt2"))
1536+
model_entry = next(tree_obj for tree_obj in tree if tree_obj.path == "model.safetensors")
1537+
assert model_entry.xet_hash == "63bed80836ee0758c8fd4f8975d59bb0b864263ee2753547c358e8a37cde8758"
1538+
15331539

15341540
class HfApiTagEndpointTest(HfApiCommonTest):
15351541
@use_tmp_repo("model")

0 commit comments

Comments
 (0)