Skip to content

Commit b1a11c2

Browse files
authored
Fix get file size on lfs (#1188)
* Fix get file size on LFS * fix typos + add description
1 parent 131fd35 commit b1a11c2

File tree

6 files changed

+28
-4
lines changed

6 files changed

+28
-4
lines changed

.github/ISSUE_TEMPLATE/bug-report.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ body:
3636
huggingface-cli env
3737
```
3838
39-
If your are working in a notebook, please run it in a code cell:
39+
If you are working in a notebook, please run it in a code cell:
4040
```py
4141
from huggingface_hub import dump_environment_info
4242

src/huggingface_hub/_commit_api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -471,7 +471,7 @@ def fetch_upload_modes(
471471
if not path.endswith(".gitkeep"):
472472
warnings.warn(
473473
f"About to commit an empty file: '{path}'. Are you sure this is"
474-
" intended ?"
474+
" intended?"
475475
)
476476
upload_modes[path] = "regular"
477477

src/huggingface_hub/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ def _is_true_or_auto(value: Optional[str]) -> bool:
4646
HUGGINGFACE_CO_URL_TEMPLATE = ENDPOINT + "/{repo_id}/resolve/{revision}/{filename}"
4747
HUGGINGFACE_HEADER_X_REPO_COMMIT = "X-Repo-Commit"
4848
HUGGINGFACE_HEADER_X_LINKED_ETAG = "X-Linked-Etag"
49+
HUGGINGFACE_HEADER_X_LINKED_SIZE = "X-Linked-Size"
4950

5051
REPO_ID_SEPARATOR = "--"
5152
# ^ this substring is not allowed in repo_ids on hf.co

src/huggingface_hub/file_download.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
HF_HUB_DISABLE_SYMLINKS_WARNING,
2727
HUGGINGFACE_CO_URL_TEMPLATE,
2828
HUGGINGFACE_HEADER_X_LINKED_ETAG,
29+
HUGGINGFACE_HEADER_X_LINKED_SIZE,
2930
HUGGINGFACE_HEADER_X_REPO_COMMIT,
3031
HUGGINGFACE_HUB_CACHE,
3132
REPO_ID_SEPARATOR,
@@ -146,7 +147,8 @@ class HfFileMetadata:
146147
location (`str`):
147148
Location where to download the file. Can be a Hub url or not (CDN).
148149
size (`int`):
149-
Size of the file.
150+
Size of the file. In case of an LFS file, contains the size of the actual
151+
LFS file, not the pointer.
150152
"""
151153

152154
commit_hash: Optional[str]
@@ -1384,7 +1386,10 @@ def get_hf_file_metadata(
13841386
# Do not use directly `url`, as `_request_wrapper` might have followed relative
13851387
# redirects.
13861388
location=r.headers.get("Location") or r.request.url, # type: ignore
1387-
size=_int_or_none(r.headers.get("Content-Length")),
1389+
size=_int_or_none(
1390+
r.headers.get(HUGGINGFACE_HEADER_X_LINKED_SIZE)
1391+
or r.headers.get("Content-Length")
1392+
),
13881393
)
13891394

13901395

src/huggingface_hub/utils/_runtime.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,13 @@ def is_google_colab() -> bool:
182182

183183

184184
def dump_environment_info() -> Dict[str, Any]:
185+
"""Dump information about the machine to help debugging issues.
186+
187+
Similar helpers exist in:
188+
- `datasets` (https://github.com/huggingface/datasets/blob/main/src/datasets/commands/env.py)
189+
- `diffusers` (https://github.com/huggingface/diffusers/blob/main/src/diffusers/commands/env.py)
190+
- `transformers` (https://github.com/huggingface/transformers/blob/main/src/transformers/commands/env.py)
191+
"""
185192
from huggingface_hub import HfFolder, whoami
186193
from huggingface_hub.utils import list_credential_helpers
187194

tests/test_file_download.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,17 @@ def test_get_hf_file_metadata_from_a_renamed_repo(self) -> None:
376376
url.replace(DUMMY_RENAMED_OLD_MODEL_ID, DUMMY_RENAMED_NEW_MODEL_ID),
377377
)
378378

379+
def test_get_hf_file_metadata_from_a_lfs_file(self) -> None:
380+
"""Test getting metadata from an LFS file.
381+
382+
Must get size of the LFS file, not size of the pointer file
383+
"""
384+
url = hf_hub_url("gpt2", filename="tf_model.h5")
385+
metadata = get_hf_file_metadata(url)
386+
387+
self.assertIn("cdn-lfs", metadata.location) # Redirection
388+
self.assertEqual(metadata.size, 497933648) # Size of LFS file, not pointer
389+
379390

380391
class StagingCachedDownloadTest(unittest.TestCase):
381392
def test_download_from_a_gated_repo_with_hf_hub_download(self):

0 commit comments

Comments
 (0)