Skip to content

Commit f9de4e3

Browse files
committed
Merge branch 'main' into v0.11-release
2 parents 348394b + 0e0eaaa commit f9de4e3

File tree

12 files changed

+187
-93
lines changed

12 files changed

+187
-93
lines changed

.github/ISSUE_TEMPLATE/bug-report.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ body:
3636
huggingface-cli env
3737
```
3838
39-
If your are working in a notebook, please run it in a code cell:
39+
If you are working in a notebook, please run it in a code cell:
4040
```py
4141
from huggingface_hub import dump_environment_info
4242

.github/workflows/python-tests.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,11 @@ jobs:
7575
case "${{ matrix.test_name }}" in
7676
7777
"Repository only")
78-
eval "$PYTEST ../tests -k 'RepositoryTest'"
78+
eval "$PYTEST ../tests -k 'RepositoryTest or RepositoryDatasetTest'"
7979
;;
8080
8181
"Everything else")
82-
eval "$PYTEST ../tests -k 'not RepositoryTest'"
82+
eval "$PYTEST ../tests -k 'not RepositoryTest and not RepositoryDatasetTest'"
8383
;;
8484
8585
lfs)

src/huggingface_hub/_commit_api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -471,7 +471,7 @@ def fetch_upload_modes(
471471
if not path.endswith(".gitkeep"):
472472
warnings.warn(
473473
f"About to commit an empty file: '{path}'. Are you sure this is"
474-
" intended ?"
474+
" intended?"
475475
)
476476
upload_modes[path] = "regular"
477477

src/huggingface_hub/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ def _is_true_or_auto(value: Optional[str]) -> bool:
4646
HUGGINGFACE_CO_URL_TEMPLATE = ENDPOINT + "/{repo_id}/resolve/{revision}/{filename}"
4747
HUGGINGFACE_HEADER_X_REPO_COMMIT = "X-Repo-Commit"
4848
HUGGINGFACE_HEADER_X_LINKED_ETAG = "X-Linked-Etag"
49+
HUGGINGFACE_HEADER_X_LINKED_SIZE = "X-Linked-Size"
4950

5051
REPO_ID_SEPARATOR = "--"
5152
# ^ this substring is not allowed in repo_ids on hf.co

src/huggingface_hub/file_download.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
HF_HUB_DISABLE_SYMLINKS_WARNING,
2727
HUGGINGFACE_CO_URL_TEMPLATE,
2828
HUGGINGFACE_HEADER_X_LINKED_ETAG,
29+
HUGGINGFACE_HEADER_X_LINKED_SIZE,
2930
HUGGINGFACE_HEADER_X_REPO_COMMIT,
3031
HUGGINGFACE_HUB_CACHE,
3132
REPO_ID_SEPARATOR,
@@ -146,7 +147,8 @@ class HfFileMetadata:
146147
location (`str`):
147148
Location where to download the file. Can be a Hub url or not (CDN).
148149
size (`int`):
149-
Size of the file.
150+
Size of the file. In case of an LFS file, contains the size of the actual
151+
LFS file, not the pointer.
150152
"""
151153

152154
commit_hash: Optional[str]
@@ -1384,7 +1386,10 @@ def get_hf_file_metadata(
13841386
# Do not use directly `url`, as `_request_wrapper` might have followed relative
13851387
# redirects.
13861388
location=r.headers.get("Location") or r.request.url, # type: ignore
1387-
size=_int_or_none(r.headers.get("Content-Length")),
1389+
size=_int_or_none(
1390+
r.headers.get(HUGGINGFACE_HEADER_X_LINKED_SIZE)
1391+
or r.headers.get("Content-Length")
1392+
),
13881393
)
13891394

13901395

src/huggingface_hub/repocard.py

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -788,47 +788,38 @@ def metadata_update(
788788
else:
789789
existing_results = card.data.eval_results
790790

791+
# Iterate over new results
792+
# Iterate over existing results
793+
# If both results describe the same metric but value is different:
794+
# If overwrite=True: overwrite the metric value
795+
# Else: raise ValueError
796+
# Else: append new result to existing ones.
791797
for new_result in new_results:
792798
result_found = False
793-
for existing_result_index, existing_result in enumerate(
794-
existing_results
795-
):
796-
if all(
797-
[
798-
new_result.dataset_name == existing_result.dataset_name,
799-
new_result.dataset_type == existing_result.dataset_type,
800-
new_result.task_type == existing_result.task_type,
801-
new_result.task_name == existing_result.task_name,
802-
new_result.metric_name == existing_result.metric_name,
803-
new_result.metric_type == existing_result.metric_type,
804-
]
805-
):
806-
if (
807-
new_result.metric_value != existing_result.metric_value
808-
and not overwrite
809-
):
810-
existing_str = (
811-
f"name: {new_result.metric_name}, type:"
812-
f" {new_result.metric_type}"
813-
)
799+
for existing_result in existing_results:
800+
if new_result.is_equal_except_value(existing_result):
801+
if new_result != existing_result and not overwrite:
814802
raise ValueError(
815803
"You passed a new value for the existing metric"
816-
f" '{existing_str}'. Set `overwrite=True` to"
817-
" overwrite existing metrics."
804+
f" 'name: {new_result.metric_name}, type: "
805+
f"{new_result.metric_type}'. Set `overwrite=True`"
806+
" to overwrite existing metrics."
818807
)
819808
result_found = True
820-
card.data.eval_results[existing_result_index] = new_result
809+
existing_result.metric_value = new_result.metric_value
821810
if not result_found:
822811
card.data.eval_results.append(new_result)
823812
else:
813+
# Any metadata that is not a result metric
824814
if (
825815
hasattr(card.data, key)
826816
and getattr(card.data, key) is not None
827817
and not overwrite
828818
and getattr(card.data, key) != value
829819
):
830820
raise ValueError(
831-
f"""You passed a new value for the existing meta data field '{key}'. Set `overwrite=True` to overwrite existing metadata."""
821+
f"You passed a new value for the existing meta data field '{key}'."
822+
" Set `overwrite=True` to overwrite existing metadata."
832823
)
833824
else:
834825
setattr(card.data, key, value)

src/huggingface_hub/repocard_data.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,18 @@ class EvalResult:
121121
# A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not.
122122
verify_token: Optional[str] = None
123123

124+
def is_equal_except_value(self, other: "EvalResult") -> bool:
125+
"""
126+
Return True if `self` and `other` describe exactly the same metric, regardless
127+
of whether their `metric_value` differs (every other field must be equal).
128+
"""
129+
for key, _ in self.__dict__.items():
130+
if key == "metric_value":
131+
continue
132+
if getattr(self, key) != getattr(other, key):
133+
return False
134+
return True
135+
124136

125137
@dataclass
126138
class CardData:

src/huggingface_hub/repository.py

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import tempfile
66
import threading
77
import time
8+
import warnings
89
from contextlib import contextmanager
910
from pathlib import Path
1011
from typing import Callable, Dict, Iterator, List, Optional, Tuple, Union
@@ -15,7 +16,14 @@
1516

1617
from .hf_api import HfApi, repo_type_and_id_from_hf_id
1718
from .lfs import LFS_MULTIPART_UPLOAD_COMMAND
18-
from .utils import HfFolder, logging, run_subprocess, tqdm, validate_hf_hub_args
19+
from .utils import (
20+
HfFolder,
21+
RepositoryNotFoundError,
22+
logging,
23+
run_subprocess,
24+
tqdm,
25+
validate_hf_hub_args,
26+
)
1927
from .utils._deprecation import _deprecate_arguments, _deprecate_method
2028
from .utils._typing import TypedDict
2129

@@ -684,9 +692,10 @@ def clone_from(self, repo_url: str, token: Union[bool, str, None] = None):
684692
if hub_url in repo_url or (
685693
"http" not in repo_url and len(repo_url.split("/")) <= 2
686694
):
687-
repo_type, namespace, repo_id = repo_type_and_id_from_hf_id(
695+
repo_type, namespace, repo_name = repo_type_and_id_from_hf_id(
688696
repo_url, hub_url=hub_url
689697
)
698+
repo_id = f"{namespace}/{repo_name}" if namespace is not None else repo_name
690699

691700
if repo_type is not None:
692701
self._repo_type = repo_type
@@ -701,10 +710,32 @@ def clone_from(self, repo_url: str, token: Union[bool, str, None] = None):
701710
scheme = urlparse(repo_url).scheme
702711
repo_url = repo_url.replace(f"{scheme}://", f"{scheme}://user:{token}@")
703712

704-
if namespace is not None:
705-
repo_url += f"{namespace}/"
706713
repo_url += repo_id
707714

715+
# To be removed: check if repo exists. If not, create it first.
716+
try:
717+
HfApi().repo_info(f"{repo_id}", repo_type=self._repo_type, token=token)
718+
except RepositoryNotFoundError:
719+
if self._repo_type == "space":
720+
raise ValueError(
721+
"Creating a Space through passing Space link to clone_from is"
722+
" not allowed. Make sure the Space exists on Hugging Face Hub."
723+
)
724+
else:
725+
warnings.warn(
726+
"Creating a repository through 'clone_from' is deprecated and"
727+
" will be removed in v0.12. Please create the repository first"
728+
" using `create_repo(..., exist_ok=True)`.",
729+
FutureWarning,
730+
)
731+
self.client.create_repo(
732+
repo_id=repo_id,
733+
token=token,
734+
repo_type=self._repo_type,
735+
exist_ok=True,
736+
private=self._private,
737+
)
738+
708739
# For error messages, it's cleaner to show the repo url without the token.
709740
clean_repo_url = re.sub(r"(https?)://.*@", r"\1://", repo_url)
710741
try:

src/huggingface_hub/utils/_runtime.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,13 @@ def is_google_colab() -> bool:
182182

183183

184184
def dump_environment_info() -> Dict[str, Any]:
185+
"""Dump information about the machine to help debugging issues.
186+
187+
Similar helpers exist in:
188+
- `datasets` (https://github.com/huggingface/datasets/blob/main/src/datasets/commands/env.py)
189+
- `diffusers` (https://github.com/huggingface/diffusers/blob/main/src/diffusers/commands/env.py)
190+
- `transformers` (https://github.com/huggingface/transformers/blob/main/src/transformers/commands/env.py)
191+
"""
185192
from huggingface_hub import HfFolder, whoami
186193
from huggingface_hub.utils import list_credential_helpers
187194

tests/test_file_download.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,17 @@ def test_get_hf_file_metadata_from_a_renamed_repo(self) -> None:
376376
url.replace(DUMMY_RENAMED_OLD_MODEL_ID, DUMMY_RENAMED_NEW_MODEL_ID),
377377
)
378378

379+
def test_get_hf_file_metadata_from_a_lfs_file(self) -> None:
380+
"""Test getting metadata from an LFS file.
381+
382+
Must get size of the LFS file, not size of the pointer file
383+
"""
384+
url = hf_hub_url("gpt2", filename="tf_model.h5")
385+
metadata = get_hf_file_metadata(url)
386+
387+
self.assertIn("cdn-lfs", metadata.location) # Redirection
388+
self.assertEqual(metadata.size, 497933648) # Size of LFS file, not pointer
389+
379390

380391
class StagingCachedDownloadTest(unittest.TestCase):
381392
def test_download_from_a_gated_repo_with_hf_hub_download(self):

0 commit comments

Comments
 (0)