Skip to content

Commit dac13ac

Browse files
authored
Create modelcard if doesn't exist on update_metadata (#1061)
* Create modelcard if not exist on update_metadata * fix test * fix unicode characters in modelcards * unicode by default in yaml * Add test for update metadata on missing repocard * docstring * remove useless comment
1 parent ae4f420 commit dac13ac

File tree

7 files changed

+125
-9
lines changed

7 files changed

+125
-9
lines changed

src/huggingface_hub/keras_mixin.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,14 @@
88
from typing import Any, Dict, List, Optional, Union
99
from urllib.parse import quote
1010

11-
import yaml
1211
from huggingface_hub import CommitOperationDelete, ModelHubMixin, snapshot_download
1312
from huggingface_hub.file_download import (
1413
get_tf_version,
1514
is_graphviz_available,
1615
is_pydot_available,
1716
is_tf_available,
1817
)
18+
from huggingface_hub.utils import yaml_dump
1919

2020
from .constants import CONFIG_NAME, DEFAULT_REVISION
2121
from .hf_api import HfApi, _parse_revision_from_pr_url, _prepare_upload_folder_commit
@@ -105,7 +105,7 @@ def _create_model_card(
105105
readme_path = f"{repo_dir}/README.md"
106106
metadata["library_name"] = "keras"
107107
model_card = "---\n"
108-
model_card += yaml.dump(metadata, default_flow_style=False)
108+
model_card += yaml_dump(metadata, default_flow_style=False)
109109
model_card += "---\n"
110110
model_card += "\n## Model description\n\nMore information needed\n"
111111
model_card += "\n## Intended uses & limitations\n\nMore information needed\n"

src/huggingface_hub/repocard.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import sys
44
import tempfile
55
from pathlib import Path
6-
from typing import Any, Dict, Optional, Union
6+
from typing import Any, Dict, Optional, Type, Union
77

88

99
if sys.version_info >= (3, 8):
@@ -23,9 +23,10 @@
2323
eval_results_to_model_index,
2424
model_index_to_eval_results,
2525
)
26+
from huggingface_hub.utils import yaml_dump
2627

2728
from .constants import REPOCARD_NAME
28-
from .utils import validate_hf_hub_args
29+
from .utils import EntryNotFoundError, validate_hf_hub_args
2930
from .utils.logging import get_logger
3031

3132

@@ -512,7 +513,7 @@ def metadata_save(local_path: Union[str, Path], data: Dict) -> None:
512513

513514
# creates a new file if it does not exist
514515
with open(local_path, "w", newline="") as readme:
515-
data_yaml = yaml.dump(data, sort_keys=False, line_break=line_break)
516+
data_yaml = yaml_dump(data, sort_keys=False, line_break=line_break)
516517
# sort_keys: keep dict order
517518
match = REGEX_YAML_BLOCK.search(content)
518519
if match:
@@ -669,6 +670,9 @@ def metadata_update(
669670
) -> str:
670671
"""
671672
Updates the metadata in the README.md of a repository on the Hugging Face Hub.
673+
If the README.md file doesn't exist yet, a new one is created with metadata and
674+
the default ModelCard or DatasetCard template. For a `space` repo, an error is thrown
675+
as a Space cannot exist without a `README.md` file.
672676
673677
Args:
674678
repo_id (`str`):
@@ -685,7 +689,7 @@ def metadata_update(
685689
The Hugging Face authentication token.
686690
commit_message (`str`, *optional*):
687691
The summary / title / first line of the generated commit. Defaults to
688-
`f"Update metdata with huggingface_hub"`
692+
`f"Update metadata with huggingface_hub"`
689693
commit_description (`str`, *optional*):
690694
The description of the generated commit
691695
revision (`str`, *optional*):
@@ -724,7 +728,30 @@ def metadata_update(
724728
else "Update metadata with huggingface_hub"
725729
)
726730

727-
card = ModelCard.load(repo_id, token=token)
731+
# Card class given repo_type
732+
card_class: Type[RepoCard]
733+
if repo_type is None or repo_type == "model":
734+
card_class = ModelCard
735+
elif repo_type == "dataset":
736+
card_class = DatasetCard
737+
elif repo_type == "space":
738+
card_class = RepoCard
739+
else:
740+
raise ValueError(f"Unknown repo_type: {repo_type}")
741+
742+
# Either load repo_card from the Hub or create an empty one.
743+
# NOTE: Will not create the repo if it doesn't exist.
744+
try:
745+
card = card_class.load(repo_id, token=token, repo_type=repo_type)
746+
except EntryNotFoundError:
747+
if repo_type == "space":
748+
raise ValueError(
749+
"Cannot update metadata on a Space that doesn't contain a `README.md`"
750+
" file."
751+
)
752+
753+
# Initialize a ModelCard or DatasetCard from default template and no data.
754+
card = card_class.from_template(CardData())
728755

729756
for key, value in metadata.items():
730757
if key == "model-index":

src/huggingface_hub/repocard_data.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from dataclasses import dataclass
44
from typing import Any, Dict, List, Optional, Tuple, Union
55

6-
import yaml
6+
from huggingface_hub.utils import yaml_dump
77

88
from .utils.logging import get_logger
99

@@ -152,7 +152,7 @@ def to_yaml(self, line_break=None) -> str:
152152
Returns:
153153
`str`: CardData represented as a YAML block.
154154
"""
155-
return yaml.dump(self.to_dict(), sort_keys=False, line_break=line_break).strip()
155+
return yaml_dump(self.to_dict(), sort_keys=False, line_break=line_break).strip()
156156

157157
def __repr__(self):
158158
return self.to_yaml()

src/huggingface_hub/utils/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
RevisionNotFoundError,
3636
hf_raise_for_status,
3737
)
38+
from ._fixes import yaml_dump
3839
from ._headers import build_hf_headers
3940
from ._hf_folder import HfFolder
4041
from ._http import http_backoff

src/huggingface_hub/utils/_fixes.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,20 @@
88
from simplejson import JSONDecodeError # noqa
99
except ImportError:
1010
from json import JSONDecodeError # noqa
11+
12+
from functools import partial
13+
14+
import yaml
15+
16+
17+
# Wrap `yaml.dump` to set `allow_unicode=True` by default.
18+
#
19+
# Example:
20+
# ```py
21+
# >>> yaml.dump({"emoji": "👀", "some unicode": "日本か"})
22+
# 'emoji: "\\U0001F440"\nsome unicode: "\\u65E5\\u672C\\u304B"\n'
23+
#
24+
# >>> yaml_dump({"emoji": "👀", "some unicode": "日本か"})
25+
# 'emoji: 👀\nsome unicode: 日本か\n'
26+
# ```
27+
yaml_dump = partial(yaml.dump, allow_unicode=True)

tests/test_repocard.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,61 @@ def test_update_metadata_on_empty_text_content(self) -> None:
418418
self.assertDictEqual(updated_metadata, expected_metadata)
419419

420420

421+
class TestMetadataUpdateOnMissingCard(unittest.TestCase):
422+
def setUp(self) -> None:
423+
"""
424+
Share this valid token in all tests below.
425+
"""
426+
self._token = TOKEN
427+
self._api = HfApi(endpoint=ENDPOINT_STAGING)
428+
self._repo_id = f"{USER}/{repo_name()}"
429+
430+
def test_metadata_update_missing_readme_on_model(self) -> None:
431+
self._api.create_repo(self._repo_id, token=self._token)
432+
metadata_update(self._repo_id, {"tag": "this_is_a_test"}, token=self._token)
433+
model_card = ModelCard.load(self._repo_id, token=self._token)
434+
435+
# Created a card with default template + metadata
436+
self.assertIn("# Model Card for Model ID", str(model_card))
437+
self.assertEqual(model_card.data.to_dict(), {"tag": "this_is_a_test"})
438+
439+
self._api.delete_repo(self._repo_id, token=self._token)
440+
441+
def test_metadata_update_missing_readme_on_dataset(self) -> None:
442+
self._api.create_repo(self._repo_id, repo_type="dataset", token=self._token)
443+
metadata_update(
444+
self._repo_id,
445+
{"tag": "this is a dataset test"},
446+
token=self._token,
447+
repo_type="dataset",
448+
)
449+
dataset_card = DatasetCard.load(self._repo_id, token=self._token)
450+
451+
# Created a card with default template + metadata
452+
self.assertIn("# Dataset Card for Dataset Name", str(dataset_card))
453+
self.assertEqual(dataset_card.data.to_dict(), {"tag": "this is a dataset test"})
454+
455+
self._api.delete_repo(self._repo_id, repo_type="dataset", token=self._token)
456+
457+
def test_metadata_update_missing_readme_on_space(self) -> None:
458+
self._api.create_repo(
459+
self._repo_id, repo_type="space", token=self._token, space_sdk="static"
460+
)
461+
self._api.delete_file(
462+
"README.md", self._repo_id, repo_type="space", token=self._token
463+
)
464+
with self.assertRaises(ValueError):
465+
# Cannot create a default readme on a space repo (should be automatically
466+
# created on the Hub).
467+
metadata_update(
468+
self._repo_id,
469+
{"tag": "this is a space test"},
470+
token=self._token,
471+
repo_type="space",
472+
)
473+
self._api.delete_repo(self._repo_id, repo_type="space", token=self._token)
474+
475+
421476
class TestCaseWithCapLog(unittest.TestCase):
422477
_api = HfApi(endpoint=ENDPOINT_STAGING)
423478

tests/test_utils_fixes.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import unittest
2+
3+
from huggingface_hub.utils import yaml_dump
4+
5+
6+
class TestYamlDump(unittest.TestCase):
7+
def test_yaml_dump_emoji(self) -> None:
8+
self.assertEqual(yaml_dump({"emoji": "👀"}), "emoji: 👀\n")
9+
10+
def test_yaml_dump_japanese_characters(self) -> None:
11+
self.assertEqual(yaml_dump({"some unicode": "日本か"}), "some unicode: 日本か\n")
12+
13+
def test_yaml_dump_explicit_no_unicode(self) -> None:
14+
self.assertEqual(
15+
yaml_dump({"emoji": "👀"}, allow_unicode=False), 'emoji: "\\U0001F440"\n'
16+
)

0 commit comments

Comments
 (0)