Skip to content

Commit 0cc3822

Browse files
Wauplin and davanstrien authored
Explicit raise on invalid model_index + add ignore_metadata_errors option (#1377)
* Explicit raise on invalid model_index + add ignore_metadata_errors option * fix tests + typo * use cache dir in RepocardMetadataTest * get rid of Repository in tests (speed-up) * tpyo * Apply suggestions from code review Co-authored-by: Daniel van Strien <[email protected]> --------- Co-authored-by: Daniel van Strien <[email protected]>
1 parent 4c0fcd1 commit 0cc3822

File tree

5 files changed

+110
-143
lines changed

5 files changed

+110
-143
lines changed

src/huggingface_hub/_commit_api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ def fetch_upload_modes(
409409
create_pr: bool = False,
410410
) -> Dict[str, UploadMode]:
411411
"""
412-
Requests the Hub "preupload" endpoint to determine wether each input file
412+
Requests the Hub "preupload" endpoint to determine whether each input file
413413
should be uploaded as a regular git blob or as git LFS blob.
414414
415415
Args:

src/huggingface_hub/community.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class Discussion:
4444
The username of the Discussion / Pull Request author.
4545
Can be `"deleted"` if the user has been deleted since.
4646
is_pull_request (`bool`):
47-
Wether or not this is a Pull Request.
47+
Whether or not this is a Pull Request.
4848
created_at (`datetime`):
4949
The `datetime` of creation of the Discussion / Pull Request.
5050
"""
@@ -96,7 +96,7 @@ class DiscussionWithDetails(Discussion):
9696
The username of the Discussion / Pull Request author.
9797
Can be `"deleted"` if the user has been deleted since.
9898
is_pull_request (`bool`):
99-
Wether or not this is a Pull Request.
99+
Whether or not this is a Pull Request.
100100
created_at (`datetime`):
101101
The `datetime` of creation of the Discussion / Pull Request.
102102
events (`list` of [`DiscussionEvent`])
@@ -175,7 +175,7 @@ class DiscussionComment(DiscussionEvent):
175175
content (`str`):
176176
The raw markdown content of the comment. Mentions, links and images are not rendered.
177177
edited (`bool`):
178-
Wether or not this comment has been edited.
178+
Whether or not this comment has been edited.
179179
hidden (`bool`):
180180
Whether or not this comment has been hidden.
181181
"""

src/huggingface_hub/repocard.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class RepoCard:
4040
default_template_path = TEMPLATE_MODELCARD_PATH
4141
repo_type = "model"
4242

43-
def __init__(self, content: str):
43+
def __init__(self, content: str, ignore_metadata_errors: bool = False):
4444
"""Initialize a RepoCard from string content. The content should be a
4545
Markdown file with a YAML block at the beginning and a Markdown body.
4646
@@ -76,6 +76,7 @@ def __init__(self, content: str):
7676

7777
# Set the content of the RepoCard, as well as underlying .data and .text attributes.
7878
# See the `content` property setter for more details.
79+
self.ignore_metadata_errors = ignore_metadata_errors
7980
self.content = content
8081

8182
@property
@@ -105,7 +106,7 @@ def content(self, content: str):
105106
data_dict = {}
106107
self.text = content
107108

108-
self.data = self.card_data_class(**data_dict)
109+
self.data = self.card_data_class(**data_dict, ignore_metadata_errors=self.ignore_metadata_errors)
109110

110111
def __str__(self):
111112
return self.content
@@ -136,20 +137,22 @@ def load(
136137
repo_id_or_path: Union[str, Path],
137138
repo_type: Optional[str] = None,
138139
token: Optional[str] = None,
140+
ignore_metadata_errors: bool = False,
139141
):
140142
"""Initialize a RepoCard from a Hugging Face Hub repo's README.md or a local filepath.
141143
142144
Args:
143145
repo_id_or_path (`Union[str, Path]`):
144146
The repo ID associated with a Hugging Face Hub repo or a local filepath.
145147
repo_type (`str`, *optional*):
146-
The type of Hugging Face repo to push to. Defaults to None, which will use
147-
use "model". Other options are "dataset" and "space". Not used when loading from
148-
a local filepath. If this is called from a child class, the default value will be
149-
the child class's `repo_type`.
148+
The type of Hugging Face repo to push to. Defaults to None, which will use "model". Other options
149+
are "dataset" and "space". Not used when loading from a local filepath. If this is called from a child
150+
class, the default value will be the child class's `repo_type`.
150151
token (`str`, *optional*):
151-
Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to
152-
the stored token.
152+
Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to the stored token.
153+
ignore_metadata_errors (`bool`, *optional*):
154+
If True, errors while parsing the metadata section will be ignored. Some information might be lost during
155+
the process. Use it at your own risk.
153156
154157
Returns:
155158
[`huggingface_hub.repocard.RepoCard`]: The RepoCard (or subclass) initialized from the repo's
@@ -178,7 +181,7 @@ def load(
178181

179182
# Preserve newlines in the existing file.
180183
with Path(card_path).open(mode="r", newline="", encoding="utf-8") as f:
181-
return cls(f.read())
184+
return cls(f.read(), ignore_metadata_errors=ignore_metadata_errors)
182185

183186
def validate(self, repo_type: Optional[str] = None):
184187
"""Validates card against Hugging Face Hub's card validation logic.

src/huggingface_hub/repocard_data.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ class CardData:
158158
inherit from `dict` to allow this export step.
159159
"""
160160

161-
def __init__(self, **kwargs):
161+
def __init__(self, ignore_metadata_errors: bool = False, **kwargs):
162162
self.__dict__.update(kwargs)
163163

164164
def to_dict(self) -> Dict[str, Any]:
@@ -248,6 +248,9 @@ class ModelCardData(CardData):
248248
`eval_results` to construct the `model-index` within the card's metadata. The name
249249
you supply here is what will be used on PapersWithCode's leaderboards. If None is provided
250250
then the repo name is used as a default. Defaults to None.
251+
ignore_metadata_errors (`bool`, *optional*):
252+
If True, errors while parsing the metadata section will be ignored. Some information might be lost during
253+
the process. Use it at your own risk.
251254
kwargs (`dict`, *optional*):
252255
Additional metadata that will be added to the model card. Defaults to None.
253256
@@ -277,6 +280,7 @@ def __init__(
277280
metrics: Optional[List[str]] = None,
278281
eval_results: Optional[List[EvalResult]] = None,
279282
model_name: Optional[str] = None,
283+
ignore_metadata_errors: bool = False,
280284
**kwargs,
281285
):
282286
self.language = language
@@ -294,8 +298,15 @@ def __init__(
294298
model_name, eval_results = model_index_to_eval_results(model_index)
295299
self.model_name = model_name
296300
self.eval_results = eval_results
297-
except KeyError:
298-
logger.warning("Invalid model-index. Not loading eval results into CardData.")
301+
except KeyError as error:
302+
if ignore_metadata_errors:
303+
logger.warning("Invalid model-index. Not loading eval results into CardData.")
304+
else:
305+
raise ValueError(
306+
f"Invalid `model_index` in metadata cannot be parsed: KeyError {error}. Pass"
307+
" `ignore_metadata_errors=True` to ignore this error while loading a Model Card. Warning:"
308+
" some information will be lost. Use it at your own risk."
309+
)
299310

300311
super().__init__(**kwargs)
301312

@@ -350,6 +361,9 @@ class DatasetCardData(CardData):
350361
If not provided, it will be gathered from the 'train-eval-index' key of the kwargs.
351362
configs (`Union[str, List[str]]`, *optional*):
352363
A list of the available dataset configs for the dataset.
364+
ignore_metadata_errors (`bool`, *optional*):
365+
If True, errors while parsing the metadata section will be ignored. Some information might be lost during
366+
the process. Use it at your own risk.
353367
"""
354368

355369
def __init__(
@@ -368,6 +382,7 @@ def __init__(
368382
pretty_name: Optional[str] = None,
369383
train_eval_index: Optional[Dict] = None,
370384
configs: Optional[Union[str, List[str]]] = None,
385+
ignore_metadata_errors: bool = False,
371386
**kwargs,
372387
):
373388
self.annotations_creators = annotations_creators
@@ -421,6 +436,9 @@ class SpaceCardData(CardData):
421436
List of datasets related to this Space. Should be a dataset ID found on https://hf.co/datasets.
422437
tags (`List[str]`, *optional*)
423438
List of tags to add to your Space that can be used when filtering on the Hub.
439+
ignore_metadata_errors (`bool`, *optional*):
440+
If True, errors while parsing the metadata section will be ignored. Some information might be lost during
441+
the process. Use it at your own risk.
424442
kwargs (`dict`, *optional*):
425443
Additional metadata that will be added to the space card.
426444
@@ -452,6 +470,7 @@ def __init__(
452470
models: Optional[List[str]] = None,
453471
datasets: Optional[List[str]] = None,
454472
tags: Optional[List[str]] = None,
473+
ignore_metadata_errors: bool = False,
455474
**kwargs,
456475
):
457476
self.title = title

0 commit comments

Comments
 (0)