ModelHubMixin: Fix attributes lost in inheritance (#2305)

Wauplin · qubvel · Wauplin · commit 30e5192e7bb7 · 2024-06-14T16:08:42.000+02:00
* ModelHubMixin: Fix attributes lost in inheritance

* make style

* deprecate

* style

* Update src/huggingface_hub/hub_mixin.py

Co-authored-by: Pavel Iakubovskii <qubvel@gmail.com>

---------

Co-authored-by: Pavel Iakubovskii <qubvel@gmail.com>
diff --git a/src/huggingface_hub/hub_mixin.py b/src/huggingface_hub/hub_mixin.py
@@ -1,6 +1,7 @@
 import inspect
 import json
 import os
+import warnings
 from dataclasses import asdict, dataclass, is_dataclass
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Type, TypeVar, Union, get_args
@@ -85,8 +86,8 @@ class ModelHubMixin:
             URL of the library documentation. Used to generate model card.
         model_card_template (`str`, *optional*):
             Template of the model card. Used to generate model card. Defaults to a generic template.
-        languages (`List[str]`, *optional*):
-            Languages supported by the library. Used to generate model card.
+        language (`str` or `List[str]`, *optional*):
+            Language supported by the library. Used to generate model card.
         library_name (`str`, *optional*):
             Name of the library integrating ModelHubMixin. Used to generate model card.
         license (`str`, *optional*):
@@ -191,7 +192,7 @@ def __init_subclass__(
         # Model card template
         model_card_template: str = DEFAULT_MODEL_CARD,
         # Model card metadata
-        languages: Optional[List[str]] = None,
+        language: Optional[List[str]] = None,
         library_name: Optional[str] = None,
         license: Optional[str] = None,
         license_name: Optional[str] = None,
@@ -205,27 +206,55 @@ def __init_subclass__(
             # Value is a tuple (encoder, decoder).
             # Example: {MyCustomType: (lambda x: x.value, lambda data: MyCustomType(data))}
         ] = None,
+        # Deprecated arguments
+        languages: Optional[List[str]] = None,
     ) -> None:
         """Inspect __init__ signature only once when subclassing + handle modelcard."""
         super().__init_subclass__()
 
         # Will be reused when creating modelcard
         tags = tags or []
         tags.append("model_hub_mixin")
-        cls._hub_mixin_info = MixinInfo(
-            model_card_template=model_card_template,
-            repo_url=repo_url,
-            docs_url=docs_url,
-            model_card_data=ModelCardData(
-                languages=languages,
-                library_name=library_name,
-                license=license,
-                license_name=license_name,
-                license_link=license_link,
-                pipeline_tag=pipeline_tag,
-                tags=tags,
-            ),
-        )
+
+        # Initialize MixinInfo if not existent
+        if not hasattr(cls, "_hub_mixin_info"):
+            cls._hub_mixin_info = MixinInfo(
+                model_card_template=model_card_template,
+                model_card_data=ModelCardData(),
+            )
+        info = cls._hub_mixin_info
+
+        if languages is not None:
+            warnings.warn(
+                "The `languages` argument is deprecated. Use `language` instead. This will be removed in `huggingface_hub>=0.27.0`.",
+                DeprecationWarning,
+            )
+            language = languages
+
+        # Update MixinInfo with metadata
+        if model_card_template is not None and model_card_template != DEFAULT_MODEL_CARD:
+            info.model_card_template = model_card_template
+        if repo_url is not None:
+            info.repo_url = repo_url
+        if docs_url is not None:
+            info.docs_url = docs_url
+        if language is not None:
+            info.model_card_data.language = language
+        if library_name is not None:
+            info.model_card_data.library_name = library_name
+        if license is not None:
+            info.model_card_data.license = license
+        if license_name is not None:
+            info.model_card_data.license_name = license_name
+        if license_link is not None:
+            info.model_card_data.license_link = license_link
+        if pipeline_tag is not None:
+            info.model_card_data.pipeline_tag = pipeline_tag
+        if tags is not None:
+            if info.model_card_data.tags is not None:
+                info.model_card_data.tags.extend(tags)
+            else:
+                info.model_card_data.tags = tags
 
         # Handle encoders/decoders for args
         cls._hub_mixin_coders = coders or {}
diff --git a/src/huggingface_hub/repocard_data.py b/src/huggingface_hub/repocard_data.py
@@ -242,37 +242,43 @@ class ModelCardData(CardData):
     """Model Card Metadata that is used by Hugging Face Hub when included at the top of your README.md
 
     Args:
-        language (`Union[str, List[str]]`, *optional*):
-            Language of model's training data or metadata. It must be an ISO 639-1, 639-2 or
-            639-3 code (two/three letters), or a special value like "code", "multilingual". Defaults to `None`.
-        license (`str`, *optional*):
-            License of this model. Example: apache-2.0 or any license from
-            https://huggingface.co/docs/hub/repositories-licenses. Defaults to None.
-        library_name (`str`, *optional*):
-            Name of library used by this model. Example: keras or any library from
-            https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/model-libraries.ts.
-            Defaults to None.
-        tags (`List[str]`, *optional*):
-            List of tags to add to your model that can be used when filtering on the Hugging
-            Face Hub. Defaults to None.
         base_model (`str` or `List[str]`, *optional*):
             The identifier of the base model from which the model derives. This is applicable for example if your model is a
             fine-tune or adapter of an existing model. The value must be the ID of a model on the Hub (or a list of IDs
             if your model derives from multiple models). Defaults to None.
         datasets (`List[str]`, *optional*):
             List of datasets that were used to train this model. Should be a dataset ID
             found on https://hf.co/datasets. Defaults to None.
-        metrics (`List[str]`, *optional*):
-            List of metrics used to evaluate this model. Should be a metric name that can be found
-            at https://hf.co/metrics. Example: 'accuracy'. Defaults to None.
         eval_results (`Union[List[EvalResult], EvalResult]`, *optional*):
             List of `huggingface_hub.EvalResult` that define evaluation results of the model. If provided,
             `model_name` is used to as a name on PapersWithCode's leaderboards. Defaults to `None`.
+        language (`Union[str, List[str]]`, *optional*):
+            Language of model's training data or metadata. It must be an ISO 639-1, 639-2 or
+            639-3 code (two/three letters), or a special value like "code", "multilingual". Defaults to `None`.
+        library_name (`str`, *optional*):
+            Name of library used by this model. Example: keras or any library from
+            https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/model-libraries.ts.
+            Defaults to None.
+        license (`str`, *optional*):
+            License of this model. Example: apache-2.0 or any license from
+            https://huggingface.co/docs/hub/repositories-licenses. Defaults to None.
+        license_name (`str`, *optional*):
+            Name of the license of this model. Defaults to None. To be used in conjunction with `license_link`.
+            Common licenses (Apache-2.0, MIT, CC-BY-SA-4.0) do not need a name. In that case, use `license` instead.
+        license_link (`str`, *optional*):
+            Link to the license of this model. Defaults to None. To be used in conjunction with `license_name`.
+            Common licenses (Apache-2.0, MIT, CC-BY-SA-4.0) do not need a link. In that case, use `license` instead.
+        metrics (`List[str]`, *optional*):
+            List of metrics used to evaluate this model. Should be a metric name that can be found
+            at https://hf.co/metrics. Example: 'accuracy'. Defaults to None.
         model_name (`str`, *optional*):
             A name for this model. It is used along with
             `eval_results` to construct the `model-index` within the card's metadata. The name
             you supply here is what will be used on PapersWithCode's leaderboards. If None is provided
             then the repo name is used as a default. Defaults to None.
+        tags (`List[str]`, *optional*):
+            List of tags to add to your model that can be used when filtering on the Hugging
+            Face Hub. Defaults to None.
         ignore_metadata_errors (`str`):
             If True, errors while parsing the metadata section will be ignored. Some information might be lost during
             the process. Use it at your own risk.
@@ -297,27 +303,33 @@ class ModelCardData(CardData):
     def __init__(
         self,
         *,
-        language: Optional[Union[str, List[str]]] = None,
-        license: Optional[str] = None,
-        library_name: Optional[str] = None,
-        tags: Optional[List[str]] = None,
         base_model: Optional[Union[str, List[str]]] = None,
         datasets: Optional[List[str]] = None,
-        metrics: Optional[List[str]] = None,
         eval_results: Optional[List[EvalResult]] = None,
+        language: Optional[Union[str, List[str]]] = None,
+        library_name: Optional[str] = None,
+        license: Optional[str] = None,
+        license_name: Optional[str] = None,
+        license_link: Optional[str] = None,
+        metrics: Optional[List[str]] = None,
         model_name: Optional[str] = None,
+        pipeline_tag: Optional[str] = None,
+        tags: Optional[List[str]] = None,
         ignore_metadata_errors: bool = False,
         **kwargs,
     ):
-        self.language = language
-        self.license = license
-        self.library_name = library_name
-        self.tags = _to_unique_list(tags)
         self.base_model = base_model
         self.datasets = datasets
-        self.metrics = metrics
         self.eval_results = eval_results
+        self.language = language
+        self.library_name = library_name
+        self.license = license
+        self.license_name = license_name
+        self.license_link = license_link
+        self.metrics = metrics
         self.model_name = model_name
+        self.pipeline_tag = pipeline_tag
+        self.tags = _to_unique_list(tags)
 
         model_index = kwargs.pop("model-index", None)
         if model_index:
diff --git a/tests/test_hub_mixin.py b/tests/test_hub_mixin.py
@@ -90,6 +90,14 @@ def _from_pretrained(
         return cls(**kwargs)
 
 
+class BaseModelForInheritance(ModelHubMixin, repo_url="https://hf.co/my-repo", library_name="my-cool-library"):
+    pass
+
+
+class DummyModelInherited(BaseModelForInheritance):
+    pass
+
+
 class DummyModelSavingConfig(ModelHubMixin):
     def _save_pretrained(self, save_directory: Path) -> None:
         """Implementation that uses `config.json` to serialize the config.
@@ -414,3 +422,9 @@ def test_from_cls_with_custom_type(self):
         assert model_reloaded.bar == "bar"
         assert model_reloaded.custom.value == "custom"
         assert model_reloaded.custom_default.value == "default"
+
+    def test_inherited_class(self):
+        """Test MixinInfo attributes are inherited from the parent class."""
+        model = DummyModelInherited()
+        assert model._hub_mixin_info.repo_url == "https://hf.co/my-repo"
+        assert model._hub_mixin_info.model_card_data.library_name == "my-cool-library"
diff --git a/tests/test_hub_mixin_pytorch.py b/tests/test_hub_mixin_pytorch.py
@@ -282,11 +282,11 @@ def test_push_to_hub(self):
     def test_generate_model_card(self):
         model = DummyModelWithModelCard()
         card = model.generate_model_card()
-        assert card.data.languages == ["en", "zh"]
+        assert card.data.language == ["en", "zh"]
         assert card.data.library_name == "my-dummy-lib"
         assert card.data.license == "apache-2.0"
         assert card.data.pipeline_tag == "text-classification"
-        assert card.data.tags == ["tag1", "tag2", "pytorch_model_hub_mixin", "model_hub_mixin"]
+        assert card.data.tags == ["model_hub_mixin", "pytorch_model_hub_mixin", "tag1", "tag2"]
 
         # Model card template has been used
         assert "This is a dummy model card." in str(card)