
Commit 5a48cf2

Implement a Model Filter class (#553)
* Update error on tests
* Implement model search by Filter
* Fix init import
* Clean
* Rename and fix imports
* api -> endpoint
* Fix test
* New version
* rm carbon emissions for now
* Fixup docs
* clean
* author_or_organization -> author
* Keep consistent
* Clean up logic for model filter
* Finish implementation, need to write tests
* Proper doc
* Update init
* Add all tests
* partially there
* Better docstring
* Fix test
* Add tests with filter
* typo fix
* Flip query
* Clean
* Include doc example
* framework -> library
* Clean up filter if/else
* hub -> Hub
* List of strings doc
* Fixup tests
* Passing
1 parent bf00b94 commit 5a48cf2


6 files changed: +619 -164 lines changed

src/huggingface_hub/__init__.py

Lines changed: 3 additions & 0 deletions
@@ -32,8 +32,10 @@
 )
 from .file_download import cached_download, hf_hub_download, hf_hub_url
 from .hf_api import (
+    DatasetSearchArguments,
     HfApi,
     HfFolder,
+    ModelSearchArguments,
     create_repo,
     dataset_info,
     delete_file,
@@ -65,3 +67,4 @@
 from .repository import Repository
 from .snapshot_download import snapshot_download
 from .utils import logging
+from .utils.endpoint_helpers import DatasetFilter, ModelFilter
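The newly exported names can be used together as in the following sketch. It is a minimal example based on the docstring examples in the hf_api.py diff below; the string values are illustrative and the results depend on what is currently hosted on the Hub.

from huggingface_hub import DatasetFilter, HfApi, ModelFilter

api = HfApi()

# Text-classification models trained on the "common_voice" dataset
models = api.list_models(
    filter=ModelFilter(task="text-classification", trained_dataset="common_voice")
)

# Russian language-modeling datasets
datasets = api.list_datasets(
    filter=DatasetFilter(languages="ru", task_ids="language-modeling")
)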

src/huggingface_hub/hf_api.py

Lines changed: 158 additions & 19 deletions
@@ -32,7 +32,13 @@
     REPO_TYPES_URL_PREFIXES,
     SPACES_SDK_TYPES,
 )
-from .utils.tags import AttributeDictionary, DatasetTags, ModelTags
+from .utils.endpoint_helpers import (
+    AttributeDictionary,
+    DatasetFilter,
+    DatasetTags,
+    ModelFilter,
+    ModelTags,
+)


 if sys.version_info >= (3, 8):
@@ -271,7 +277,6 @@ class ModelSearchArguments(AttributeDictionary):
     A nested namespace object holding all possible values for properties of
     models currently hosted in the Hub with tab-completion.
     If a value starts with a number, it will only exist in the dictionary
-
     Example:
         >>> args = ModelSearchArgs()
         >>> args.author_or_organization.huggingface
@@ -298,15 +303,14 @@ def clean(s: str):
                 name = model.modelId
             model_name_dict[name] = clean(name)
         self["model_name"] = model_name_dict
-        self["author_or_organization"] = author_dict
+        self["author"] = author_dict


 class DatasetSearchArguments(AttributeDictionary):
     """
     A nested namespace object holding all possible values for properties of
     datasets currently hosted in the Hub with tab-completion.
     If a value starts with a number, it will only exist in the dictionary
-
     Example:
         >>> args = DatasetSearchArguments()
         >>> args.author_or_organization.huggingface
@@ -333,7 +337,7 @@ def clean(s: str):
                 name = dataset.id
             dataset_name_dict[name] = clean(name)
         self["dataset_name"] = dataset_name_dict
-        self["author_or_organization"] = author_dict
+        self["author"] = author_dict


 def write_to_credential_store(username: str, password: str):
@@ -506,7 +510,7 @@ def get_dataset_tags(self) -> DatasetTags:

     def list_models(
         self,
-        filter: Union[str, Iterable[str], None] = None,
+        filter: Union[ModelFilter, str, Iterable[str], None] = None,
         author: Optional[str] = None,
         search: Optional[str] = None,
         sort: Union[Literal["lastModified"], str, None] = None,
@@ -519,8 +523,8 @@ def list_models(
         Get the public list of all the models on huggingface.co

         Args:
-            filter (:obj:`str` or :class:`Iterable`, `optional`):
-                A string which can be used to identify models on the hub by their tags.
+            filter (:class:`ModelFilter` or :obj:`str` or :class:`Iterable`, `optional`):
+                A string or `ModelFilter` which can be used to identify models on the hub.
                 Example usage:

                     >>> from huggingface_hub import HfApi
@@ -529,17 +533,26 @@ def list_models(
                     >>> # List all models
                     >>> api.list_models()

+                    >>> # Get all valid search arguments
+                    >>> args = ModelSearchArguments()
+
                     >>> # List only the text classification models
                     >>> api.list_models(filter="text-classification")
+                    >>> # Using the `ModelFilter`
+                    >>> filt = ModelFilter(task="text-classification")
+                    >>> # With `ModelSearchArguments`
+                    >>> filt = ModelFilter(task=args.pipeline_tags.TextClassification)
+                    >>> api.list_models(filter=filt)

-                    >>> # List only the russian models compatible with pytorch
-                    >>> api.list_models(filter=("ru", "pytorch"))
+                    >>> # Using `ModelFilter` and `ModelSearchArguments` to find text classification in both PyTorch and TensorFlow
+                    >>> filt = ModelFilter(task=args.pipeline_tags.TextClassification, library=[args.library.PyTorch, args.library.TensorFlow])
+                    >>> api.list_models(filter=filt)

-                    >>> # List only the models trained on the "common_voice" dataset
-                    >>> api.list_models(filter="dataset:common_voice")
-
-                    >>> # List only the models from the AllenNLP library
+                    >>> # List only models from the AllenNLP library
                     >>> api.list_models(filter="allennlp")
+                    >>> # Using `ModelFilter` and `ModelSearchArguments`
+                    >>> filt = ModelFilter(library=args.library.allennlp)
+
             author (:obj:`str`, `optional`):
                 A string which identify the author (user or organization) of the returned models
                 Example usage:
@@ -552,6 +565,7 @@ def list_models(

                     >>> # List only the text classification models from google
                     >>> api.list_models(filter="text-classification", author="google")
+
             search (:obj:`str`, `optional`):
                 A string that will be contained in the returned models
                 Example usage:
@@ -564,6 +578,7 @@ def list_models(

                     >>> #List all models with "bert" in their name made by google
                     >>> api.list_models(search="bert", author="google")
+
             sort (:obj:`Literal["lastModified"]` or :obj:`str`, `optional`):
                 The key with which to sort the resulting models. Possible values are the properties of the `ModelInfo`
                 class.
@@ -582,7 +597,10 @@ def list_models(
         path = f"{self.endpoint}/api/models"
         params = {}
         if filter is not None:
-            params.update({"filter": filter})
+            if isinstance(filter, ModelFilter):
+                params = self._unpack_model_filter(filter)
+            else:
+                params.update({"filter": filter})
             params.update({"full": True})
         if author is not None:
             params.update({"author": author})
@@ -606,9 +624,69 @@
         d = r.json()
         return [ModelInfo(**x) for x in d]

+    def _unpack_model_filter(self, model_filter: ModelFilter):
+        """
+        Unpacks a `ModelFilter` into something readable for `list_models`
+        """
+        model_str = ""
+        tags = []
+
+        # Handling author
+        if model_filter.author is not None:
+            model_str = f"{model_filter.author}/"
+
+        # Handling model_name
+        if model_filter.model_name is not None:
+            model_str += model_filter.model_name
+
+        filter_tuple = []
+
+        # Handling tasks
+        if model_filter.task is not None:
+            filter_tuple.extend(
+                [model_filter.task]
+                if isinstance(model_filter.task, str)
+                else model_filter.task
+            )
+
+        # Handling dataset
+        if model_filter.trained_dataset is not None:
+            if not isinstance(model_filter.trained_dataset, (list, tuple)):
+                model_filter.trained_dataset = [model_filter.trained_dataset]
+            for dataset in model_filter.trained_dataset:
+                if "dataset:" not in dataset:
+                    dataset = f"dataset:{dataset}"
+                filter_tuple.append(dataset)
+
+        # Handling library
+        if model_filter.library:
+            filter_tuple.extend(
+                [model_filter.library]
+                if isinstance(model_filter.library, str)
+                else model_filter.library
+            )
+
+        # Handling tags
+        if model_filter.tags:
+            tags.extend(
+                [model_filter.tags]
+                if isinstance(model_filter.tags, str)
+                else model_filter.tags
+            )
+
+        query_dict = {}
+        if model_str is not None:
+            query_dict["search"] = model_str
+        if len(tags) > 0:
+            query_dict["tags"] = tags
+        if model_filter.language is not None:
+            filter_tuple.append(model_filter.language)
+        query_dict["filter"] = tuple(filter_tuple)
+        return query_dict
+
     def list_datasets(
         self,
-        filter: Union[str, Iterable[str], None] = None,
+        filter: Union[DatasetFilter, str, Iterable[str], None] = None,
         author: Optional[str] = None,
         search: Optional[str] = None,
         sort: Union[Literal["lastModified"], str, None] = None,
@@ -620,21 +698,36 @@ def list_datasets(
         Get the public list of all the datasets on huggingface.co

         Args:
-            filter (:obj:`str` or :class:`Iterable`, `optional`):
-                A string which can be used to identify datasets on the hub by their tags.
+            filter (:class:`DatasetFilter` or :obj:`str` or :class:`Iterable`, `optional`):
+                A string or `DatasetFilter` which can be used to identify datasets on the hub.
                 Example usage:

+
                     >>> from huggingface_hub import HfApi
                     >>> api = HfApi()

                     >>> # List all datasets
                     >>> api.list_datasets()

+                    >>> # Get all valid search arguments
+                    >>> args = DatasetSearchArguments()
+
                     >>> # List only the text classification datasets
                     >>> api.list_datasets(filter="task_categories:text-classification")
+                    >>> # Using the `DatasetFilter`
+                    >>> filt = DatasetFilter(task_categories="text-classification")
+                    >>> # With `DatasetSearchArguments`
+                    >>> filt = DatasetFilter(task=args.task_categories.text_classification)
+                    >>> api.list_models(filter=filt)

                     >>> # List only the datasets in russian for language modeling
                     >>> api.list_datasets(filter=("languages:ru", "task_ids:language-modeling"))
+                    >>> # Using the `DatasetFilter`
+                    >>> filt = DatasetFilter(languages="ru", task_ids="language-modeling")
+                    >>> # With `DatasetSearchArguments`
+                    >>> filt = DatasetFilter(languages=args.languages.ru, task_ids=args.task_ids.language_modeling)
+                    >>> api.list_datasets(filter=filt)
+
             author (:obj:`str`, `optional`):
                 A string which identify the author of the returned models
                 Example usage:
@@ -647,6 +740,7 @@ def list_datasets(

                     >>> # List only the text classification datasets from google
                     >>> api.list_datasets(filter="text-classification", author="google")
+
             search (:obj:`str`, `optional`):
                 A string that will be contained in the returned models
                 Example usage:
@@ -659,6 +753,7 @@ def list_datasets(

                     >>> #List all datasets with "text" in their name made by google
                     >>> api.list_datasets(search="text", author="google")
+
             sort (:obj:`Literal["lastModified"]` or :obj:`str`, `optional`):
                 The key with which to sort the resulting datasets. Possible values are the properties of the `DatasetInfo`
                 class.
@@ -674,7 +769,10 @@ def list_datasets(
         path = f"{self.endpoint}/api/datasets"
         params = {}
         if filter is not None:
-            params.update({"filter": filter})
+            if isinstance(filter, DatasetFilter):
+                params = self._unpack_dataset_filter(filter)
+            else:
+                params.update({"filter": filter})
         if author is not None:
             params.update({"author": author})
         if search is not None:
@@ -693,6 +791,47 @@ def list_datasets(
         d = r.json()
         return [DatasetInfo(**x) for x in d]

+    def _unpack_dataset_filter(self, dataset_filter: DatasetFilter):
+        """
+        Unpacks a `DatasetFilter` into something readable for `list_datasets`
+        """
+        dataset_str = ""
+
+        # Handling author
+        if dataset_filter.author is not None:
+            dataset_str = f"{dataset_filter.author}/"
+
+        # Handling dataset_name
+        if dataset_filter.dataset_name is not None:
+            dataset_str += dataset_filter.dataset_name
+
+        filter_tuple = []
+        data_attributes = [
+            "benchmark",
+            "language_creators",
+            "languages",
+            "multilinguality",
+            "size_categories",
+            "task_categories",
+            "task_ids",
+        ]
+
+        for attr in data_attributes:
+            curr_attr = getattr(dataset_filter, attr)
+            if curr_attr is not None:
+                if not isinstance(curr_attr, (list, tuple)):
+                    curr_attr = [curr_attr]
+                for data in curr_attr:
+                    if f"{attr}:" not in data:
+                        data = f"{attr}:{data}"
+                    filter_tuple.append(data)
+
+        query_dict = {}
+        if dataset_str is not None:
+            query_dict["search"] = dataset_str
+        query_dict["filter"] = tuple(filter_tuple)
+        return query_dict
+
     def list_metrics(self) -> List[MetricInfo]:
         """
         Get the public list of all the metrics on huggingface.co
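To make the query construction concrete, the sketch below follows the branches of `_unpack_model_filter` for one sample filter. The keyword names are the `ModelFilter` fields the helper reads above; the commented dictionary is what those branches should produce and is meant as an illustration, not a guaranteed wire format.

from huggingface_hub import HfApi, ModelFilter

api = HfApi()
filt = ModelFilter(
    author="huggingface",            # becomes the "search" prefix "huggingface/"
    task="text-classification",      # appended to the "filter" tuple as-is
    trained_dataset="common_voice",  # prefixed with "dataset:" when missing
    library="pytorch",               # appended to the "filter" tuple as-is
)

# Following the logic above, the expanded query parameters are roughly:
#   {"search": "huggingface/",
#    "filter": ("text-classification", "dataset:common_voice", "pytorch")}
params = api._unpack_model_filter(filt)

# list_models performs the same unpacking internally when given a ModelFilter
models = api.list_models(filter=filt)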
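Likewise, `_unpack_dataset_filter` prefixes each value with the attribute it came from, so a `DatasetFilter` maps onto the same "key:value" strings that the plain-string API already accepts. A small sketch, assuming `DatasetFilter` exposes the attributes listed in `data_attributes` above:

from huggingface_hub import DatasetFilter, HfApi

api = HfApi()
filt = DatasetFilter(languages="ru", task_ids="language-modeling")

# Single values are wrapped in a list and prefixed with their attribute name,
# giving roughly:
#   {"search": "", "filter": ("languages:ru", "task_ids:language-modeling")}
params = api._unpack_dataset_filter(filt)

# Equivalent to the plain-string form in the docstring above:
#   api.list_datasets(filter=("languages:ru", "task_ids:language-modeling"))
datasets = api.list_datasets(filter=filt)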

0 commit comments
