from huggingface_hub.inference._common import (
    TASKS_EXPECTING_IMAGES,
    ContentT,
-    ModelStatus,
    RequestParameters,
    _b64_encode,
    _b64_to_image,
@@ -104,7 +103,6 @@
from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
from huggingface_hub.utils import build_hf_headers, get_session, hf_raise_for_status
from huggingface_hub.utils._auth import get_token
-from huggingface_hub.utils._deprecation import _deprecate_method


if TYPE_CHECKING:
if TYPE_CHECKING :
@@ -3193,101 +3191,6 @@ def zero_shot_image_classification(
        response = self._inner_post(request_parameters)
        return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)

-    @_deprecate_method(
-        version="0.35.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
-        ),
-    )
-    def list_deployed_models(
-        self, frameworks: Union[None, str, Literal["all"], List[str]] = None
-    ) -> Dict[str, List[str]]:
-        """
-        List models deployed on the HF Serverless Inference API service.
-
-        This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
-        are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
-        specify `frameworks="all"` as input. Alternatively, if you know beforehand which framework you are interested
-        in, you can also restrict the search to this one (e.g. `frameworks="text-generation-inference"`). The more
-        frameworks are checked, the more time it will take.
-
-        <Tip warning={true}>
-
-        This endpoint method does not return a live list of all models available for the HF Inference API service.
-        It searches over a cached list of models that were recently available and the list may not be up to date.
-        If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        <Tip>
-
-        This endpoint method is mostly useful for discoverability. If you already know which model you want to use and want to
-        check its availability, you can directly use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        Args:
-            frameworks (`Literal["all"]` or `List[str]` or `str`, *optional*):
-                The frameworks to filter on. By default only a subset of the available frameworks are tested. If set to
-                "all", all available frameworks will be tested. It is also possible to provide a single framework or a
-                custom set of frameworks to check.
-
-        Returns:
-            `Dict[str, List[str]]`: A dictionary mapping task names to a sorted list of model IDs.
-
-        Example:
-        ```python
-        >>> from huggingface_hub import InferenceClient
-        >>> client = InferenceClient()
-
-        # Discover zero-shot-classification models currently deployed
-        >>> models = client.list_deployed_models()
-        >>> models["zero-shot-classification"]
-        ['Narsil/deberta-large-mnli-zero-cls', 'facebook/bart-large-mnli', ...]
-
-        # List from only 1 framework
-        >>> client.list_deployed_models("text-generation-inference")
-        {'text-generation': ['bigcode/starcoder', 'meta-llama/Llama-2-70b-chat-hf', ...], ...}
-        ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Listing deployed models is not supported on '{self.provider}'.")
-
-        # Resolve which frameworks to check
-        if frameworks is None:
-            frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
-        elif frameworks == "all":
-            frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
-        elif isinstance(frameworks, str):
-            frameworks = [frameworks]
-        frameworks = list(set(frameworks))
-
-        # Fetch them iteratively
-        models_by_task: Dict[str, List[str]] = {}
-
-        def _unpack_response(framework: str, items: List[Dict]) -> None:
-            for model in items:
-                if framework == "sentence-transformers":
-                    # Models running with the `sentence-transformers` framework can work with both tasks even if not
-                    # branded as such in the API response
-                    models_by_task.setdefault("feature-extraction", []).append(model["model_id"])
-                    models_by_task.setdefault("sentence-similarity", []).append(model["model_id"])
-                else:
-                    models_by_task.setdefault(model["task"], []).append(model["model_id"])
-
-        for framework in frameworks:
-            response = get_session().get(
-                f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
-            )
-            hf_raise_for_status(response)
-            _unpack_response(framework, response.json())
-
-        # Sort alphabetically for discoverability and return
-        for task, models in models_by_task.items():
-            models_by_task[task] = sorted(set(models), key=lambda x: x.lower())
-        return models_by_task
-
    def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
        """
        Get information about the deployed endpoint.
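
The deprecation notice in the removed `list_deployed_models` method points users to `HfApi.list_models(..., inference_provider='...')` instead. A minimal sketch of that replacement, assuming a recent `huggingface_hub` release; the provider name, task filter, and `limit` value below are illustrative and not part of this diff:

```python
from huggingface_hub import HfApi

api = HfApi()

# List warm models served by a given provider (values are examples, not taken from this diff).
models = api.list_models(
    inference_provider="hf-inference",  # provider name is illustrative
    pipeline_tag="text-generation",     # optional task filter
    limit=10,
)
for model in models:
    print(model.id)
```
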
@@ -3351,7 +3254,6 @@ def health_check(self, model: Optional[str] = None) -> bool:
        Check the health of the deployed endpoint.

        Health check is only available with Inference Endpoints powered by Text-Generation-Inference (TGI) or Text-Embedding-Inference (TEI).
-        For Inference API, please use [`InferenceClient.get_model_status`] instead.

        Args:
            model (`str`, *optional*):
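
Since `health_check` only targets dedicated TGI/TEI Inference Endpoints, here is a short usage sketch; the endpoint URL is a placeholder, not a real deployment:

```python
from huggingface_hub import InferenceClient

# Placeholder URL for a dedicated TGI/TEI Inference Endpoint.
client = InferenceClient(model="https://my-endpoint.endpoints.huggingface.cloud")

# Returns True when GET <endpoint>/health answers with HTTP 200.
print(client.health_check())
```
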
@@ -3375,75 +3277,12 @@ def health_check(self, model: Optional[str] = None) -> bool:
        if model is None:
            raise ValueError("Model id not provided.")
        if not model.startswith(("http://", "https://")):
-            raise ValueError(
-                "Model must be an Inference Endpoint URL. For serverless Inference API, please use `InferenceClient.get_model_status`."
-            )
+            raise ValueError("Model must be an Inference Endpoint URL.")
        url = model.rstrip("/") + "/health"

        response = get_session().get(url, headers=build_hf_headers(token=self.token))
        return response.status_code == 200

-    @_deprecate_method(
-        version="0.35.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
-        ),
-    )
-    def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
-        """
-        Get the status of a model hosted on the HF Inference API.
-
-        <Tip>
-
-        This endpoint is mostly useful when you already know which model you want to use and want to check its
-        availability. If you want to discover already deployed models, you should rather use [`~InferenceClient.list_deployed_models`].
-
-        </Tip>
-
-        Args:
-            model (`str`, *optional*):
-                Identifier of the model for which the status will be checked. If model is not provided,
-                the model associated with this instance of [`InferenceClient`] will be used. Only the HF Inference API service can be checked, so the
-                identifier cannot be a URL.
-
-
-        Returns:
-            [`ModelStatus`]: An instance of the ModelStatus dataclass, containing information
-            about the state of the model: load, state, compute type and framework.
-
-        Example:
-        ```py
-        >>> from huggingface_hub import InferenceClient
-        >>> client = InferenceClient()
-        >>> client.get_model_status("meta-llama/Meta-Llama-3-8B-Instruct")
-        ModelStatus(loaded=True, state='Loaded', compute_type='gpu', framework='text-generation-inference')
-        ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Getting model status is not supported on '{self.provider}'.")
-
-        model = model or self.model
-        if model is None:
-            raise ValueError("Model id not provided.")
-        if model.startswith("https://"):
-            raise NotImplementedError("Model status is only available for Inference API endpoints.")
-        url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"
-
-        response = get_session().get(url, headers=build_hf_headers(token=self.token))
-        hf_raise_for_status(response)
-        response_data = response.json()
-
-        if "error" in response_data:
-            raise ValueError(response_data["error"])
-
-        return ModelStatus(
-            loaded=response_data["loaded"],
-            state=response_data["state"],
-            compute_type=response_data["compute_type"],
-            framework=response_data["framework"],
-        )
-
    @property
    def chat(self) -> "ProxyClientChat":
        return ProxyClientChat(self)
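
The deprecation notice in the removed `get_model_status` method recommends `HfApi.model_info` instead. A hedged sketch of that replacement; the `expand=["inference"]` argument and the `inference` attribute reflect recent `huggingface_hub` releases and are assumptions, not part of this diff (the model ID is taken from the removed docstring example):

```python
from huggingface_hub import HfApi

api = HfApi()

# Ask the Hub whether the model is currently warm on HF Inference.
# The "inference" expand property and `info.inference` attribute are assumed from recent releases.
info = api.model_info("meta-llama/Meta-Llama-3-8B-Instruct", expand=["inference"])
print(info.inference)  # e.g. "warm"
```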