 from huggingface_hub.inference._common import (
     TASKS_EXPECTING_IMAGES,
     ContentT,
-    ModelStatus,
     RequestParameters,
     _b64_encode,
     _b64_to_image,
@@ -104,7 +103,6 @@
 from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
 from huggingface_hub.utils import build_hf_headers, get_session, hf_raise_for_status
 from huggingface_hub.utils._auth import get_token
-from huggingface_hub.utils._deprecation import _deprecate_method


 if TYPE_CHECKING:
@@ -3193,101 +3191,6 @@ def zero_shot_image_classification(
         response = self._inner_post(request_parameters)
         return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)

-    @_deprecate_method(
-        version="0.35.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
-        ),
-    )
-    def list_deployed_models(
-        self, frameworks: Union[None, str, Literal["all"], List[str]] = None
-    ) -> Dict[str, List[str]]:
-        """
-        List models deployed on the HF Serverless Inference API service.
-
-        This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
-        are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
-        specify `frameworks="all"` as input. Alternatively, if you know beforehand which framework you are interested
-        in, you can also restrict the search to this one (e.g. `frameworks="text-generation-inference"`). The more
-        frameworks are checked, the more time it will take.
-
-        <Tip warning={true}>
-
-        This endpoint method does not return a live list of all models available for the HF Inference API service.
-        It searches over a cached list of models that were recently available and the list may not be up to date.
-        If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        <Tip>
-
-        This endpoint method is mostly useful for discoverability. If you already know which model you want to use and want to
-        check its availability, you can directly use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        Args:
-            frameworks (`Literal["all"]` or `List[str]` or `str`, *optional*):
-                The frameworks to filter on. By default only a subset of the available frameworks are tested. If set to
-                "all", all available frameworks will be tested. It is also possible to provide a single framework or a
-                custom set of frameworks to check.
-
-        Returns:
-            `Dict[str, List[str]]`: A dictionary mapping task names to a sorted list of model IDs.
-
-        Example:
-        ```python
-        >>> from huggingface_hub import InferenceClient
-        >>> client = InferenceClient()
-
-        # Discover zero-shot-classification models currently deployed
-        >>> models = client.list_deployed_models()
-        >>> models["zero-shot-classification"]
-        ['Narsil/deberta-large-mnli-zero-cls', 'facebook/bart-large-mnli', ...]
-
-        # List from only 1 framework
-        >>> client.list_deployed_models("text-generation-inference")
-        {'text-generation': ['bigcode/starcoder', 'meta-llama/Llama-2-70b-chat-hf', ...], ...}
-        ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Listing deployed models is not supported on '{self.provider}'.")
-
-        # Resolve which frameworks to check
-        if frameworks is None:
-            frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
-        elif frameworks == "all":
-            frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
-        elif isinstance(frameworks, str):
-            frameworks = [frameworks]
-        frameworks = list(set(frameworks))
-
-        # Fetch them iteratively
-        models_by_task: Dict[str, List[str]] = {}
-
-        def _unpack_response(framework: str, items: List[Dict]) -> None:
-            for model in items:
-                if framework == "sentence-transformers":
-                    # Models running with the `sentence-transformers` framework can work with both tasks even if not
-                    # branded as such in the API response
-                    models_by_task.setdefault("feature-extraction", []).append(model["model_id"])
-                    models_by_task.setdefault("sentence-similarity", []).append(model["model_id"])
-                else:
-                    models_by_task.setdefault(model["task"], []).append(model["model_id"])
-
-        for framework in frameworks:
-            response = get_session().get(
-                f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
-            )
-            hf_raise_for_status(response)
-            _unpack_response(framework, response.json())
-
-        # Sort alphabetically for discoverability and return
-        for task, models in models_by_task.items():
-            models_by_task[task] = sorted(set(models), key=lambda x: x.lower())
-        return models_by_task
-
     def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
         """
         Get information about the deployed endpoint.
@@ -3351,7 +3254,6 @@ def health_check(self, model: Optional[str] = None) -> bool:
         Check the health of the deployed endpoint.

         Health check is only available with Inference Endpoints powered by Text-Generation-Inference (TGI) or Text-Embedding-Inference (TEI).
-        For Inference API, please use [`InferenceClient.get_model_status`] instead.

         Args:
             model (`str`, *optional*):
@@ -3375,75 +3277,12 @@ def health_check(self, model: Optional[str] = None) -> bool:
         if model is None:
             raise ValueError("Model id not provided.")
         if not model.startswith(("http://", "https://")):
-            raise ValueError(
-                "Model must be an Inference Endpoint URL. For serverless Inference API, please use `InferenceClient.get_model_status`."
-            )
+            raise ValueError("Model must be an Inference Endpoint URL.")
         url = model.rstrip("/") + "/health"

         response = get_session().get(url, headers=build_hf_headers(token=self.token))
         return response.status_code == 200

-    @_deprecate_method(
-        version="0.35.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
-        ),
-    )
-    def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
-        """
-        Get the status of a model hosted on the HF Inference API.
-
-        <Tip>
-
-        This endpoint is mostly useful when you already know which model you want to use and want to check its
-        availability. If you want to discover already deployed models, you should rather use [`~InferenceClient.list_deployed_models`].
-
-        </Tip>
-
-        Args:
-            model (`str`, *optional*):
-                Identifier of the model for which the status is going to be checked. If model is not provided,
-                the model associated with this instance of [`InferenceClient`] will be used. Only the HF Inference API service can be checked, so the
-                identifier cannot be a URL.
-
-
-        Returns:
-            [`ModelStatus`]: An instance of the ModelStatus dataclass, containing information
-            about the state of the model: loaded, state, compute type and framework.
-
-        Example:
-        ```py
-        >>> from huggingface_hub import InferenceClient
-        >>> client = InferenceClient()
-        >>> client.get_model_status("meta-llama/Meta-Llama-3-8B-Instruct")
-        ModelStatus(loaded=True, state='Loaded', compute_type='gpu', framework='text-generation-inference')
-        ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Getting model status is not supported on '{self.provider}'.")
-
-        model = model or self.model
-        if model is None:
-            raise ValueError("Model id not provided.")
-        if model.startswith("https://"):
-            raise NotImplementedError("Model status is only available for Inference API endpoints.")
-        url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"
-
-        response = get_session().get(url, headers=build_hf_headers(token=self.token))
-        hf_raise_for_status(response)
-        response_data = response.json()
-
-        if "error" in response_data:
-            raise ValueError(response_data["error"])
-
-        return ModelStatus(
-            loaded=response_data["loaded"],
-            state=response_data["state"],
-            compute_type=response_data["compute_type"],
-            framework=response_data["framework"],
-        )
-
     @property
     def chat(self) -> "ProxyClientChat":
         return ProxyClientChat(self)
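
The deprecation notices removed in this diff already name the migration path: `HfApi.list_models(..., inference_provider='...')` to discover warm models per provider, and `HfApi.model_info` to check a single model. Below is a minimal migration sketch under those hints; the `pipeline_tag` and `limit` filters, the `expand=["inference"]` argument, and the `inference` attribute on the returned `ModelInfo` are assumptions about the current `HfApi` surface rather than something this diff guarantees, and the endpoint URL passed to `health_check` is a placeholder.

```python
# Hedged migration sketch for callers of the removed helpers. Only
# `inference_provider` and `model_info` come from the deprecation messages;
# names marked as assumed below are not taken from this diff.
from huggingface_hub import HfApi, InferenceClient

api = HfApi()

# Instead of InferenceClient.list_deployed_models(): list warm models per provider.
for model in api.list_models(
    inference_provider="hf-inference",  # any provider id can be passed here
    pipeline_tag="text-generation",     # assumed optional task filter
    limit=5,
):
    print(model.id)

# Instead of InferenceClient.get_model_status(): inspect a single model.
info = api.model_info("meta-llama/Meta-Llama-3-8B-Instruct", expand=["inference"])
print(info.inference)  # assumed to report the warm/cold status

# health_check() is kept, but now only accepts a dedicated Inference Endpoint URL (TGI/TEI).
client = InferenceClient(model="https://my-endpoint.endpoints.huggingface.cloud")  # placeholder URL
print(client.health_check())  # True if the endpoint answers /health with HTTP 200
```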