from huggingface_hub.inference._common import (
    TASKS_EXPECTING_IMAGES,
    ContentT,
-    ModelStatus,
    RequestParameters,
    _b64_encode,
    _b64_to_image,
@@ -104,7 +103,6 @@
from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
from huggingface_hub.utils import build_hf_headers, get_session, hf_raise_for_status
from huggingface_hub.utils._auth import get_token
-from huggingface_hub.utils._deprecation import _deprecate_method


if TYPE_CHECKING:
if TYPE_CHECKING :
@@ -3193,101 +3191,6 @@ def zero_shot_image_classification(
        response = self._inner_post(request_parameters)
        return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)

-    @_deprecate_method(
-        version="0.35.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
-        ),
-    )
-    def list_deployed_models(
-        self, frameworks: Union[None, str, Literal["all"], List[str]] = None
-    ) -> Dict[str, List[str]]:
-        """
-        List models deployed on the HF Serverless Inference API service.
-
-        This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
-        are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
-        specify `frameworks="all"` as input. Alternatively, if you know beforehand which framework you are interested
-        in, you can also restrict the search to this one (e.g. `frameworks="text-generation-inference"`). The more
-        frameworks are checked, the more time it will take.
-
-        <Tip warning={true}>
-
-        This endpoint method does not return a live list of all models available for the HF Inference API service.
-        It searches over a cached list of models that were recently available and the list may not be up to date.
-        If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        <Tip>
-
-        This endpoint method is mostly useful for discoverability. If you already know which model you want to use and want to
-        check its availability, you can directly use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        Args:
-            frameworks (`Literal["all"]` or `List[str]` or `str`, *optional*):
-                The frameworks to filter on. By default only a subset of the available frameworks are tested. If set to
-                "all", all available frameworks will be tested. It is also possible to provide a single framework or a
-                custom set of frameworks to check.
-
-        Returns:
-            `Dict[str, List[str]]`: A dictionary mapping task names to a sorted list of model IDs.
-
-        Example:
-        ```python
-        >>> from huggingface_hub import InferenceClient
-        >>> client = InferenceClient()
-
-        # Discover zero-shot-classification models currently deployed
-        >>> models = client.list_deployed_models()
-        >>> models["zero-shot-classification"]
-        ['Narsil/deberta-large-mnli-zero-cls', 'facebook/bart-large-mnli', ...]
-
-        # List from only 1 framework
-        >>> client.list_deployed_models("text-generation-inference")
-        {'text-generation': ['bigcode/starcoder', 'meta-llama/Llama-2-70b-chat-hf', ...], ...}
-        ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Listing deployed models is not supported on '{self.provider}'.")
-
-        # Resolve which frameworks to check
-        if frameworks is None:
-            frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
-        elif frameworks == "all":
-            frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
-        elif isinstance(frameworks, str):
-            frameworks = [frameworks]
-        frameworks = list(set(frameworks))
-
-        # Fetch them iteratively
-        models_by_task: Dict[str, List[str]] = {}
-
-        def _unpack_response(framework: str, items: List[Dict]) -> None:
-            for model in items:
-                if framework == "sentence-transformers":
-                    # Models running with the `sentence-transformers` framework can work with both tasks even if not
-                    # branded as such in the API response
-                    models_by_task.setdefault("feature-extraction", []).append(model["model_id"])
-                    models_by_task.setdefault("sentence-similarity", []).append(model["model_id"])
-                else:
-                    models_by_task.setdefault(model["task"], []).append(model["model_id"])
-
-        for framework in frameworks:
-            response = get_session().get(
-                f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
-            )
-            hf_raise_for_status(response)
-            _unpack_response(framework, response.json())
-
-        # Sort alphabetically for discoverability and return
-        for task, models in models_by_task.items():
-            models_by_task[task] = sorted(set(models), key=lambda x: x.lower())
-        return models_by_task
-
    def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
        """
        Get information about the deployed endpoint.
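
The deprecation notice in the removed `list_deployed_models` method points users to `HfApi.list_models(..., inference_provider='...')` instead. A minimal sketch of that replacement, assuming a recent `huggingface_hub` release; the provider name, task filter, and `limit` value below are illustrative and not part of this diff:

```python
from huggingface_hub import HfApi

api = HfApi()

# List warm models served by a given provider (values are examples, not taken from this diff).
models = api.list_models(
    inference_provider="hf-inference",  # provider name is illustrative
    pipeline_tag="text-generation",     # optional task filter
    limit=10,
)
for model in models:
    print(model.id)
```
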
@@ -3351,7 +3254,6 @@ def health_check(self, model: Optional[str] = None) -> bool:
        Check the health of the deployed endpoint.

        Health check is only available with Inference Endpoints powered by Text-Generation-Inference (TGI) or Text-Embedding-Inference (TEI).
-        For Inference API, please use [`InferenceClient.get_model_status`] instead.

        Args:
            model (`str`, *optional*):
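
Since `health_check` only targets dedicated TGI/TEI Inference Endpoints, here is a short usage sketch; the endpoint URL is a placeholder, not a real deployment:

```python
from huggingface_hub import InferenceClient

# Placeholder URL for a dedicated TGI/TEI Inference Endpoint.
client = InferenceClient(model="https://my-endpoint.endpoints.huggingface.cloud")

# Returns True when GET <endpoint>/health answers with HTTP 200.
print(client.health_check())
```
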
@@ -3375,75 +3277,12 @@ def health_check(self, model: Optional[str] = None) -> bool:
        if model is None:
            raise ValueError("Model id not provided.")
        if not model.startswith(("http://", "https://")):
-            raise ValueError(
-                "Model must be an Inference Endpoint URL. For serverless Inference API, please use `InferenceClient.get_model_status`."
-            )
+            raise ValueError("Model must be an Inference Endpoint URL.")
        url = model.rstrip("/") + "/health"

        response = get_session().get(url, headers=build_hf_headers(token=self.token))
        return response.status_code == 200

-    @_deprecate_method(
-        version="0.35.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
-        ),
-    )
-    def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
-        """
-        Get the status of a model hosted on the HF Inference API.
-
-        <Tip>
-
-        This endpoint is mostly useful when you already know which model you want to use and want to check its
-        availability. If you want to discover already deployed models, you should rather use [`~InferenceClient.list_deployed_models`].
-
-        </Tip>
-
-        Args:
-            model (`str`, *optional*):
-                Identifier of the model for which the status will be checked. If model is not provided,
-                the model associated with this instance of [`InferenceClient`] will be used. Only the HF Inference API service can be checked, so the
-                identifier cannot be a URL.
-
-
-        Returns:
-            [`ModelStatus`]: An instance of the ModelStatus dataclass, containing information
-            about the state of the model: load, state, compute type and framework.
-
-        Example:
-        ```py
-        >>> from huggingface_hub import InferenceClient
-        >>> client = InferenceClient()
-        >>> client.get_model_status("meta-llama/Meta-Llama-3-8B-Instruct")
-        ModelStatus(loaded=True, state='Loaded', compute_type='gpu', framework='text-generation-inference')
-        ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Getting model status is not supported on '{self.provider}'.")
-
-        model = model or self.model
-        if model is None:
-            raise ValueError("Model id not provided.")
-        if model.startswith("https://"):
-            raise NotImplementedError("Model status is only available for Inference API endpoints.")
-        url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"
-
-        response = get_session().get(url, headers=build_hf_headers(token=self.token))
-        hf_raise_for_status(response)
-        response_data = response.json()
-
-        if "error" in response_data:
-            raise ValueError(response_data["error"])
-
-        return ModelStatus(
-            loaded=response_data["loaded"],
-            state=response_data["state"],
-            compute_type=response_data["compute_type"],
-            framework=response_data["framework"],
-        )
-
    @property
    def chat(self) -> "ProxyClientChat":
        return ProxyClientChat(self)
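
The deprecation notice in the removed `get_model_status` method recommends `HfApi.model_info` instead. A hedged sketch of that replacement; the `expand=["inference"]` argument and the `inference` attribute reflect recent `huggingface_hub` releases and are assumptions, not part of this diff (the model ID is taken from the removed docstring example):

```python
from huggingface_hub import HfApi

api = HfApi()

# Ask the Hub whether the model is currently warm on HF Inference.
# The "inference" expand property and `info.inference` attribute are assumed from recent releases.
info = api.model_info("meta-llama/Meta-Llama-3-8B-Instruct", expand=["inference"])
print(info.inference)  # e.g. "warm"
```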