
Commit 206ff8d

Prepare for v0.35 (#3261)
1 parent f9bc1cb commit 206ff8d

File tree

9 files changed: 5 additions & 475 deletions


docs/source/en/package_reference/inference_client.md

Lines changed: 0 additions & 4 deletions
@@ -35,10 +35,6 @@ pip install --upgrade huggingface_hub[inference]
 
 [[autodoc]] InferenceTimeoutError
 
-### ModelStatus
-
-[[autodoc]] huggingface_hub.inference._common.ModelStatus
-
 ## InferenceAPI
 
 [`InferenceAPI`] is the legacy way to call the Inference API. The interface is more simplistic and requires knowing

docs/source/ko/package_reference/inference_client.md

Lines changed: 0 additions & 3 deletions
@@ -36,9 +36,6 @@ pip install --upgrade huggingface_hub[inference]
 
 대부분의 작업에 대해, 반환 값은 내장된 유형(string, list, image...)을 갖습니다. 보다 복잡한 유형을 위한 목록은 다음과 같습니다.
 
-### 모델 상태[[huggingface_hub.inference._common.ModelStatus]]
-
-[[autodoc]] huggingface_hub.inference._common.ModelStatus
 
 ## 추론 API[[huggingface_hub.InferenceApi]]
 
src/huggingface_hub/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@
 from typing import TYPE_CHECKING
 
 
-__version__ = "0.34.0.dev0"
+__version__ = "0.35.0.dev0"
 
 # Alphabetical order of definitions is ensured in tests
 # WARNING: any comment added in this dictionary definition will be lost when
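After this bump, an installed checkout reports the new development version string. A minimal verification, not specific to this commit beyond the string shown in the diff:

```python
import huggingface_hub

# A source checkout of this commit reports the bumped development version;
# released wheels will report the final 0.35.x string instead.
print(huggingface_hub.__version__)  # e.g. "0.35.0.dev0"
```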

src/huggingface_hub/inference/_client.py

Lines changed: 1 addition & 162 deletions
@@ -45,7 +45,6 @@
 from huggingface_hub.inference._common import (
     TASKS_EXPECTING_IMAGES,
     ContentT,
-    ModelStatus,
     RequestParameters,
     _b64_encode,
     _b64_to_image,
@@ -104,7 +103,6 @@
 from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
 from huggingface_hub.utils import build_hf_headers, get_session, hf_raise_for_status
 from huggingface_hub.utils._auth import get_token
-from huggingface_hub.utils._deprecation import _deprecate_method
 
 
 if TYPE_CHECKING:
@@ -3193,101 +3191,6 @@ def zero_shot_image_classification(
         response = self._inner_post(request_parameters)
         return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)
 
-    @_deprecate_method(
-        version="0.35.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
-        ),
-    )
-    def list_deployed_models(
-        self, frameworks: Union[None, str, Literal["all"], List[str]] = None
-    ) -> Dict[str, List[str]]:
-        """
-        List models deployed on the HF Serverless Inference API service.
-
-        This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
-        are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
-        specify `frameworks="all"` as input. Alternatively, if you know before-hand which framework you are interested
-        in, you can also restrict to search to this one (e.g. `frameworks="text-generation-inference"`). The more
-        frameworks are checked, the more time it will take.
-
-        <Tip warning={true}>
-
-        This endpoint method does not return a live list of all models available for the HF Inference API service.
-        It searches over a cached list of models that were recently available and the list may not be up to date.
-        If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        <Tip>
-
-        This endpoint method is mostly useful for discoverability. If you already know which model you want to use and want to
-        check its availability, you can directly use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        Args:
-            frameworks (`Literal["all"]` or `List[str]` or `str`, *optional*):
-                The frameworks to filter on. By default only a subset of the available frameworks are tested. If set to
-                "all", all available frameworks will be tested. It is also possible to provide a single framework or a
-                custom set of frameworks to check.
-
-        Returns:
-            `Dict[str, List[str]]`: A dictionary mapping task names to a sorted list of model IDs.
-
-        Example:
-        ```python
-        >>> from huggingface_hub import InferenceClient
-        >>> client = InferenceClient()
-
-        # Discover zero-shot-classification models currently deployed
-        >>> models = client.list_deployed_models()
-        >>> models["zero-shot-classification"]
-        ['Narsil/deberta-large-mnli-zero-cls', 'facebook/bart-large-mnli', ...]
-
-        # List from only 1 framework
-        >>> client.list_deployed_models("text-generation-inference")
-        {'text-generation': ['bigcode/starcoder', 'meta-llama/Llama-2-70b-chat-hf', ...], ...}
-        ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Listing deployed models is not supported on '{self.provider}'.")
-
-        # Resolve which frameworks to check
-        if frameworks is None:
-            frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
-        elif frameworks == "all":
-            frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
-        elif isinstance(frameworks, str):
-            frameworks = [frameworks]
-        frameworks = list(set(frameworks))
-
-        # Fetch them iteratively
-        models_by_task: Dict[str, List[str]] = {}
-
-        def _unpack_response(framework: str, items: List[Dict]) -> None:
-            for model in items:
-                if framework == "sentence-transformers":
-                    # Model running with the `sentence-transformers` framework can work with both tasks even if not
-                    # branded as such in the API response
-                    models_by_task.setdefault("feature-extraction", []).append(model["model_id"])
-                    models_by_task.setdefault("sentence-similarity", []).append(model["model_id"])
-                else:
-                    models_by_task.setdefault(model["task"], []).append(model["model_id"])
-
-        for framework in frameworks:
-            response = get_session().get(
-                f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
-            )
-            hf_raise_for_status(response)
-            _unpack_response(framework, response.json())
-
-        # Sort alphabetically for discoverability and return
-        for task, models in models_by_task.items():
-            models_by_task[task] = sorted(set(models), key=lambda x: x.lower())
-        return models_by_task
-
     def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
         """
         Get information about the deployed endpoint.
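The deprecation notice removed above pointed callers to `HfApi.list_models(..., inference_provider='...')`. A minimal sketch of that replacement; the provider name, task, and limit values are chosen for illustration only:

```python
from huggingface_hub import HfApi

api = HfApi()

# Replacement suggested by the removed deprecation message: ask the Hub which
# models are currently served (warm) on a given provider, instead of calling
# the deleted InferenceClient.list_deployed_models().
for model in api.list_models(inference_provider="hf-inference", task="text-generation", limit=5):
    print(model.id)
```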
@@ -3351,7 +3254,6 @@ def health_check(self, model: Optional[str] = None) -> bool:
         Check the health of the deployed endpoint.
 
         Health check is only available with Inference Endpoints powered by Text-Generation-Inference (TGI) or Text-Embedding-Inference (TEI).
-        For Inference API, please use [`InferenceClient.get_model_status`] instead.
 
         Args:
             model (`str`, *optional*):
@@ -3375,75 +3277,12 @@ def health_check(self, model: Optional[str] = None) -> bool:
         if model is None:
             raise ValueError("Model id not provided.")
         if not model.startswith(("http://", "https://")):
-            raise ValueError(
-                "Model must be an Inference Endpoint URL. For serverless Inference API, please use `InferenceClient.get_model_status`."
-            )
+            raise ValueError("Model must be an Inference Endpoint URL.")
         url = model.rstrip("/") + "/health"
 
         response = get_session().get(url, headers=build_hf_headers(token=self.token))
         return response.status_code == 200
 
-    @_deprecate_method(
-        version="0.35.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
-        ),
-    )
-    def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
-        """
-        Get the status of a model hosted on the HF Inference API.
-
-        <Tip>
-
-        This endpoint is mostly useful when you already know which model you want to use and want to check its
-        availability. If you want to discover already deployed models, you should rather use [`~InferenceClient.list_deployed_models`].
-
-        </Tip>
-
-        Args:
-            model (`str`, *optional*):
-                Identifier of the model for witch the status gonna be checked. If model is not provided,
-                the model associated with this instance of [`InferenceClient`] will be used. Only HF Inference API service can be checked so the
-                identifier cannot be a URL.
-
-
-        Returns:
-            [`ModelStatus`]: An instance of ModelStatus dataclass, containing information,
-            about the state of the model: load, state, compute type and framework.
-
-        Example:
-        ```py
-        >>> from huggingface_hub import InferenceClient
-        >>> client = InferenceClient()
-        >>> client.get_model_status("meta-llama/Meta-Llama-3-8B-Instruct")
-        ModelStatus(loaded=True, state='Loaded', compute_type='gpu', framework='text-generation-inference')
-        ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Getting model status is not supported on '{self.provider}'.")
-
-        model = model or self.model
-        if model is None:
-            raise ValueError("Model id not provided.")
-        if model.startswith("https://"):
-            raise NotImplementedError("Model status is only available for Inference API endpoints.")
-        url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"
-
-        response = get_session().get(url, headers=build_hf_headers(token=self.token))
-        hf_raise_for_status(response)
-        response_data = response.json()
-
-        if "error" in response_data:
-            raise ValueError(response_data["error"])
-
-        return ModelStatus(
-            loaded=response_data["loaded"],
-            state=response_data["state"],
-            compute_type=response_data["compute_type"],
-            framework=response_data["framework"],
-        )
-
     @property
     def chat(self) -> "ProxyClientChat":
         return ProxyClientChat(self)
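Likewise, the removed `get_model_status` pointed callers to `HfApi.model_info`. A minimal sketch of that replacement; the `expand=["inference"]` argument and the `inference` attribute (taken here to report warm/cold availability) are assumptions about the Hub API, not part of this diff:

```python
from huggingface_hub import HfApi

api = HfApi()

# Replacement suggested by the removed deprecation message: fetch model metadata
# from the Hub instead of calling the deleted InferenceClient.get_model_status().
info = api.model_info("meta-llama/Meta-Llama-3-8B-Instruct", expand=["inference"])

# Assumed field: `inference` stands in for the old ModelStatus.loaded / ModelStatus.state pair.
print(info.id, getattr(info, "inference", None))
```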

src/huggingface_hub/inference/_common.py

Lines changed: 0 additions & 31 deletions
@@ -80,37 +80,6 @@ class RequestParameters:
     headers: Dict[str, Any]
 
 
-# Add dataclass for ModelStatus. We use this dataclass in get_model_status function.
-@dataclass
-class ModelStatus:
-    """
-    This Dataclass represents the model status in the HF Inference API.
-
-    Args:
-        loaded (`bool`):
-            If the model is currently loaded into HF's Inference API. Models
-            are loaded on-demand, leading to the user's first request taking longer.
-            If a model is loaded, you can be assured that it is in a healthy state.
-        state (`str`):
-            The current state of the model. This can be 'Loaded', 'Loadable', 'TooBig'.
-            If a model's state is 'Loadable', it's not too big and has a supported
-            backend. Loadable models are automatically loaded when the user first
-            requests inference on the endpoint. This means it is transparent for the
-            user to load a model, except that the first call takes longer to complete.
-        compute_type (`Dict`):
-            Information about the compute resource the model is using or will use, such as 'gpu' type and number of
-            replicas.
-        framework (`str`):
-            The name of the framework that the model was built with, such as 'transformers'
-            or 'text-generation-inference'.
-    """
-
-    loaded: bool
-    state: str
-    compute_type: Dict
-    framework: str
-
-
 ## IMPORT UTILS
 
 
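Downstream code that imported `ModelStatus` from `huggingface_hub.inference._common` will now fail at import time. A hypothetical compatibility guard such code might use; the fallback behaviour is illustrative and not part of this change:

```python
# Hypothetical guard for code that previously relied on the removed dataclass.
try:
    from huggingface_hub.inference._common import ModelStatus  # huggingface_hub < 0.35
except ImportError:
    # huggingface_hub >= 0.35: ModelStatus was deleted along with get_model_status();
    # callers should switch to HfApi.model_info() metadata instead.
    ModelStatus = None
```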