diff --git a/src/huggingface_hub/inference/_providers/hf_inference.py b/src/huggingface_hub/inference/_providers/hf_inference.py
index 9ec64a0fed..2d79862a42 100644
--- a/src/huggingface_hub/inference/_providers/hf_inference.py
+++ b/src/huggingface_hub/inference/_providers/hf_inference.py
@@ -38,7 +38,13 @@ def _prepare_url(self, api_key: str, mapped_model: str) -> str:
         # hf-inference provider can handle URLs (e.g. Inference Endpoints or TGI deployment)
         if mapped_model.startswith(("http://", "https://")):
             return mapped_model
-        return f"{self.base_url}/models/{mapped_model}"
+        return (
+            # Feature-extraction and sentence-similarity are the only cases where we handle models with several tasks.
+            f"{self.base_url}/pipeline/{self.task}/{mapped_model}"
+            if self.task in ("feature-extraction", "sentence-similarity")
+            # Otherwise, we use the default endpoint
+            else f"{self.base_url}/models/{mapped_model}"
+        )
 
     def _prepare_payload(self, inputs: Any, parameters: Dict, mapped_model: str) -> Optional[Dict]:
         if isinstance(inputs, bytes):
diff --git a/tests/cassettes/TestInferenceClient.test_sentence_similarity[hf-inference,sentence-similarity].yaml b/tests/cassettes/TestInferenceClient.test_sentence_similarity[hf-inference,sentence-similarity].yaml
index 090885b1d2..246aeccb10 100644
--- a/tests/cassettes/TestInferenceClient.test_sentence_similarity[hf-inference,sentence-similarity].yaml
+++ b/tests/cassettes/TestInferenceClient.test_sentence_similarity[hf-inference,sentence-similarity].yaml
@@ -17,7 +17,7 @@ interactions:
       X-Amzn-Trace-Id:
       - 0434ff33-56fe-49db-9380-17b81e41f756
     method: POST
-    uri: https://router.huggingface.co/hf-inference/models/sentence-transformers/all-MiniLM-L6-v2
+    uri: https://router.huggingface.co/hf-inference/pipeline/sentence-similarity/sentence-transformers/all-MiniLM-L6-v2
   response:
     body:
       string: '[0.7785724997520447,0.4587624967098236,0.29062220454216003]'
diff --git a/tests/cassettes/test_async_sentence_similarity.yaml b/tests/cassettes/test_async_sentence_similarity.yaml
index 6af535ae13..3d975bedfc 100644
--- a/tests/cassettes/test_async_sentence_similarity.yaml
+++ b/tests/cassettes/test_async_sentence_similarity.yaml
@@ -3,7 +3,7 @@ interactions:
     body: null
     headers: {}
     method: POST
-    uri: https://router.huggingface.co/hf-inference/models/sentence-transformers/all-MiniLM-L6-v2
+    uri: https://router.huggingface.co/hf-inference/pipeline/sentence-similarity/sentence-transformers/all-MiniLM-L6-v2
   response:
     body:
       string: '[0.7785724997520447,0.4587624967098236,0.29062220454216003]'