Skip to content

Commit 3490a10

Browse files
committed
Add OVHcloud AI Endpoints provider
Merge conflicts resolved in: docs/source/en/guides/inference.md
1 parent 5c3a252 commit 3490a10

File tree

8 files changed

+269
-33
lines changed

8 files changed

+269
-33
lines changed

docs/source/en/guides/inference.md

Lines changed: 31 additions & 31 deletions
Large diffs are not rendered by default.

src/huggingface_hub/inference/_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ class InferenceClient:
135135
Note: for better compatibility with OpenAI's client, `model` has been aliased as `base_url`. Those 2
136136
arguments are mutually exclusive. If a URL is passed as `model` or `base_url` for chat completion, the `(/v1)/chat/completions` suffix path will be appended to the URL.
137137
provider (`str`, *optional*):
138-
Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"clarifai"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"publicai"`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"`, `"wavespeed"` or `"zai-org"`.
138+
Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"clarifai"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"ovhcloud"`, `"publicai"`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"`, `"wavespeed"` or `"zai-org"`.
139139
Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
140140
If model is a URL or `base_url` is passed, then `provider` is not used.
141141
token (`str`, *optional*):

src/huggingface_hub/inference/_generated/_async_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ class AsyncInferenceClient:
126126
Note: for better compatibility with OpenAI's client, `model` has been aliased as `base_url`. Those 2
127127
arguments are mutually exclusive. If a URL is passed as `model` or `base_url` for chat completion, the `(/v1)/chat/completions` suffix path will be appended to the URL.
128128
provider (`str`, *optional*):
129-
Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"clarifai"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"publicai"`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"`, `"wavespeed"` or `"zai-org"`.
129+
Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"clarifai"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"ovhcloud"`, `"publicai"`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"`, `"wavespeed"` or `"zai-org"`.
130130
Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
131131
If model is a URL or `base_url` is passed, then `provider` is not used.
132132
token (`str`, *optional*):

src/huggingface_hub/inference/_providers/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
from .novita import NovitaConversationalTask, NovitaTextGenerationTask, NovitaTextToVideoTask
3939
from .nscale import NscaleConversationalTask, NscaleTextToImageTask
4040
from .openai import OpenAIConversationalTask
41+
from .ovhcloud import OVHcloudAIEndpointsAutomaticSpeechRecognitionTask, OVHcloudAIEndpointsConversationalTask, OVHcloudAIEndpointsFeatureExtractionTask, OVHcloudAIEndpointsTextToImageTask
4142
from .publicai import PublicAIConversationalTask
4243
from .replicate import (
4344
ReplicateAutomaticSpeechRecognitionTask,
@@ -76,6 +77,7 @@
7677
"novita",
7778
"nscale",
7879
"openai",
80+
"ovhcloud",
7981
"publicai",
8082
"replicate",
8183
"sambanova",
@@ -172,6 +174,12 @@
172174
"openai": {
173175
"conversational": OpenAIConversationalTask(),
174176
},
177+
"ovhcloud": {
178+
"conversational": OVHcloudAIEndpointsConversationalTask(),
179+
"text-to-image": OVHcloudAIEndpointsTextToImageTask(),
180+
"feature-extraction": OVHcloudAIEndpointsFeatureExtractionTask(),
181+
"automatic-speech-recognition": OVHcloudAIEndpointsAutomaticSpeechRecognitionTask(),
182+
},
175183
"publicai": {
176184
"conversational": PublicAIConversationalTask(),
177185
},

src/huggingface_hub/inference/_providers/_common.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
"hyperbolic": {},
3333
"nebius": {},
3434
"nscale": {},
35+
"ovhcloud": {},
3536
"replicate": {},
3637
"sambanova": {},
3738
"scaleway": {},
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
import base64
2+
from abc import ABC
3+
from typing import Any, Dict, Optional, Union
4+
5+
from huggingface_hub.hf_api import InferenceProviderMapping
6+
from huggingface_hub.inference._common import RequestParameters, _as_dict
7+
from huggingface_hub.inference._providers._common import (
8+
TaskProviderHelper,
9+
filter_none,
10+
)
11+
12+
_PROVIDER = "ovhcloud"
_BASE_URL = "https://oai.endpoints.kepler.ai.cloud.ovh.net"


class OVHcloudAIEndpointsTask(TaskProviderHelper, ABC):
    """Base helper for OVHcloud AI Endpoints, an OpenAI-compatible provider API."""

    def __init__(self, task: str):
        super().__init__(provider=_PROVIDER, base_url=_BASE_URL, task=task)

    def _prepare_route(self, mapped_model: str, api_key: str) -> str:
        """Return the OpenAI-compatible route for the configured task."""
        # One route per supported task; anything else is rejected.
        routes = {
            "text-to-image": "/v1/images/generations",
            "conversational": "/v1/chat/completions",
            "feature-extraction": "/v1/embeddings",
            "automatic-speech-recognition": "/v1/audio/transcriptions",
        }
        route = routes.get(self.task)
        if route is None:
            raise ValueError(f"Unsupported task '{self.task}' for OVHcloud AI Endpoints.")
        return route

    def _prepare_payload_as_dict(
        self, messages: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
    ) -> Optional[Dict]:
        """Default payload shape: chat-completion style (subclasses override for other tasks)."""
        payload: Dict[str, Any] = {"messages": messages, "model": provider_mapping_info.provider_id}
        # None-valued parameters are dropped; explicit parameters may override keys.
        payload.update(filter_none(parameters))
        return payload
34+
35+
36+
class OVHcloudAIEndpointsConversationalTask(OVHcloudAIEndpointsTask):
    """Chat-completion task for OVHcloud AI Endpoints (`/v1/chat/completions`).

    The inherited `_prepare_payload_as_dict` already builds the chat payload
    ({"messages": ..., "model": ..., **filtered_params}); a previous override
    here delegated verbatim to `super()` and was removed as dead code.
    """

    def __init__(self):
        super().__init__("conversational")
44+
45+
46+
class OVHcloudAIEndpointsTextToImageTask(OVHcloudAIEndpointsTask):
    """Text-to-image task for OVHcloud AI Endpoints (`/v1/images/generations`)."""

    def __init__(self):
        super().__init__("text-to-image")

    def _prepare_payload_as_dict(
        self, inputs: Any, parameters: dict, provider_mapping_info: InferenceProviderMapping
    ) -> Optional[dict]:
        """Build an OpenAI-style image-generation payload from the prompt."""
        payload = {"prompt": inputs, "model": provider_mapping_info.provider_id}
        payload.update(filter_none(parameters))
        return payload

    def get_response(self, response: Union[bytes, dict], request_params: Optional[RequestParameters] = None) -> Any:
        """Decode the first generated image (base64-encoded `b64_json`) to raw bytes."""
        first_image = _as_dict(response)["data"][0]
        return base64.b64decode(first_image["b64_json"])
59+
60+
class OVHcloudAIEndpointsFeatureExtractionTask(OVHcloudAIEndpointsTask):
    """Feature-extraction (embeddings) task for OVHcloud AI Endpoints (`/v1/embeddings`)."""

    def __init__(self):
        super().__init__("feature-extraction")

    def _prepare_payload_as_dict(
        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
    ) -> Optional[Dict]:
        """Build an OpenAI-style embeddings payload from the input text."""
        payload: Dict[str, Any] = {"input": inputs, "model": provider_mapping_info.provider_id}
        payload.update(filter_none(parameters))
        return payload

    def get_response(self, response: Union[bytes, dict], request_params: Optional[RequestParameters] = None) -> Any:
        """Extract the embedding vectors from the response's `data` entries."""
        vectors = []
        for entry in _as_dict(response)["data"]:
            vectors.append(entry["embedding"])
        return vectors
72+
73+
class OVHcloudAIEndpointsAutomaticSpeechRecognitionTask(OVHcloudAIEndpointsTask):
    """Automatic-speech-recognition task for OVHcloud AI Endpoints (`/v1/audio/transcriptions`)."""

    def __init__(self):
        super().__init__("automatic-speech-recognition")

    def _prepare_payload_as_dict(
        self, inputs: Any, parameters: dict, provider_mapping_info: InferenceProviderMapping
    ) -> Optional[dict]:
        """Build a transcription payload; `inputs` is the audio payload sent as `file`."""
        payload = {"file": inputs, "model": provider_mapping_info.provider_id}
        payload.update(filter_none(parameters))
        return payload

    def get_response(self, response: Union[bytes, dict], request_params: Optional[RequestParameters] = None) -> Any:
        """Return the transcribed text from the response's `text` field."""
        parsed = _as_dict(response)
        return parsed["text"]

tests/test_inference_client.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,12 @@
117117
"text-generation": "NousResearch/Nous-Hermes-Llama2-13b",
118118
"conversational": "meta-llama/Llama-3.1-8B-Instruct",
119119
},
120+
"ovhcloud": {
121+
"automatic-speech-recognition": "openai/whisper-large-v3",
122+
"conversational": "meta-llama/Llama-3.1-8B-Instruct",
123+
"feature-extraction": "BAAI/bge-m3",
124+
"text-to-image": "stabilityai/stable-diffusion-xl-base-1.0",
125+
},
120126
"replicate": {
121127
"text-to-image": "ByteDance/SDXL-Lightning",
122128
},

tests/test_inference_providers.py

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
from huggingface_hub.inference._providers.novita import NovitaConversationalTask, NovitaTextGenerationTask
4747
from huggingface_hub.inference._providers.nscale import NscaleConversationalTask, NscaleTextToImageTask
4848
from huggingface_hub.inference._providers.openai import OpenAIConversationalTask
49+
from huggingface_hub.inference._providers.ovhcloud import OVHcloudAIEndpointsAutomaticSpeechRecognitionTask, OVHcloudAIEndpointsConversationalTask, OVHcloudAIEndpointsFeatureExtractionTask, OVHcloudAIEndpointsTextToImageTask
4950
from huggingface_hub.inference._providers.publicai import PublicAIConversationalTask
5051
from huggingface_hub.inference._providers.replicate import (
5152
ReplicateAutomaticSpeechRecognitionTask,
@@ -1423,6 +1424,142 @@ def test_prepare_url(self):
14231424
assert helper._prepare_url("sk-XXXXXX", "gpt-4o-mini") == "https://api.openai.com/v1/chat/completions"
14241425

14251426

1427+
class TestOVHcloudAIEndpointsProvider:
    """Unit tests for the OVHcloud AI Endpoints task helpers (URL routing,
    payload shaping, and response parsing). No network calls are made."""

    def test_prepare_hf_url_conversational(self):
        # With an HF token, requests are routed through the Hugging Face router.
        helper = OVHcloudAIEndpointsConversationalTask()
        url = helper._prepare_url("hf_token", "username/repo_name")
        assert url == "https://router.huggingface.co/ovhcloud/v1/chat/completions"

    def test_prepare_url_conversational(self):
        # With a provider token, requests go directly to the OVHcloud endpoint.
        helper = OVHcloudAIEndpointsConversationalTask()
        url = helper._prepare_url("ovhcloud_token", "username/repo_name")
        assert url == "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1/chat/completions"

    def test_prepare_payload_as_dict(self):
        # Chat payload: messages + provider model id + flattened sampling parameters.
        helper = OVHcloudAIEndpointsConversationalTask()
        payload = helper._prepare_payload_as_dict(
            [
                {"role": "system", "content": "You are a helpful assistant"},
                {"role": "user", "content": "Hello!"},
            ],
            {
                "max_tokens": 512,
                "temperature": 0.15,
                "top_p": 1,
                "presence_penalty": 0,
                "stream": True,
            },
            InferenceProviderMapping(
                provider="ovhcloud",
                hf_model_id="meta-llama/Llama-3.1-8B-Instruct",
                providerId="Llama-3.1-8B-Instruct",
                task="conversational",
                status="live",
            ),
        )
        assert payload == {
            "max_tokens": 512,
            "messages": [
                {"content": "You are a helpful assistant", "role": "system"},
                {"role": "user", "content": "Hello!"},
            ],
            "model": "Llama-3.1-8B-Instruct",
            "presence_penalty": 0,
            "stream": True,
            "temperature": 0.15,
            "top_p": 1,
        }

    def test_prepare_url_feature_extraction(self):
        helper = OVHcloudAIEndpointsFeatureExtractionTask()
        assert (
            helper._prepare_url("hf_token", "username/repo_name")
            == "https://router.huggingface.co/ovhcloud/v1/embeddings"
        )

    def test_prepare_payload_as_dict_feature_extraction(self):
        # Embeddings payload uses the OpenAI-style "input" key, not "inputs".
        helper = OVHcloudAIEndpointsFeatureExtractionTask()
        payload = helper._prepare_payload_as_dict(
            "Example text to embed",
            {"truncate": True},
            InferenceProviderMapping(
                provider="ovhcloud",
                hf_model_id="BAAI/bge-m3",
                providerId="BGE-M3",
                task="feature-extraction",
                status="live",
            ),
        )
        assert payload == {"input": "Example text to embed", "model": "BGE-M3", "truncate": True}

    def test_prepare_url_text_to_image(self):
        helper = OVHcloudAIEndpointsTextToImageTask()
        assert (
            helper._prepare_url("hf_token", "username/repo_name")
            == "https://router.huggingface.co/ovhcloud/v1/images/generations"
        )

        url = helper._prepare_url("ovhcloud_token", "username/repo_name")
        assert url == "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1/images/generations"

    def test_prepare_payload_as_dict_text_to_image(self):
        # Image payload maps the prompt to "prompt" and uses the provider model id.
        helper = OVHcloudAIEndpointsTextToImageTask()
        payload = helper._prepare_payload_as_dict(
            inputs="a beautiful cat",
            provider_mapping_info=InferenceProviderMapping(
                provider="ovhcloud",
                hf_model_id="stabilityai/stable-diffusion-xl-base-1.0",
                providerId="stable-diffusion-xl-base-v10",
                task="text-to-image",
                status="live",
            ),
            parameters={}
        )
        assert payload == {
            "prompt": "a beautiful cat",
            "model": "stable-diffusion-xl-base-v10",
        }

    def test_text_to_image_get_response(self):
        # The helper decodes the base64 "b64_json" field back to raw image bytes.
        helper = OVHcloudAIEndpointsTextToImageTask()
        response = helper.get_response({"data": [{"b64_json": base64.b64encode(b"image_bytes").decode()}]})
        assert response == b"image_bytes"

    def test_prepare_url_automatic_speech_recognition(self):
        helper = OVHcloudAIEndpointsAutomaticSpeechRecognitionTask()
        assert (
            helper._prepare_url("hf_token", "username/repo_name")
            == "https://router.huggingface.co/ovhcloud/v1/audio/transcriptions"
        )

        url = helper._prepare_url("ovhcloud_token", "username/repo_name")
        assert url == "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1/audio/transcriptions"

    def test_prepare_payload_as_dict_automatic_speech_recognition(self):
        # ASR payload keeps the data-URI audio input under the "file" key unchanged.
        helper = OVHcloudAIEndpointsAutomaticSpeechRecognitionTask()

        payload = helper._prepare_payload_as_dict(
            f"data:audio/mpeg;base64,{base64.b64encode(b'dummy_audio_data').decode()}",
            {},
            InferenceProviderMapping(
                provider="ovhcloud",
                hf_model_id="openai/whisper-large-v3",
                providerId="whisper-large-v3",
                task="automatic-speech-recognition",
                status="live",
            ),
        )
        assert payload == {
            "file": f"data:audio/mpeg;base64,{base64.b64encode(b'dummy_audio_data').decode()}",
            "model": "whisper-large-v3",
        }

    def test_automatic_speech_recognition_get_response(self):
        # The transcription text is returned directly from the "text" field.
        helper = OVHcloudAIEndpointsAutomaticSpeechRecognitionTask()
        response = helper.get_response({"text": "Hello world"})
        assert response == "Hello world"
1561+
1562+
14261563
class TestReplicateProvider:
14271564
def test_automatic_speech_recognition_payload(self):
14281565
helper = ReplicateAutomaticSpeechRecognitionTask()

0 commit comments

Comments
 (0)