huggingface · Wauplin · Nov 14, 2025 · Oct 28, 2025 · Nov 12, 2025 · Nov 12, 2025
diff --git a/docs/source/en/guides/inference.md b/docs/source/en/guides/inference.md
diff --git a/src/huggingface_hub/inference/_client.py b/src/huggingface_hub/inference/_client.py
@@ -135,7 +135,7 @@ class InferenceClient:
             Note: for better compatibility with OpenAI's client, `model` has been aliased as `base_url`. Those 2
             arguments are mutually exclusive. If a URL is passed as `model` or `base_url` for chat completion, the `(/v1)/chat/completions` suffix path will be appended to the URL.
         provider (`str`, *optional*):
-            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"clarifai"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"publicai"`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"`, `"wavespeed"` or `"zai-org"`.
+            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"clarifai"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"ovhcloud"`, `"publicai"`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"`, `"wavespeed"` or `"zai-org"`.
             Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
             If model is a URL or `base_url` is passed, then `provider` is not used.
         token (`str`, *optional*):

diff --git a/src/huggingface_hub/inference/_generated/_async_client.py b/src/huggingface_hub/inference/_generated/_async_client.py
@@ -126,7 +126,7 @@ class AsyncInferenceClient:
             Note: for better compatibility with OpenAI's client, `model` has been aliased as `base_url`. Those 2
             arguments are mutually exclusive. If a URL is passed as `model` or `base_url` for chat completion, the `(/v1)/chat/completions` suffix path will be appended to the URL.
         provider (`str`, *optional*):
-            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"clarifai"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"publicai"`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"`, `"wavespeed"` or `"zai-org"`.
+            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"clarifai"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"ovhcloud"`, `"publicai"`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"`, `"wavespeed"` or `"zai-org"`.
             Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
             If model is a URL or `base_url` is passed, then `provider` is not used.
         token (`str`, *optional*):

diff --git a/src/huggingface_hub/inference/_providers/__init__.py b/src/huggingface_hub/inference/_providers/__init__.py
@@ -38,6 +38,7 @@
 from .novita import NovitaConversationalTask, NovitaTextGenerationTask, NovitaTextToVideoTask
 from .nscale import NscaleConversationalTask, NscaleTextToImageTask
 from .openai import OpenAIConversationalTask
+from .ovhcloud import OVHcloudConversationalTask, OVHcloudTextGenerationTask
 from .publicai import PublicAIConversationalTask
 from .replicate import (
     ReplicateAutomaticSpeechRecognitionTask,
@@ -76,6 +77,7 @@
     "novita",
     "nscale",
     "openai",
+    "ovhcloud",
     "publicai",
     "replicate",
     "sambanova",
@@ -172,6 +174,10 @@
     "openai": {
         "conversational": OpenAIConversationalTask(),
     },
+    "ovhcloud": {
+        "conversational": OVHcloudConversationalTask(),
+        "text-generation": OVHcloudTextGenerationTask(),
+    },
     "publicai": {
         "conversational": PublicAIConversationalTask(),
     },

diff --git a/src/huggingface_hub/inference/_providers/_common.py b/src/huggingface_hub/inference/_providers/_common.py
@@ -32,6 +32,7 @@
     "hyperbolic": {},
     "nebius": {},
     "nscale": {},
+    "ovhcloud": {},
     "replicate": {},
     "sambanova": {},
     "scaleway": {},

diff --git a/src/huggingface_hub/inference/_providers/ovhcloud.py b/src/huggingface_hub/inference/_providers/ovhcloud.py
@@ -0,0 +1,28 @@
+from typing import Any, Optional, Union
+
+from huggingface_hub.inference._common import RequestParameters, _as_dict
+from huggingface_hub.inference._providers._common import BaseConversationalTask, BaseTextGenerationTask
+
+
+_PROVIDER = "ovhcloud"  # passed as `provider=` to both task helpers in this module
+_BASE_URL = "https://oai.endpoints.kepler.ai.cloud.ovh.net"  # passed as `base_url=` to both task helpers
+
+
+class OVHcloudConversationalTask(BaseConversationalTask):  # conversational-task helper for OVHcloud
+    def __init__(self):  # no arguments: provider name and base URL come from the module constants
+        super().__init__(provider=_PROVIDER, base_url=_BASE_URL)
+
+
+class OVHcloudTextGenerationTask(BaseTextGenerationTask):  # text-generation helper for OVHcloud
+    def __init__(self):  # no arguments: provider name and base URL come from the module constants
+        super().__init__(provider=_PROVIDER, base_url=_BASE_URL)
+
+    def get_response(self, response: Union[bytes, dict], request_params: Optional[RequestParameters] = None) -> Any:
+        output = _as_dict(response)["choices"][0]  # only the first choice is used; request_params is not read
+        return {
+            "generated_text": output["text"],  # mandatory key: looked up with [] rather than .get()
+            "details": {
+                "finish_reason": output.get("finish_reason"),  # None when the key is absent
+                "seed": output.get("seed"),  # None when the key is absent
+            },
+        }
diff --git a/tests/test_inference_client.py b/tests/test_inference_client.py
@@ -117,6 +117,10 @@
         "text-generation": "NousResearch/Nous-Hermes-Llama2-13b",
         "conversational": "meta-llama/Llama-3.1-8B-Instruct",
     },
+    "ovhcloud": {
+        "conversational": "meta-llama/Llama-3.1-8B-Instruct",
+        "text-generation": "meta-llama/Llama-3.1-8B-Instruct",
+    },
     "replicate": {
         "text-to-image": "ByteDance/SDXL-Lightning",
     },

diff --git a/tests/test_inference_providers.py b/tests/test_inference_providers.py
@@ -46,6 +46,10 @@
 from huggingface_hub.inference._providers.novita import NovitaConversationalTask, NovitaTextGenerationTask
 from huggingface_hub.inference._providers.nscale import NscaleConversationalTask, NscaleTextToImageTask
 from huggingface_hub.inference._providers.openai import OpenAIConversationalTask
+from huggingface_hub.inference._providers.ovhcloud import (
+    OVHcloudConversationalTask,
+    OVHcloudTextGenerationTask,
+)
 from huggingface_hub.inference._providers.publicai import PublicAIConversationalTask
 from huggingface_hub.inference._providers.replicate import (
     ReplicateAutomaticSpeechRecognitionTask,
@@ -1423,6 +1427,110 @@ def test_prepare_url(self):
         assert helper._prepare_url("sk-XXXXXX", "gpt-4o-mini") == "https://api.openai.com/v1/chat/completions"
 
 
+class TestOVHcloudAIEndpointsProvider:  # unit tests for the OVHcloud task helpers (URL, route, payload, response)
+    def test_prepare_hf_url_conversational(self):  # token "hf_token" -> router.huggingface.co chat URL
+        helper = OVHcloudConversationalTask()
+        url = helper._prepare_url("hf_token", "username/repo_name")
+        assert url == "https://router.huggingface.co/ovhcloud/v1/chat/completions"
+
+    def test_prepare_url_conversational(self):  # token "ovhcloud_token" -> OVHcloud endpoint chat URL
+        helper = OVHcloudConversationalTask()
+        url = helper._prepare_url("ovhcloud_token", "username/repo_name")
+        assert url == "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1/chat/completions"
+
+    def test_prepare_payload_as_dict(self):  # chat payload: messages + parameters, "model" equals providerId
+        helper = OVHcloudConversationalTask()
+        payload = helper._prepare_payload_as_dict(
+            [
+                {"role": "system", "content": "You are a helpful assistant"},
+                {"role": "user", "content": "Hello!"},
+            ],
+            {
+                "max_tokens": 512,
+                "temperature": 0.15,
+                "top_p": 1,
+                "presence_penalty": 0,
+                "stream": True,
+            },
+            InferenceProviderMapping(
+                provider="ovhcloud",
+                hf_model_id="meta-llama/Llama-3.1-8B-Instruct",
+                providerId="Llama-3.1-8B-Instruct",
+                task="conversational",
+                status="live",
+            ),
+        )
+        assert payload == {
+            "max_tokens": 512,
+            "messages": [
+                {"content": "You are a helpful assistant", "role": "system"},
+                {"role": "user", "content": "Hello!"},
+            ],
+            "model": "Llama-3.1-8B-Instruct",
+            "presence_penalty": 0,
+            "stream": True,
+            "temperature": 0.15,
+            "top_p": 1,
+        }
+
+    def test_prepare_route_conversational(self):  # conversational route is /v1/chat/completions
+        helper = OVHcloudConversationalTask()
+        assert helper._prepare_route("username/repo_name", "hf_token") == "/v1/chat/completions"
+
+    def test_prepare_url_text_generation(self):  # checks the routed URL, then the direct completions URL
+        helper = OVHcloudTextGenerationTask()
+        url = helper._prepare_url("hf_token", "username/repo_name")
+        assert url == "https://router.huggingface.co/ovhcloud/v1/completions"
+
+        url = helper._prepare_url("ovhcloud_token", "username/repo_name")
+        assert url == "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1/completions"
+
+    def test_prepare_route_text_generation(self):  # text-generation route is /v1/completions
+        helper = OVHcloudTextGenerationTask()
+        assert helper._prepare_route("username/repo_name", "hf_token") == "/v1/completions"
+
+    def test_prepare_payload_as_dict_text_generation(self):  # input goes under "prompt", "model" equals providerId
+        helper = OVHcloudTextGenerationTask()
+        payload = helper._prepare_payload_as_dict(
+            "Once upon a time",
+            {"temperature": 0.7, "max_tokens": 100},
+            InferenceProviderMapping(
+                provider="ovhcloud",
+                hf_model_id="meta-llama/Llama-3.1-8B-Instruct",
+                providerId="Llama-3.1-8B-Instruct",
+                task="text-generation",
+                status="live",
+            ),
+        )
+        assert payload == {
+            "prompt": "Once upon a time",
+            "temperature": 0.7,
+            "max_tokens": 100,
+            "model": "Llama-3.1-8B-Instruct",
+        }
+
+    def test_text_generation_get_response(self):  # first choice is mapped to generated_text + details
+        helper = OVHcloudTextGenerationTask()
+        response = helper.get_response(
+            {
+                "choices": [
+                    {
+                        "text": " there was a beautiful princess",
+                        "finish_reason": "stop",
+                        "seed": 42,
+                    }
+                ]
+            }
+        )
+        assert response == {
+            "generated_text": " there was a beautiful princess",
+            "details": {
+                "finish_reason": "stop",
+                "seed": 42,
+            },
+        }
+
+
 class TestReplicateProvider:
     def test_automatic_speech_recognition_payload(self):
         helper = ReplicateAutomaticSpeechRecognitionTask()