
Commit ced4d89

Add Black Forest Labs provider (#2864)
* add bfl
* update table
* add logging
1 parent a7f3151 commit ced4d89

8 files changed: 6,325 additions and 32 deletions

docs/source/en/guides/inference.md

Lines changed: 30 additions & 30 deletions
@@ -248,36 +248,36 @@ You might wonder why using [`InferenceClient`] instead of OpenAI's client? There
[`InferenceClient`]'s goal is to provide the easiest interface to run inference on Hugging Face models, on any provider. It has a simple API that supports the most common tasks. Here is a table showing which providers support which tasks:

- | Domain | Task | HF Inference | fal-ai | Fireworks AI | Hyperbolic | Nebius AI Studio | Novita AI | Replicate | Sambanova | Together |
+ | Domain | Task | Black Forest Labs | HF Inference | fal-ai | Fireworks AI | Hyperbolic | Nebius AI Studio | Novita AI | Replicate | Sambanova | Together |

[Provider support table updated: one row per `InferenceClient` task method, from `audio_classification` through `tabular_regression`, grouped under Audio, Computer Vision, Multimodal, NLP and Tabular, each cell holding a per-provider support marker; the diff adds the "Black Forest Labs" column to every row.]

<Tip>

src/huggingface_hub/inference/_client.py

Lines changed: 1 addition & 1 deletion

@@ -132,7 +132,7 @@ class InferenceClient:
      path will be appended to the base URL (see the [TGI Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api)
      documentation for details). When passing a URL as `model`, the client will not append any suffix path to it.
  provider (`str`, *optional*):
- Name of the provider to use for inference. Can be "fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"replicate"`, "sambanova"` or `"together"`.
+ Name of the provider to use for inference. Can be `"black-forest-labs"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"replicate"`, `"sambanova"` or `"together"`.
  defaults to hf-inference (Hugging Face Serverless Inference API).
  If model is a URL or `base_url` is passed, then `provider` is not used.
  token (`str` or `bool`, *optional*):
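For context, a minimal usage sketch of the new provider value with the synchronous client. The prompt, the `hf_...` token placeholder and the `black-forest-labs/FLUX.1-dev` model id are illustrative choices, not part of this diff:

```python
from huggingface_hub import InferenceClient

# Route text-to-image requests through Black Forest Labs instead of the default hf-inference.
client = InferenceClient(provider="black-forest-labs", api_key="hf_...")

# text_to_image returns a PIL image; the model id below is only an example.
image = client.text_to_image(
    "An astronaut riding a horse on the moon",
    model="black-forest-labs/FLUX.1-dev",
)
image.save("astronaut.png")
```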

src/huggingface_hub/inference/_generated/_async_client.py

Lines changed: 1 addition & 1 deletion

@@ -120,7 +120,7 @@ class AsyncInferenceClient:
      path will be appended to the base URL (see the [TGI Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api)
      documentation for details). When passing a URL as `model`, the client will not append any suffix path to it.
  provider (`str`, *optional*):
- Name of the provider to use for inference. Can be "fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"replicate"`, "sambanova"` or `"together"`.
+ Name of the provider to use for inference. Can be `"black-forest-labs"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"replicate"`, `"sambanova"` or `"together"`.
  defaults to hf-inference (Hugging Face Serverless Inference API).
  If model is a URL or `base_url` is passed, then `provider` is not used.
  token (`str` or `bool`, *optional*):
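The same provider value works with the async client. A short sketch under the same assumptions (illustrative prompt, token placeholder and model id):

```python
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient(provider="black-forest-labs", api_key="hf_...")
    # Same API as the sync client, but awaitable.
    image = await client.text_to_image(
        "A watercolor painting of the Black Forest",
        model="black-forest-labs/FLUX.1-dev",
    )
    image.save("forest.png")


asyncio.run(main())
```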

src/huggingface_hub/inference/_providers/__init__.py

Lines changed: 5 additions & 0 deletions

@@ -1,6 +1,7 @@
  from typing import Dict, Literal

  from ._common import TaskProviderHelper
+ from .black_forest_labs import BlackForestLabsTextToImageTask
  from .fal_ai import (
      FalAIAutomaticSpeechRecognitionTask,
      FalAITextToImageTask,
@@ -18,6 +19,7 @@


  PROVIDER_T = Literal[
+     "black-forest-labs",
      "fal-ai",
      "fireworks-ai",
      "hf-inference",
@@ -30,6 +32,9 @@
  ]

  PROVIDERS: Dict[PROVIDER_T, Dict[str, TaskProviderHelper]] = {
+     "black-forest-labs": {
+         "text-to-image": BlackForestLabsTextToImageTask(),
+     },
      "fal-ai": {
          "automatic-speech-recognition": FalAIAutomaticSpeechRecognitionTask(),
          "text-to-image": FalAITextToImageTask(),
src/huggingface_hub/inference/_providers/black_forest_labs.py

Lines changed: 66 additions & 0 deletions

@@ -0,0 +1,66 @@
import time
from typing import Any, Dict, Optional, Union

from huggingface_hub.inference._common import _as_dict
from huggingface_hub.inference._providers._common import TaskProviderHelper, filter_none
from huggingface_hub.utils import logging
from huggingface_hub.utils._http import get_session


logger = logging.get_logger(__name__)

MAX_POLLING_ATTEMPTS = 6
POLLING_INTERVAL = 1.0


class BlackForestLabsTextToImageTask(TaskProviderHelper):
    def __init__(self):
        super().__init__(provider="black-forest-labs", base_url="https://api.us1.bfl.ai/v1", task="text-to-image")

    def _prepare_headers(self, headers: Dict, api_key: str) -> Dict:
        headers = super()._prepare_headers(headers, api_key)
        if not api_key.startswith("hf_"):
            _ = headers.pop("authorization")
            headers["X-Key"] = api_key
        return headers

    def _prepare_route(self, mapped_model: str) -> str:
        return mapped_model

    def _prepare_payload_as_dict(self, inputs: Any, parameters: Dict, mapped_model: str) -> Optional[Dict]:
        parameters = filter_none(parameters)
        if "num_inference_steps" in parameters:
            parameters["steps"] = parameters.pop("num_inference_steps")
        if "guidance_scale" in parameters:
            parameters["guidance"] = parameters.pop("guidance_scale")

        return {"prompt": inputs, **parameters}

    def get_response(self, response: Union[bytes, Dict]) -> Any:
        """
        Polling mechanism for Black Forest Labs since the API is asynchronous.
        """
        url = _as_dict(response).get("polling_url")
        session = get_session()
        for _ in range(MAX_POLLING_ATTEMPTS):
            time.sleep(POLLING_INTERVAL)

            response = session.get(url, headers={"Content-Type": "application/json"})  # type: ignore
            response.raise_for_status()  # type: ignore
            response_json: Dict = response.json()  # type: ignore
            status = response_json.get("status")
            logger.info(
                f"Polling generation result from {url}. Current status: {status}. "
                f"Will retry after {POLLING_INTERVAL} seconds if not ready."
            )

            if (
                status == "Ready"
                and isinstance(response_json.get("result"), dict)
                and (sample_url := response_json["result"].get("sample"))
            ):
                image_resp = session.get(sample_url)
                image_resp.raise_for_status()
                return image_resp.content

        raise TimeoutError(f"Failed to get the image URL after {MAX_POLLING_ATTEMPTS} attempts.")
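To illustrate the parameter mapping above, a small sketch of what `_prepare_payload_as_dict` produces when given the generic text-to-image parameter names. The prompt, values and the `flux-dev` mapped model id are made up for the example (and the mapped model is not used by this method); it assumes the new module lives at `huggingface_hub.inference._providers.black_forest_labs`, as the import in `__init__.py` indicates:

```python
from huggingface_hub.inference._providers.black_forest_labs import BlackForestLabsTextToImageTask

task = BlackForestLabsTextToImageTask()
payload = task._prepare_payload_as_dict(
    inputs="A lighthouse at dusk",
    parameters={"num_inference_steps": 28, "guidance_scale": 3.5, "seed": None},
    mapped_model="flux-dev",  # hypothetical mapped model id, ignored by this method
)
# num_inference_steps/guidance_scale are renamed to the BFL field names and None values are dropped:
# {"prompt": "A lighthouse at dusk", "steps": 28, "guidance": 3.5}
print(payload)
```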

0 commit comments

Comments
 (0)