diff --git a/src/huggingface_hub/inference/_client.py b/src/huggingface_hub/inference/_client.py index ed9fe578b7..d274c6b8ca 100644 --- a/src/huggingface_hub/inference/_client.py +++ b/src/huggingface_hub/inference/_client.py @@ -404,11 +404,7 @@ def audio_classification( parameters = {"function_to_apply": function_to_apply, "top_k": top_k} provider_helper = get_provider_helper(self.provider, task="audio-classification") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload( - audio, - parameters=parameters, - expect_binary=True, - ) + payload = provider_helper.prepare_payload(audio, parameters=parameters, model=model) response = self.post(**payload, model=model, task="audio-classification") return AudioClassificationOutputElement.parse_obj_as_list(response) @@ -1008,7 +1004,7 @@ def document_question_answering( } provider_helper = get_provider_helper(self.provider, task="document-question-answering") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=inputs, parameters=parameters) + payload = provider_helper.prepare_payload(inputs=inputs, parameters=parameters, model=model) response = self.post(**payload, model=model, task="document-question-answering") return DocumentQuestionAnsweringOutputElement.parse_obj_as_list(response) @@ -1075,7 +1071,7 @@ def feature_extraction( } provider_helper = get_provider_helper(self.provider, task="feature-extraction") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=text, parameters=parameters) + payload = provider_helper.prepare_payload(inputs=text, parameters=parameters, model=model) response = self.post(**payload, model=model, task="feature-extraction") np = _import_numpy() return np.array(_bytes_to_dict(response), dtype="float32") @@ -1127,7 +1123,7 @@ def fill_mask( parameters = {"targets": targets, "top_k": top_k} provider_helper = get_provider_helper(self.provider, task="fill-mask") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=text, parameters=parameters) + payload = provider_helper.prepare_payload(inputs=text, parameters=parameters, model=model) response = self.post(**payload, model=model, task="fill-mask") return FillMaskOutputElement.parse_obj_as_list(response) @@ -1172,7 +1168,7 @@ def image_classification( parameters = {"function_to_apply": function_to_apply, "top_k": top_k} provider_helper = get_provider_helper(self.provider, task="image-classification") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=image, parameters=parameters, expect_binary=True) + payload = provider_helper.prepare_payload(inputs=image, parameters=parameters, model=model) response = self.post(**payload, model=model, task="image-classification") return ImageClassificationOutputElement.parse_obj_as_list(response) @@ -1234,7 +1230,7 @@ def image_segmentation( } provider_helper = get_provider_helper(self.provider, task="image-segmentation") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=image, parameters=parameters, expect_binary=True) + payload = provider_helper.prepare_payload(inputs=image, parameters=parameters, model=model) response = self.post(**payload, model=model, task="image-segmentation") output = ImageSegmentationOutputElement.parse_obj_as_list(response) for item in output: @@ -1308,7 +1304,7 @@ def 
image_to_image( } provider_helper = get_provider_helper(self.provider, task="image-to-image") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=image, parameters=parameters, expect_binary=True) + payload = provider_helper.prepare_payload(inputs=image, parameters=parameters, model=model) response = self.post(**payload, model=model, task="image-to-image") return _bytes_to_image(response) @@ -1478,7 +1474,7 @@ def object_detection( } provider_helper = get_provider_helper(self.provider, task="object-detection") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=image, parameters=parameters, expect_binary=True) + payload = provider_helper.prepare_payload(inputs=image, parameters=parameters, model=model) response = self.post(**payload, model=model, task="object-detection") return ObjectDetectionOutputElement.parse_obj_as_list(response) @@ -1556,7 +1552,7 @@ def question_answering( inputs: Dict[str, Any] = {"question": question, "context": context} provider_helper = get_provider_helper(self.provider, task="question-answering") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=inputs, parameters=parameters) + payload = provider_helper.prepare_payload(inputs=inputs, parameters=parameters, model=model) response = self.post(**payload, model=model, task="question-answering") # Parse the response as a single `QuestionAnsweringOutputElement` when top_k is 1 or not provided, or a list of `QuestionAnsweringOutputElement` to ensure backward compatibility. output = QuestionAnsweringOutputElement.parse_obj(response) @@ -1672,7 +1668,7 @@ def summarization( } provider_helper = get_provider_helper(self.provider, task="summarization") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=text, parameters=parameters) + payload = provider_helper.prepare_payload(inputs=text, parameters=parameters, model=model) response = self.post(**payload, model=model, task="summarization") return SummarizationOutput.parse_obj_as_list(response)[0] @@ -1737,7 +1733,7 @@ def table_question_answering( } provider_helper = get_provider_helper(self.provider, task="table-question-answering") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=inputs, parameters=parameters) + payload = provider_helper.prepare_payload(inputs=inputs, parameters=parameters, model=model) response = self.post( **payload, model=model, @@ -1887,7 +1883,7 @@ def text_classification( } provider_helper = get_provider_helper(self.provider, task="text-classification") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=text, parameters=parameters) + payload = provider_helper.prepare_payload(inputs=text, parameters=parameters, model=model) response = self.post( **payload, model=model, @@ -2482,11 +2478,7 @@ def text_to_image( provider_helper = get_provider_helper(self.provider, task="text-to-image") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload( - prompt, - parameters=parameters, - model=model, - ) + payload = provider_helper.prepare_payload(prompt, parameters=parameters, model=model) response = self.post(**payload, model=model, task="text-to-image") response = provider_helper.get_response(response) @@ -3012,7 +3004,7 @@ def 
zero_shot_image_classification( } provider_helper = get_provider_helper(self.provider, task="zero-shot-image-classification") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(image, parameters=parameters, model=model, expect_binary=True) + payload = provider_helper.prepare_payload(image, parameters=parameters, model=model) response = self.post( **payload, model=model, diff --git a/src/huggingface_hub/inference/_generated/_async_client.py b/src/huggingface_hub/inference/_generated/_async_client.py index a3bc9001c3..9eb14a2549 100644 --- a/src/huggingface_hub/inference/_generated/_async_client.py +++ b/src/huggingface_hub/inference/_generated/_async_client.py @@ -438,11 +438,7 @@ async def audio_classification( parameters = {"function_to_apply": function_to_apply, "top_k": top_k} provider_helper = get_provider_helper(self.provider, task="audio-classification") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload( - audio, - parameters=parameters, - expect_binary=True, - ) + payload = provider_helper.prepare_payload(audio, parameters=parameters, model=model) response = await self.post(**payload, model=model, task="audio-classification") return AudioClassificationOutputElement.parse_obj_as_list(response) @@ -1051,7 +1047,7 @@ async def document_question_answering( } provider_helper = get_provider_helper(self.provider, task="document-question-answering") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=inputs, parameters=parameters) + payload = provider_helper.prepare_payload(inputs=inputs, parameters=parameters, model=model) response = await self.post(**payload, model=model, task="document-question-answering") return DocumentQuestionAnsweringOutputElement.parse_obj_as_list(response) @@ -1119,7 +1115,7 @@ async def feature_extraction( } provider_helper = get_provider_helper(self.provider, task="feature-extraction") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=text, parameters=parameters) + payload = provider_helper.prepare_payload(inputs=text, parameters=parameters, model=model) response = await self.post(**payload, model=model, task="feature-extraction") np = _import_numpy() return np.array(_bytes_to_dict(response), dtype="float32") @@ -1172,7 +1168,7 @@ async def fill_mask( parameters = {"targets": targets, "top_k": top_k} provider_helper = get_provider_helper(self.provider, task="fill-mask") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=text, parameters=parameters) + payload = provider_helper.prepare_payload(inputs=text, parameters=parameters, model=model) response = await self.post(**payload, model=model, task="fill-mask") return FillMaskOutputElement.parse_obj_as_list(response) @@ -1218,7 +1214,7 @@ async def image_classification( parameters = {"function_to_apply": function_to_apply, "top_k": top_k} provider_helper = get_provider_helper(self.provider, task="image-classification") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=image, parameters=parameters, expect_binary=True) + payload = provider_helper.prepare_payload(inputs=image, parameters=parameters, model=model) response = await self.post(**payload, model=model, task="image-classification") return ImageClassificationOutputElement.parse_obj_as_list(response) @@ -1281,7 
+1277,7 @@ async def image_segmentation( } provider_helper = get_provider_helper(self.provider, task="image-segmentation") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=image, parameters=parameters, expect_binary=True) + payload = provider_helper.prepare_payload(inputs=image, parameters=parameters, model=model) response = await self.post(**payload, model=model, task="image-segmentation") output = ImageSegmentationOutputElement.parse_obj_as_list(response) for item in output: @@ -1356,7 +1352,7 @@ async def image_to_image( } provider_helper = get_provider_helper(self.provider, task="image-to-image") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=image, parameters=parameters, expect_binary=True) + payload = provider_helper.prepare_payload(inputs=image, parameters=parameters, model=model) response = await self.post(**payload, model=model, task="image-to-image") return _bytes_to_image(response) @@ -1534,7 +1530,7 @@ async def object_detection( } provider_helper = get_provider_helper(self.provider, task="object-detection") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=image, parameters=parameters, expect_binary=True) + payload = provider_helper.prepare_payload(inputs=image, parameters=parameters, model=model) response = await self.post(**payload, model=model, task="object-detection") return ObjectDetectionOutputElement.parse_obj_as_list(response) @@ -1613,7 +1609,7 @@ async def question_answering( inputs: Dict[str, Any] = {"question": question, "context": context} provider_helper = get_provider_helper(self.provider, task="question-answering") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=inputs, parameters=parameters) + payload = provider_helper.prepare_payload(inputs=inputs, parameters=parameters, model=model) response = await self.post(**payload, model=model, task="question-answering") # Parse the response as a single `QuestionAnsweringOutputElement` when top_k is 1 or not provided, or a list of `QuestionAnsweringOutputElement` to ensure backward compatibility. 
output = QuestionAnsweringOutputElement.parse_obj(response) @@ -1731,7 +1727,7 @@ async def summarization( } provider_helper = get_provider_helper(self.provider, task="summarization") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=text, parameters=parameters) + payload = provider_helper.prepare_payload(inputs=text, parameters=parameters, model=model) response = await self.post(**payload, model=model, task="summarization") return SummarizationOutput.parse_obj_as_list(response)[0] @@ -1797,7 +1793,7 @@ async def table_question_answering( } provider_helper = get_provider_helper(self.provider, task="table-question-answering") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=inputs, parameters=parameters) + payload = provider_helper.prepare_payload(inputs=inputs, parameters=parameters, model=model) response = await self.post( **payload, model=model, @@ -1950,7 +1946,7 @@ async def text_classification( } provider_helper = get_provider_helper(self.provider, task="text-classification") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(inputs=text, parameters=parameters) + payload = provider_helper.prepare_payload(inputs=text, parameters=parameters, model=model) response = await self.post( **payload, model=model, @@ -2547,11 +2543,7 @@ async def text_to_image( provider_helper = get_provider_helper(self.provider, task="text-to-image") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload( - prompt, - parameters=parameters, - model=model, - ) + payload = provider_helper.prepare_payload(prompt, parameters=parameters, model=model) response = await self.post(**payload, model=model, task="text-to-image") response = provider_helper.get_response(response) @@ -3084,7 +3076,7 @@ async def zero_shot_image_classification( } provider_helper = get_provider_helper(self.provider, task="zero-shot-image-classification") model = provider_helper.map_model(model=model or self.model) - payload = provider_helper.prepare_payload(image, parameters=parameters, model=model, expect_binary=True) + payload = provider_helper.prepare_payload(image, parameters=parameters, model=model) response = await self.post( **payload, model=model, diff --git a/src/huggingface_hub/inference/_providers/__init__.py b/src/huggingface_hub/inference/_providers/__init__.py index 0368b55454..e6ff7cf797 100644 --- a/src/huggingface_hub/inference/_providers/__init__.py +++ b/src/huggingface_hub/inference/_providers/__init__.py @@ -1,60 +1,60 @@ +# mypy: disable-error-code="dict-item" from typing import Any, Dict, Optional, Protocol, Union -from . import fal_ai, hf_inference, replicate, sambanova, together +from . import fal_ai, replicate, sambanova, together +from .hf_inference import HFInferenceBinaryInputTask, HFInferenceConversational, HFInferenceTask class TaskProviderHelper(Protocol): """Protocol defining the interface for task-specific provider helpers.""" - def build_url(model: Optional[str] = None) -> str: ... - def map_model(model: Optional[str] = None) -> str: ... - def prepare_headers(headers: Dict, *, token: Optional[str] = None) -> Dict: ... - def prepare_payload( - inputs: Any, parameters: Dict[str, Any], model: Optional[str] = None, expect_binary: bool = False - ) -> Dict[str, Any]: ... - def get_response(response: Union[bytes, Dict]) -> Any: ... + def build_url(self, model: Optional[str] = None) -> str: ... 
+ def map_model(self, model: Optional[str] = None) -> str: ... + def prepare_headers(self, headers: Dict, *, token: Optional[str] = None) -> Dict: ... + def prepare_payload(self, inputs: Any, parameters: Dict[str, Any], model: Optional[str]) -> Dict[str, Any]: ... + def get_response(self, response: Union[bytes, Dict]) -> Any: ... PROVIDERS: Dict[str, Dict[str, TaskProviderHelper]] = { "replicate": { - "text-to-image": replicate.text_to_image, # type: ignore + "text-to-image": replicate.text_to_image, }, "fal-ai": { - "text-to-image": fal_ai.text_to_image, # type: ignore + "text-to-image": fal_ai.text_to_image, # TODO: add automatic-speech-recognition }, "sambanova": { - "conversational": sambanova.conversational, # type: ignore + "conversational": sambanova.conversational, }, "together": { - "text-to-image": together.text_to_image, # type: ignore - "conversational": together.conversational, # type: ignore - "text-generation": together.text_generation, # type: ignore + "text-to-image": together.text_to_image, + "conversational": together.conversational, + "text-generation": together.text_generation, }, "hf-inference": { - "text-to-image": hf_inference.text_to_image, - "conversational": hf_inference.conversational, - "text-classification": hf_inference.text_classification, - "question-answering": hf_inference.question_answering, - "audio-classification": hf_inference.audio_classification, - "automatic-speech-recognition": hf_inference.automatic_speech_recognition, - "fill-mask": hf_inference.fill_mask, - "feature-extraction": hf_inference.feature_extraction, - "image-classification": hf_inference.image_classification, - "image-segmentation": hf_inference.image_segmentation, - "document-question-answering": hf_inference.document_question_answering, - "image-to-text": hf_inference.image_to_text, - "object-detection": hf_inference.object_detection, - "audio-to-audio": hf_inference.audio_to_audio, - "zero-shot-image-classification": hf_inference.zero_shot_image_classification, - "zero-shot-classification": hf_inference.zero_shot_classification, - "image-to-image": hf_inference.image_to_image, - "sentence-similarity": hf_inference.sentence_similarity, - "table-question-answering": hf_inference.table_question_answering, - "tabular-classification": hf_inference.tabular_classification, - "text-to-speech": hf_inference.text_to_speech, - "token-classification": hf_inference.token_classification, - "translation": hf_inference.translation, + "text-to-image": HFInferenceTask("text-to-image"), + "conversational": HFInferenceConversational(), + "text-classification": HFInferenceTask("text-classification"), + "question-answering": HFInferenceTask("question-answering"), + "audio-classification": HFInferenceBinaryInputTask("audio-classification"), + "automatic-speech-recognition": HFInferenceTask("automatic-speech-recognition"), + "fill-mask": HFInferenceTask("fill-mask"), + "feature-extraction": HFInferenceTask("feature-extraction"), + "image-classification": HFInferenceBinaryInputTask("image-classification"), + "image-segmentation": HFInferenceBinaryInputTask("image-segmentation"), + "document-question-answering": HFInferenceTask("document-question-answering"), + "image-to-text": HFInferenceTask("image-to-text"), + "object-detection": HFInferenceBinaryInputTask("object-detection"), + "audio-to-audio": HFInferenceTask("audio-to-audio"), + "zero-shot-image-classification": HFInferenceBinaryInputTask("zero-shot-image-classification"), + "zero-shot-classification": HFInferenceTask("zero-shot-classification"), + 
"image-to-image": HFInferenceBinaryInputTask("image-to-image"), + "sentence-similarity": HFInferenceTask("sentence-similarity"), + "table-question-answering": HFInferenceTask("table-question-answering"), + "tabular-classification": HFInferenceTask("tabular-classification"), + "text-to-speech": HFInferenceTask("text-to-speech"), + "token-classification": HFInferenceTask("token-classification"), + "translation": HFInferenceTask("translation"), }, } diff --git a/src/huggingface_hub/inference/_providers/fal_ai/text_to_image.py b/src/huggingface_hub/inference/_providers/fal_ai/text_to_image.py index ad5a7bad0a..a2108a96e8 100644 --- a/src/huggingface_hub/inference/_providers/fal_ai/text_to_image.py +++ b/src/huggingface_hub/inference/_providers/fal_ai/text_to_image.py @@ -30,13 +30,7 @@ def prepare_headers(headers: Dict, *, token: Optional[str] = None) -> Dict: } -def prepare_payload( - inputs: Any, - parameters: Dict[str, Any], - model: Optional[str] = None, - *, - expect_binary: bool = False, -) -> Dict[str, Any]: +def prepare_payload(inputs: Any, parameters: Dict[str, Any], model: Optional[str]) -> Dict[str, Any]: parameters = {k: v for k, v in parameters.items() if v is not None} return {"json": {"prompt": inputs, **parameters}} diff --git a/src/huggingface_hub/inference/_providers/hf_inference/_common.py b/src/huggingface_hub/inference/_providers/hf_inference.py similarity index 50% rename from src/huggingface_hub/inference/_providers/hf_inference/_common.py rename to src/huggingface_hub/inference/_providers/hf_inference.py index a17b167e9a..948d1292b4 100644 --- a/src/huggingface_hub/inference/_providers/hf_inference/_common.py +++ b/src/huggingface_hub/inference/_providers/hf_inference.py @@ -1,27 +1,12 @@ import logging from pathlib import Path -from typing import ( - TYPE_CHECKING, - Any, - BinaryIO, - Dict, - List, - Optional, - Union, -) +from typing import Any, BinaryIO, Dict, List, Optional, Union from huggingface_hub.constants import ENDPOINT -from huggingface_hub.inference._common import _b64_encode -from huggingface_hub.utils import ( - build_hf_headers, - get_session, - hf_raise_for_status, -) +from huggingface_hub.inference._common import _b64_encode, _open_as_binary +from huggingface_hub.utils import build_hf_headers, get_session, hf_raise_for_status -if TYPE_CHECKING: - pass - # TYPES UrlT = str PathT = Union[str, Path] @@ -39,6 +24,8 @@ # Will be globally fetched only once (see '_fetch_recommended_models') _RECOMMENDED_MODELS: Optional[Dict[str, Optional[str]]] = None +BASE_URL = "https://api-inference.huggingface.co" + def _first_or_none(items: List[Any]) -> Optional[Any]: try: @@ -82,67 +69,66 @@ def get_recommended_model(task: str) -> str: return model -class BaseInferenceTask: +class HFInferenceTask: """Base class for HF Inference API tasks.""" - BASE_URL = "https://api-inference.huggingface.co" - TASK_NAME: str = "" # To be defined by subclasses + def __init__(self, task: str): + self.task = task - @classmethod - def build_url(cls, model: Optional[str] = None) -> str: + def build_url(self, model: Optional[str] = None) -> str: if model is None: - model = get_recommended_model(cls.TASK_NAME) - return f"{cls.BASE_URL}/models/{model}" + model = get_recommended_model(self.task) + return f"{BASE_URL}/models/{model}" - @staticmethod - def map_model(model: str) -> str: + def map_model(self, model: str) -> str: return model - @staticmethod - def prepare_headers(headers: Dict, *, token: Optional[str] = None) -> Dict: + def prepare_headers(self, headers: Dict, *, token: 
Optional[str] = None) -> Dict: return headers - @classmethod - def prepare_payload( - cls, - inputs: Any, - parameters: Dict[str, Any], - model: Optional[str] = None, - *, - expect_binary: bool = False, - ) -> Dict[str, Any]: - """ - Prepare the payload for an API request, handling various input types and parameters. - `expect_binary` is set to `True` when the inputs are a binary object or a local path or URL. This is the case for image and audio inputs. - """ - if parameters is None: - parameters = {} + def prepare_payload(self, inputs: Any, parameters: Dict[str, Any], model: Optional[str]) -> Dict[str, Any]: + if isinstance(inputs, (bytes, Path)): + raise ValueError(f"Unexpected binary inputs. Got {inputs}") # type: ignore + + return { + "json": { + inputs: inputs, + parameters: {k: v for k, v in parameters.items() if v is not None}, + } + } + + def get_response(self, response: Union[bytes, Dict]) -> Any: + return response + + +class HFInferenceBinaryInputTask(HFInferenceTask): + def prepare_payload(self, inputs: Any, parameters: Dict[str, Any], model: Optional[str]) -> Dict[str, Any]: parameters = {k: v for k, v in parameters.items() if v is not None} has_parameters = len(parameters) > 0 - is_binary = isinstance(inputs, (bytes, Path)) - # If expect_binary is True, inputs must be a binary object or a local path or a URL. - if expect_binary and not is_binary and not isinstance(inputs, str): - raise ValueError(f"Expected binary inputs or a local path or a URL. Got {inputs}") # type: ignore + # Raise if not a binary object or a local path or a URL. + if not isinstance(inputs, (bytes, Path)) and not isinstance(inputs, str): + raise ValueError(f"Expected binary inputs or a local path or a URL. Got {inputs}") + # Send inputs as raw content when no parameters are provided - if expect_binary and not has_parameters: - return {"data": inputs} - # If expect_binary is False, inputs must not be a binary object. - if not expect_binary and is_binary: - raise ValueError(f"Unexpected binary inputs. 
Got {inputs}") # type: ignore + if not has_parameters: + with _open_as_binary(inputs) as data: + data_as_bytes = data if isinstance(data, bytes) else data.read() + return {"data": data_as_bytes} - json: Dict[str, Any] = {} - # If inputs is a bytes-like object, encode it to base64 - if expect_binary: - json["inputs"] = _b64_encode(inputs) # type: ignore - # Otherwise (string, dict, list) send it as is - else: - json["inputs"] = inputs - # Add parameters to the json payload if any - if has_parameters: - json["parameters"] = parameters - return {"json": json} - - @staticmethod - def get_response(response: Union[bytes, Dict]) -> Any: - return response + # Otherwise encode as b64 + return {"json": {"inputs": _b64_encode(inputs), "parameters": parameters}} + + +class HFInferenceConversational(HFInferenceTask): + def __init__(self): + super().__init__("conversational") + + def build_url(self, model: Optional[str] = None) -> str: + if model is None: + model = get_recommended_model("text-generation") + return f"{BASE_URL}/models/{model}/v1/chat/completions" + + def prepare_payload(self, inputs: Any, parameters: Dict[str, Any], model: Optional[str]) -> Dict[str, Any]: + parameters = {key: value for key, value in parameters.items() if value is not None} + return {"model": model, "messages": inputs, **parameters} diff --git a/src/huggingface_hub/inference/_providers/hf_inference/__init__.py b/src/huggingface_hub/inference/_providers/hf_inference/__init__.py deleted file mode 100644 index 2a2884bd1e..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -# ruff: noqa: F401 -# Import modules individually to avoid circular dependencies -from . import ( - _common, - audio_classification, - audio_to_audio, - automatic_speech_recognition, - conversational, - document_question_answering, - feature_extraction, - fill_mask, - image_classification, - image_segmentation, - image_to_image, - image_to_text, - object_detection, - question_answering, - sentence_similarity, - table_question_answering, - tabular_classification, - text_classification, - text_to_image, - text_to_speech, - token_classification, - translation, - zero_shot_classification, - zero_shot_image_classification, -) diff --git a/src/huggingface_hub/inference/_providers/hf_inference/audio_classification.py b/src/huggingface_hub/inference/_providers/hf_inference/audio_classification.py deleted file mode 100644 index 4d927c5953..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/audio_classification.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class AudioClassification(BaseInferenceTask): - TASK_NAME = "audio-classification" - - -build_url = AudioClassification.build_url -map_model = AudioClassification.map_model -prepare_headers = AudioClassification.prepare_headers -prepare_payload = AudioClassification.prepare_payload -get_response = AudioClassification.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/audio_to_audio.py b/src/huggingface_hub/inference/_providers/hf_inference/audio_to_audio.py deleted file mode 100644 index 121526e41c..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/audio_to_audio.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class AudioToAudio(BaseInferenceTask): - TASK_NAME = "audio-to-audio" - - -build_url = AudioToAudio.build_url -map_model = AudioToAudio.map_model -prepare_headers = AudioToAudio.prepare_headers -prepare_payload = 
AudioToAudio.prepare_payload -get_response = AudioToAudio.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/automatic_speech_recognition.py b/src/huggingface_hub/inference/_providers/hf_inference/automatic_speech_recognition.py deleted file mode 100644 index 0817c26a5b..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/automatic_speech_recognition.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class AutomaticSpeechRecognition(BaseInferenceTask): - TASK_NAME = "automatic-speech-recognition" - - -build_url = AutomaticSpeechRecognition.build_url -map_model = AutomaticSpeechRecognition.map_model -prepare_headers = AutomaticSpeechRecognition.prepare_headers -prepare_payload = AutomaticSpeechRecognition.prepare_payload -get_response = AutomaticSpeechRecognition.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/conversational.py b/src/huggingface_hub/inference/_providers/hf_inference/conversational.py deleted file mode 100644 index 7bfd9804ca..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/conversational.py +++ /dev/null @@ -1,42 +0,0 @@ -from typing import Any, Dict, Optional - -from ._common import BaseInferenceTask, get_recommended_model - - -class Conversational(BaseInferenceTask): - TASK_NAME = "text-generation" - - @classmethod - def build_url(cls, model: Optional[str] = None) -> str: - if model is None: - model = get_recommended_model(cls.TASK_NAME) - url = f"{cls.BASE_URL}/models/{model}" - url = url.rstrip("/") - if url.endswith("/v1"): - url += "/chat/completions" - elif not url.endswith("/chat/completions"): - url += "/v1/chat/completions" - return url - - @classmethod - def prepare_payload( - cls, - inputs: Any, - parameters: Dict[str, Any], - model: Optional[str] = None, - *, - expect_binary: bool = False, - ) -> Dict[str, Any]: - payload = { - "model": model, - "messages": inputs, - **parameters, - } - return {key: value for key, value in payload.items() if value is not None} - - -build_url = Conversational.build_url -map_model = Conversational.map_model -prepare_headers = Conversational.prepare_headers -prepare_payload = Conversational.prepare_payload -get_response = Conversational.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/document_question_answering.py b/src/huggingface_hub/inference/_providers/hf_inference/document_question_answering.py deleted file mode 100644 index 4fffa9dc9b..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/document_question_answering.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class DocumentQuestionAnswering(BaseInferenceTask): - TASK_NAME = "document-question-answering" - - -build_url = DocumentQuestionAnswering.build_url -map_model = DocumentQuestionAnswering.map_model -prepare_headers = DocumentQuestionAnswering.prepare_headers -prepare_payload = DocumentQuestionAnswering.prepare_payload -get_response = DocumentQuestionAnswering.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/feature_extraction.py b/src/huggingface_hub/inference/_providers/hf_inference/feature_extraction.py deleted file mode 100644 index 33d9961d43..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/feature_extraction.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class FeatureExtraction(BaseInferenceTask): - TASK_NAME = "feature-extraction" - - -build_url = 
FeatureExtraction.build_url -map_model = FeatureExtraction.map_model -prepare_headers = FeatureExtraction.prepare_headers -prepare_payload = FeatureExtraction.prepare_payload -get_response = FeatureExtraction.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/fill_mask.py b/src/huggingface_hub/inference/_providers/hf_inference/fill_mask.py deleted file mode 100644 index d766040bfb..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/fill_mask.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class FillMask(BaseInferenceTask): - TASK_NAME = "fill-mask" - - -build_url = FillMask.build_url -map_model = FillMask.map_model -prepare_headers = FillMask.prepare_headers -prepare_payload = FillMask.prepare_payload -get_response = FillMask.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/image_classification.py b/src/huggingface_hub/inference/_providers/hf_inference/image_classification.py deleted file mode 100644 index 9e72ce26bf..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/image_classification.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class ImageClassification(BaseInferenceTask): - TASK_NAME = "image-classification" - - -build_url = ImageClassification.build_url -map_model = ImageClassification.map_model -prepare_headers = ImageClassification.prepare_headers -prepare_payload = ImageClassification.prepare_payload -get_response = ImageClassification.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/image_segmentation.py b/src/huggingface_hub/inference/_providers/hf_inference/image_segmentation.py deleted file mode 100644 index 183ee164d5..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/image_segmentation.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class ImageSegmentation(BaseInferenceTask): - TASK_NAME = "image-segmentation" - - -build_url = ImageSegmentation.build_url -map_model = ImageSegmentation.map_model -prepare_headers = ImageSegmentation.prepare_headers -prepare_payload = ImageSegmentation.prepare_payload -get_response = ImageSegmentation.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/image_to_image.py b/src/huggingface_hub/inference/_providers/hf_inference/image_to_image.py deleted file mode 100644 index 51fb67b3b7..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/image_to_image.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class ImageToImage(BaseInferenceTask): - TASK_NAME = "image-to-image" - - -build_url = ImageToImage.build_url -map_model = ImageToImage.map_model -prepare_headers = ImageToImage.prepare_headers -prepare_payload = ImageToImage.prepare_payload -get_response = ImageToImage.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/image_to_text.py b/src/huggingface_hub/inference/_providers/hf_inference/image_to_text.py deleted file mode 100644 index bf2acdbb4f..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/image_to_text.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class ImageToText(BaseInferenceTask): - TASK_NAME = "image-to-text" - - -build_url = ImageToText.build_url -map_model = ImageToText.map_model -prepare_headers = ImageToText.prepare_headers -prepare_payload = ImageToText.prepare_payload -get_response = ImageToText.get_response diff --git 
a/src/huggingface_hub/inference/_providers/hf_inference/image_zero_shot_classification.py b/src/huggingface_hub/inference/_providers/hf_inference/image_zero_shot_classification.py deleted file mode 100644 index 7b2855c3ef..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/image_zero_shot_classification.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class ImageZeroShotClassification(BaseInferenceTask): - TASK_NAME = "image-zero-shot-classification" - - -build_url = ImageZeroShotClassification.build_url -map_model = ImageZeroShotClassification.map_model -prepare_headers = ImageZeroShotClassification.prepare_headers -prepare_payload = ImageZeroShotClassification.prepare_payload -get_response = ImageZeroShotClassification.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/object_detection.py b/src/huggingface_hub/inference/_providers/hf_inference/object_detection.py deleted file mode 100644 index 3e06fd7d47..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/object_detection.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class ObjectDetection(BaseInferenceTask): - TASK_NAME = "object-detection" - - -build_url = ObjectDetection.build_url -map_model = ObjectDetection.map_model -prepare_headers = ObjectDetection.prepare_headers -prepare_payload = ObjectDetection.prepare_payload -get_response = ObjectDetection.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/question_answering.py b/src/huggingface_hub/inference/_providers/hf_inference/question_answering.py deleted file mode 100644 index 06ecac3b95..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/question_answering.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class QuestionAnswering(BaseInferenceTask): - TASK_NAME = "question-answering" - - -build_url = QuestionAnswering.build_url -map_model = QuestionAnswering.map_model -prepare_headers = QuestionAnswering.prepare_headers -prepare_payload = QuestionAnswering.prepare_payload -get_response = QuestionAnswering.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/sentence_similarity.py b/src/huggingface_hub/inference/_providers/hf_inference/sentence_similarity.py deleted file mode 100644 index 6eb05ae5a1..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/sentence_similarity.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class SentenceSimilarity(BaseInferenceTask): - TASK_NAME = "sentence-similarity" - - -build_url = SentenceSimilarity.build_url -map_model = SentenceSimilarity.map_model -prepare_headers = SentenceSimilarity.prepare_headers -prepare_payload = SentenceSimilarity.prepare_payload -get_response = SentenceSimilarity.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/summarization.py b/src/huggingface_hub/inference/_providers/hf_inference/summarization.py deleted file mode 100644 index f0357652e8..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/summarization.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class Summarization(BaseInferenceTask): - TASK_NAME = "summarization" - - -build_url = Summarization.build_url -map_model = Summarization.map_model -prepare_headers = Summarization.prepare_headers -prepare_payload = Summarization.prepare_payload -get_response = Summarization.get_response diff --git 
a/src/huggingface_hub/inference/_providers/hf_inference/table_question_answering.py b/src/huggingface_hub/inference/_providers/hf_inference/table_question_answering.py deleted file mode 100644 index 35ea757cee..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/table_question_answering.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class TableQuestionAnswering(BaseInferenceTask): - TASK_NAME = "table-question-answering" - - -build_url = TableQuestionAnswering.build_url -map_model = TableQuestionAnswering.map_model -prepare_headers = TableQuestionAnswering.prepare_headers -prepare_payload = TableQuestionAnswering.prepare_payload -get_response = TableQuestionAnswering.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/tabular_classification.py b/src/huggingface_hub/inference/_providers/hf_inference/tabular_classification.py deleted file mode 100644 index 82273c2358..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/tabular_classification.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class TabularClassification(BaseInferenceTask): - TASK_NAME = "tabular-classification" - - -build_url = TabularClassification.build_url -map_model = TabularClassification.map_model -prepare_headers = TabularClassification.prepare_headers -prepare_payload = TabularClassification.prepare_payload -get_response = TabularClassification.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/tabular_regression.py b/src/huggingface_hub/inference/_providers/hf_inference/tabular_regression.py deleted file mode 100644 index 88bd726750..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/tabular_regression.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class TabularRegression(BaseInferenceTask): - TASK_NAME = "tabular-regression" - - -build_url = TabularRegression.build_url -map_model = TabularRegression.map_model -prepare_headers = TabularRegression.prepare_headers -prepare_payload = TabularRegression.prepare_payload -get_response = TabularRegression.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/text_classification.py b/src/huggingface_hub/inference/_providers/hf_inference/text_classification.py deleted file mode 100644 index ecd7c4213f..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/text_classification.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class TextClassification(BaseInferenceTask): - TASK_NAME = "text-classification" - - -build_url = TextClassification.build_url -map_model = TextClassification.map_model -prepare_headers = TextClassification.prepare_headers -prepare_payload = TextClassification.prepare_payload -get_response = TextClassification.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/text_to_image.py b/src/huggingface_hub/inference/_providers/hf_inference/text_to_image.py deleted file mode 100644 index cbb7433880..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/text_to_image.py +++ /dev/null @@ -1,30 +0,0 @@ -from typing import Any, Dict, Optional - -from ._common import BaseInferenceTask - - -class TextToImage(BaseInferenceTask): - TASK_NAME = "text-to-image" - - @classmethod - def prepare_payload( - cls, - inputs: Any, - parameters: Dict[str, Any], - model: Optional[str] = None, - *, - expect_binary: bool = False, - ) -> Dict[str, Any]: - payload = { - 
"model": model, - "messages": inputs, - **parameters, - } - return {key: value for key, value in payload.items() if value is not None} - - -build_url = TextToImage.build_url -map_model = TextToImage.map_model -prepare_headers = TextToImage.prepare_headers -prepare_payload = TextToImage.prepare_payload -get_response = TextToImage.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/text_to_speech.py b/src/huggingface_hub/inference/_providers/hf_inference/text_to_speech.py deleted file mode 100644 index 26f861c91e..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/text_to_speech.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class TextToSpeech(BaseInferenceTask): - TASK_NAME = "text-to-speech" - - -build_url = TextToSpeech.build_url -map_model = TextToSpeech.map_model -prepare_headers = TextToSpeech.prepare_headers -prepare_payload = TextToSpeech.prepare_payload -get_response = TextToSpeech.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/token_classification.py b/src/huggingface_hub/inference/_providers/hf_inference/token_classification.py deleted file mode 100644 index 0b460f2a3d..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/token_classification.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class TokenClassification(BaseInferenceTask): - TASK_NAME = "token-classification" - - -build_url = TokenClassification.build_url -map_model = TokenClassification.map_model -prepare_headers = TokenClassification.prepare_headers -prepare_payload = TokenClassification.prepare_payload -get_response = TokenClassification.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/translation.py b/src/huggingface_hub/inference/_providers/hf_inference/translation.py deleted file mode 100644 index bd481178a9..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/translation.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class Translation(BaseInferenceTask): - TASK_NAME = "translation" - - -build_url = Translation.build_url -map_model = Translation.map_model -prepare_headers = Translation.prepare_headers -prepare_payload = Translation.prepare_payload -get_response = Translation.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/visual_question_answering.py b/src/huggingface_hub/inference/_providers/hf_inference/visual_question_answering.py deleted file mode 100644 index 3ac830d5cd..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/visual_question_answering.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class VisualQuestionAnswering(BaseInferenceTask): - TASK_NAME = "visual-question-answering" - - -build_url = VisualQuestionAnswering.build_url -map_model = VisualQuestionAnswering.map_model -prepare_headers = VisualQuestionAnswering.prepare_headers -prepare_payload = VisualQuestionAnswering.prepare_payload -get_response = VisualQuestionAnswering.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/zero_shot_classification.py b/src/huggingface_hub/inference/_providers/hf_inference/zero_shot_classification.py deleted file mode 100644 index adcd4f3ec0..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/zero_shot_classification.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class ZeroShotClassification(BaseInferenceTask): - 
TASK_NAME = "zero-shot-classification" - - -build_url = ZeroShotClassification.build_url -map_model = ZeroShotClassification.map_model -prepare_headers = ZeroShotClassification.prepare_headers -prepare_payload = ZeroShotClassification.prepare_payload -get_response = ZeroShotClassification.get_response diff --git a/src/huggingface_hub/inference/_providers/hf_inference/zero_shot_image_classification.py b/src/huggingface_hub/inference/_providers/hf_inference/zero_shot_image_classification.py deleted file mode 100644 index 77dc211f26..0000000000 --- a/src/huggingface_hub/inference/_providers/hf_inference/zero_shot_image_classification.py +++ /dev/null @@ -1,12 +0,0 @@ -from ._common import BaseInferenceTask - - -class ZeroShotImageClassification(BaseInferenceTask): - TASK_NAME = "zero-shot-image-classification" - - -build_url = ZeroShotImageClassification.build_url -map_model = ZeroShotImageClassification.map_model -prepare_headers = ZeroShotImageClassification.prepare_headers -prepare_payload = ZeroShotImageClassification.prepare_payload -get_response = ZeroShotImageClassification.get_response diff --git a/src/huggingface_hub/inference/_providers/replicate/text_to_image.py b/src/huggingface_hub/inference/_providers/replicate/text_to_image.py index 165b05cae0..c413a423b6 100644 --- a/src/huggingface_hub/inference/_providers/replicate/text_to_image.py +++ b/src/huggingface_hub/inference/_providers/replicate/text_to_image.py @@ -30,15 +30,9 @@ def prepare_headers(headers: Dict, *, token: Optional[str] = None) -> Dict: return headers -def prepare_payload( - inputs: Any, - parameters: Dict[str, Any], - model: Optional[str] = None, - *, - expect_binary: bool = False, -) -> Dict[str, Any]: - parameters = {k: v for k, v in parameters.items() if v is not None} - payload = {"json": {"input": {"prompt": inputs, **parameters}}} +def prepare_payload(inputs: Any, parameters: Dict[str, Any], model: Optional[str]) -> Dict[str, Any]: + payload = {"json": {"input": {"prompt": inputs, **{k: v for k, v in parameters.items() if v is not None}}}} + model = parameters.get("model") if model is not None and ":" in model: version = model.split(":", 1)[1] payload["json"]["version"] = version # type: ignore diff --git a/src/huggingface_hub/inference/_providers/sambanova/conversational.py b/src/huggingface_hub/inference/_providers/sambanova/conversational.py index e6a43bc53a..0a0afaa394 100644 --- a/src/huggingface_hub/inference/_providers/sambanova/conversational.py +++ b/src/huggingface_hub/inference/_providers/sambanova/conversational.py @@ -34,13 +34,7 @@ def prepare_headers(headers: Dict, *, token: Optional[str] = None) -> Dict: return headers -def prepare_payload( - inputs: Any, - parameters: Dict[str, Any], - model: Optional[str] = None, - *, - expect_binary: bool = False, -) -> Dict[str, Any]: +def prepare_payload(inputs: Any, parameters: Dict[str, Any], model: Optional[str] = None) -> Dict[str, Any]: payload = { "messages": inputs, "model": model, diff --git a/src/huggingface_hub/inference/_providers/together/conversational.py b/src/huggingface_hub/inference/_providers/together/conversational.py index c87cbb5917..3cecba2faf 100644 --- a/src/huggingface_hub/inference/_providers/together/conversational.py +++ b/src/huggingface_hub/inference/_providers/together/conversational.py @@ -52,13 +52,7 @@ def prepare_headers(headers: Dict, *, token: Optional[str] = None) -> Dict: return headers -def prepare_payload( - inputs: Any, - parameters: Dict[str, Any], - model: Optional[str] = None, - *, - expect_binary: bool = 
False, -) -> Dict[str, Any]: +def prepare_payload(inputs: Any, parameters: Dict[str, Any], model: Optional[str] = None) -> Dict[str, Any]: parameters = {key: value for key, value in parameters.items() if value is not None} payload = { "messages": inputs, diff --git a/src/huggingface_hub/inference/_providers/together/text_generation.py b/src/huggingface_hub/inference/_providers/together/text_generation.py index 88c8c4a165..c3838d52cc 100644 --- a/src/huggingface_hub/inference/_providers/together/text_generation.py +++ b/src/huggingface_hub/inference/_providers/together/text_generation.py @@ -24,13 +24,7 @@ def prepare_headers(headers: Dict, *, token: Optional[str] = None) -> Dict: return headers -def prepare_payload( - inputs: Any, - parameters: Dict[str, Any], - model: Optional[str] = None, - *, - expect_binary: bool = False, -) -> Dict[str, Any]: +def prepare_payload(inputs: Any, parameters: Dict[str, Any], model: Optional[str] = None) -> Dict[str, Any]: parameters = {key: value for key, value in parameters.items() if value is not None} payload = { "messages": inputs, diff --git a/src/huggingface_hub/inference/_providers/together/text_to_image.py b/src/huggingface_hub/inference/_providers/together/text_to_image.py index df323cef06..762f78dc63 100644 --- a/src/huggingface_hub/inference/_providers/together/text_to_image.py +++ b/src/huggingface_hub/inference/_providers/together/text_to_image.py @@ -30,13 +30,7 @@ def prepare_headers(headers: Dict, *, token: Optional[str] = None) -> Dict: return headers -def prepare_payload( - inputs: Any, - parameters: Dict[str, Any], - model: Optional[str] = None, - *, - expect_binary: bool = False, -) -> Dict[str, Any]: +def prepare_payload(inputs: Any, parameters: Dict[str, Any], model: Optional[str] = None) -> Dict[str, Any]: parameters = {key: value for key, value in parameters.items() if value is not None} payload = {"json": {"prompt": inputs, "model": model, "response_format": "base64", **parameters}} return payload
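Reviewer note: below is a minimal usage sketch (not part of the patch) of how the refactored class-based helpers fit together. `PROVIDERS`, `TaskProviderHelper`, and the `prepare_payload(inputs, parameters, model)` signature come from the diff above; `resolve_payload` is a hypothetical stand-in for the lookup that `get_provider_helper` (imported in `_client.py` but not defined in this diff) presumably performs.

    # Sketch only: assumes the PROVIDERS mapping and helper classes introduced above.
    from typing import Any, Dict, Optional

    from huggingface_hub.inference._providers import PROVIDERS, TaskProviderHelper


    def resolve_payload(
        provider: str, task: str, inputs: Any, parameters: Dict[str, Any], model: Optional[str] = None
    ) -> Dict[str, Any]:
        # Hypothetical stand-in for get_provider_helper(): a plain dict lookup.
        helper: TaskProviderHelper = PROVIDERS[provider][task]
        model = helper.map_model(model=model)
        # Every helper now receives `model` explicitly; the old `expect_binary` flag is
        # gone because binary handling lives in HFInferenceBinaryInputTask itself.
        return helper.prepare_payload(inputs, parameters=parameters, model=model)

With the HFInferenceTask / HFInferenceBinaryInputTask split, a binary-input task picks its wire format from the parameters: `prepare_payload(audio_bytes, parameters={}, model=...)` returns `{"data": audio_bytes}` (raw upload), while `prepare_payload(audio_bytes, parameters={"top_k": 3}, model=...)` returns `{"json": {"inputs": "<base64>", "parameters": {"top_k": 3}}}`.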