diff --git a/setup.py b/setup.py index 63414346..7d8c4bb6 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ # We don't declare our dependency on transformers here because we build with # different packages for different variants -VERSION = "0.5.2" +VERSION = "0.5.3" # Ubuntu packages # libsndfile1-dev: torchaudio requires the development version of the libsndfile package which can be installed via a system package manager. On Ubuntu it can be installed as follows: apt install libsndfile1-dev @@ -13,7 +13,7 @@ # libavcodec-extra : libavcodec-extra includes additional codecs for ffmpeg install_requires = [ - "transformers[sklearn,sentencepiece,audio,vision,sentencepiece]==4.46.1", + "transformers[sklearn,sentencepiece,audio,vision]==4.47.0", "huggingface_hub[hf_transfer]==0.26.2", # vision "Pillow", @@ -31,11 +31,11 @@ extras = {} -extras["st"] = ["sentence_transformers==3.2.1"] -extras["diffusers"] = ["diffusers==0.31.0", "accelerate==1.0.1"] +extras["st"] = ["sentence_transformers==3.3.1"] +extras["diffusers"] = ["diffusers==0.31.0", "accelerate==1.1.0"] # Includes `peft` as PEFT requires `torch` so having `peft` as a core dependency # means that `torch` will be installed even if the `torch` extra is not specified. -extras["torch"] = ["torch==2.3.1", "torchvision", "torchaudio", "peft==0.13.2"] +extras["torch"] = ["torch==2.3.1", "torchvision", "torchaudio", "peft==0.14.0"] extras["test"] = [ "pytest==7.2.1", "pytest-xdist", diff --git a/src/huggingface_inference_toolkit/diffusers_utils.py b/src/huggingface_inference_toolkit/diffusers_utils.py index 70b683ab..47ddf390 100644 --- a/src/huggingface_inference_toolkit/diffusers_utils.py +++ b/src/huggingface_inference_toolkit/diffusers_utils.py @@ -22,9 +22,7 @@ def is_diffusers_available(): class IEAutoPipelineForText2Image: - def __init__( - self, model_dir: str, device: Union[str, None] = None, **kwargs - ): # needs "cuda" for GPU + def __init__(self, model_dir: str, device: Union[str, None] = None, **kwargs): # needs "cuda" for GPU dtype = torch.float32 if device == "cuda": dtype = torch.bfloat16 if is_torch_bf16_gpu_available() else torch.float16 @@ -36,9 +34,7 @@ def __init__( # try to use DPMSolverMultistepScheduler if isinstance(self.pipeline, StableDiffusionPipeline): try: - self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config( - self.pipeline.scheduler.config - ) + self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(self.pipeline.scheduler.config) except Exception: pass @@ -47,6 +43,13 @@ def __call__( prompt, **kwargs, ): + if "prompt" in kwargs: + logger.warning( + "prompt has been provided twice, both via arg and kwargs, so the `prompt` arg will be used " + "instead, and the `prompt` in kwargs will be discarded." + ) + kwargs.pop("prompt") + # diffusers doesn't support seed but rather the generator kwarg # see: https://github.com/huggingface/api-inference-community/blob/8e577e2d60957959ba02f474b2913d84a9086b82/docker_images/diffusers/app/pipelines/text_to_image.py#L172-L176 if "seed" in kwargs: @@ -58,9 +61,16 @@ def __call__( # TODO: add support for more images (Reason is correct output) if "num_images_per_prompt" in kwargs: kwargs.pop("num_images_per_prompt") - logger.warning( - "Sending num_images_per_prompt > 1 to pipeline is not supported. Using default value 1." - ) + logger.warning("Sending num_images_per_prompt > 1 to pipeline is not supported. 
Using default value 1.") + + if "target_size" in kwargs: + kwargs["height"] = kwargs["target_size"].pop("height", None) + kwargs["width"] = kwargs["target_size"].pop("width", None) + kwargs.pop("target_size") + + if "output_type" in kwargs and kwargs["output_type"] != "pil": + kwargs.pop("output_type") + logger.warning("The `output_type` cannot be modified, and PIL will be used by default instead.") # Call pipeline with parameters out = self.pipeline(prompt, num_images_per_prompt=1, **kwargs) diff --git a/src/huggingface_inference_toolkit/handler.py b/src/huggingface_inference_toolkit/handler.py index 636f185b..66f4acdc 100644 --- a/src/huggingface_inference_toolkit/handler.py +++ b/src/huggingface_inference_toolkit/handler.py @@ -1,8 +1,9 @@ import os from pathlib import Path -from typing import Optional, Union +from typing import Any, Dict, Literal, Optional, Union from huggingface_inference_toolkit.const import HF_TRUST_REMOTE_CODE +from huggingface_inference_toolkit.sentence_transformers_utils import SENTENCE_TRANSFORMERS_TASKS from huggingface_inference_toolkit.utils import ( check_and_register_custom_pipeline_from_directory, get_pipeline, @@ -12,18 +13,20 @@ class HuggingFaceHandler: """ A Default Hugging Face Inference Handler which works with all - transformers pipelines, Sentence Transformers and Optimum. + Transformers, Diffusers, Sentence Transformers and Optimum pipelines. """ - def __init__(self, model_dir: Union[str, Path], task=None, framework="pt"): + def __init__( + self, model_dir: Union[str, Path], task: Union[str, None] = None, framework: Literal["pt"] = "pt" + ) -> None: self.pipeline = get_pipeline( - model_dir=model_dir, - task=task, + model_dir=model_dir, # type: ignore + task=task, # type: ignore framework=framework, trust_remote_code=HF_TRUST_REMOTE_CODE, ) - def __call__(self, data): + def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]: """ Handles an inference request with input data and makes a prediction. Args: @@ -31,15 +34,66 @@ def __call__(self, data): :return: prediction output """ inputs = data.pop("inputs", data) - parameters = data.pop("parameters", None) - - # pass inputs with all kwargs in data - if parameters is not None: - prediction = self.pipeline(inputs, **parameters) - else: - prediction = self.pipeline(inputs) - # postprocess the prediction - return prediction + parameters = data.pop("parameters", {}) + + # sentence transformers pipelines do not have the `task` arg + if any(isinstance(self.pipeline, v) for v in SENTENCE_TRANSFORMERS_TASKS.values()): + return self.pipeline(**inputs) if isinstance(inputs, dict) else self.pipeline(inputs) # type: ignore + + if self.pipeline.task == "question-answering": + if not isinstance(inputs, dict): + raise ValueError(f"inputs must be a dict, but a `{type(inputs)}` was provided instead.") + if not all(k in inputs for k in {"question", "context"}): + raise ValueError( + f"{self.pipeline.task} expects `inputs` to be a dict containing both `question` and " + "`context` as the keys, both of them being either a `str` or a `List[str]`." + ) + + if self.pipeline.task == "table-question-answering": + if not isinstance(inputs, dict): + raise ValueError(f"inputs must be a dict, but a `{type(inputs)}` was provided instead.") + if "question" in inputs: + inputs["query"] = inputs.pop("question") + if not all(k in inputs for k in {"table", "query"}): + raise ValueError( + f"{self.pipeline.task} expects `inputs` to be a dict containing the keys `table` and " + "either `question` or `query`." 
+ ) + + if self.pipeline.task.__contains__("translation") or self.pipeline.task in { + "text-generation", + "image-to-text", + "automatic-speech-recognition", + "text-to-audio", + "text-to-speech", + }: + # `generate_kwargs` needs to be a dict, `generation_parameters` is here for forward compatibility + if "generation_parameters" in parameters: + parameters["generate_kwargs"] = parameters.pop("generation_parameters") + + if self.pipeline.task.__contains__("translation") or self.pipeline.task in {"text-generation"}: + # flatten the values of `generate_kwargs` as it's not supported as is, but via top-level parameters + generate_kwargs = parameters.pop("generate_kwargs", {}) + for key, value in generate_kwargs.items(): + parameters[key] = value + + if self.pipeline.task.__contains__("zero-shot-classification"): + if "candidateLabels" in parameters: + parameters["candidate_labels"] = parameters.pop("candidateLabels") + if not isinstance(inputs, dict): + inputs = {"sequences": inputs} + if "text" in inputs: + inputs["sequences"] = inputs.pop("text") + if not all(k in inputs for k in {"sequences"}) or not all(k in parameters for k in {"candidate_labels"}): + raise ValueError( + f"{self.pipeline.task} expects `inputs` to be either a string or a dict containing the " + "key `text` or `sequences`, and `parameters` to be a dict containing either `candidate_labels` " + "or `candidateLabels`." + ) + + return ( + self.pipeline(**inputs, **parameters) if isinstance(inputs, dict) else self.pipeline(inputs, **parameters) # type: ignore + ) class VertexAIHandler(HuggingFaceHandler): @@ -48,10 +102,12 @@ class VertexAIHandler(HuggingFaceHandler): Vertex AI specific logic for inference. """ - def __init__(self, model_dir: Union[str, Path], task=None, framework="pt"): - super().__init__(model_dir, task, framework) + def __init__( + self, model_dir: Union[str, Path], task: Union[str, None] = None, framework: Literal["pt"] = "pt" + ) -> None: + super().__init__(model_dir=model_dir, task=task, framework=framework) - def __call__(self, data): + def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]: """ Handles an inference request with input data and makes a prediction. Args: @@ -59,10 +115,8 @@ def __call__(self, data): :return: prediction output """ if "instances" not in data: - raise ValueError( - "The request body must contain a key 'instances' with a list of instances." - ) - parameters = data.pop("parameters", None) + raise ValueError("The request body must contain a key 'instances' with a list of instances.") + parameters = data.pop("parameters", {}) predictions = [] # iterate over all instances and make predictions @@ -74,9 +128,7 @@ def __call__(self, data): return {"predictions": predictions} -def get_inference_handler_either_custom_or_default_handler( - model_dir: Path, task: Optional[str] = None -): +def get_inference_handler_either_custom_or_default_handler(model_dir: Path, task: Optional[str] = None) -> Any: """ Returns the appropriate inference handler based on the given model directory and task. @@ -88,9 +140,10 @@ def get_inference_handler_either_custom_or_default_handler( InferenceHandler: The appropriate inference handler based on the given model directory and task. 
""" custom_pipeline = check_and_register_custom_pipeline_from_directory(model_dir) - if custom_pipeline: + if custom_pipeline is not None: return custom_pipeline - elif os.environ.get("AIP_MODE", None) == "PREDICTION": + + if os.environ.get("AIP_MODE", None) == "PREDICTION": return VertexAIHandler(model_dir=model_dir, task=task) - else: - return HuggingFaceHandler(model_dir=model_dir, task=task) + + return HuggingFaceHandler(model_dir=model_dir, task=task) diff --git a/src/huggingface_inference_toolkit/sentence_transformers_utils.py b/src/huggingface_inference_toolkit/sentence_transformers_utils.py index 6b55ae76..0d648420 100644 --- a/src/huggingface_inference_toolkit/sentence_transformers_utils.py +++ b/src/huggingface_inference_toolkit/sentence_transformers_utils.py @@ -1,4 +1,10 @@ import importlib.util +from typing import Any, Dict, List, Tuple, Union + +try: + from typing import Literal +except ImportError: + from typing_extensions import Literal _sentence_transformers = importlib.util.find_spec("sentence_transformers") is not None @@ -12,40 +18,73 @@ def is_sentence_transformers_available(): class SentenceSimilarityPipeline: - def __init__(self, model_dir: str, device: str = None, **kwargs): # needs "cuda" for GPU + def __init__(self, model_dir: str, device: Union[str, None] = None, **kwargs: Any) -> None: + # `device` needs to be set to "cuda" for GPU self.model = SentenceTransformer(model_dir, device=device, **kwargs) - def __call__(self, inputs=None): - embeddings1 = self.model.encode( - inputs["source_sentence"], convert_to_tensor=True - ) - embeddings2 = self.model.encode(inputs["sentences"], convert_to_tensor=True) + def __call__(self, source_sentence: str, sentences: List[str]) -> Dict[str, float]: + embeddings1 = self.model.encode(source_sentence, convert_to_tensor=True) + embeddings2 = self.model.encode(sentences, convert_to_tensor=True) similarities = util.pytorch_cos_sim(embeddings1, embeddings2).tolist()[0] return {"similarities": similarities} class SentenceEmbeddingPipeline: - def __init__(self, model_dir: str, device: str = None, **kwargs): # needs "cuda" for GPU + def __init__(self, model_dir: str, device: Union[str, None] = None, **kwargs: Any) -> None: + # `device` needs to be set to "cuda" for GPU self.model = SentenceTransformer(model_dir, device=device, **kwargs) - def __call__(self, inputs): - embeddings = self.model.encode(inputs).tolist() + def __call__(self, sentences: Union[str, List[str]]) -> Dict[str, List[float]]: + embeddings = self.model.encode(sentences).tolist() return {"embeddings": embeddings} -class RankingPipeline: - def __init__(self, model_dir: str, device: str = None, **kwargs): # needs "cuda" for GPU +class SentenceRankingPipeline: + def __init__(self, model_dir: str, device: Union[str, None] = None, **kwargs: Any) -> None: + # `device` needs to be set to "cuda" for GPU self.model = CrossEncoder(model_dir, device=device, **kwargs) - def __call__(self, inputs): - scores = self.model.predict(inputs).tolist() - return {"scores": scores} + def __call__( + self, + sentences: Union[Tuple[str, str], List[str], List[List[str]], List[Tuple[str, str]], None] = None, + query: Union[str, None] = None, + texts: Union[List[str], None] = None, + return_documents: bool = False, + ) -> Union[Dict[str, List[float]], List[Dict[Literal["index", "score", "text"], Any]]]: + if all(x is not None for x in [sentences, query, texts]): + raise ValueError( + f"The provided payload contains {sentences=} (i.e. 
'inputs'), {query=}, and {texts=}" + " but all of those cannot be provided, you should provide either only 'sentences' i.e. 'inputs'" + " of both 'query' and 'texts' to run the ranking task." + ) + + if all(x is None for x in [sentences, query, texts]): + raise ValueError( + "No inputs have been provided within the input payload, make sure that the input payload" + " contains either 'sentences' i.e. 'inputs', or both 'query' and 'texts' to run the ranking task." + ) + + if sentences is not None: + scores = self.model.predict(sentences).tolist() + return {"scores": scores} + + if query is None or not isinstance(query, str): + raise ValueError(f"Provided {query=} but a non-empty string should be provided instead.") + + if texts is None or not isinstance(texts, list) or not all(isinstance(text, str) for text in texts): + raise ValueError(f"Provided {texts=}, but a list of non-empty strings should be provided instead.") + + scores = self.model.rank(query, texts, return_documents=return_documents) + # rename "corpus_id" key to "index" for all scores to match TEI + for score in scores: + score["index"] = score.pop("corpus_id") # type: ignore + return scores # type: ignore SENTENCE_TRANSFORMERS_TASKS = { "sentence-similarity": SentenceSimilarityPipeline, "sentence-embeddings": SentenceEmbeddingPipeline, - "sentence-ranking": RankingPipeline, + "sentence-ranking": SentenceRankingPipeline, } @@ -56,9 +95,5 @@ def get_sentence_transformers_pipeline(task=None, model_dir=None, device=-1, **k kwargs.pop("framework", None) if task not in SENTENCE_TRANSFORMERS_TASKS: - raise ValueError( - f"Unknown task {task}. Available tasks are: {', '.join(SENTENCE_TRANSFORMERS_TASKS.keys())}" - ) - return SENTENCE_TRANSFORMERS_TASKS[task]( - model_dir=model_dir, device=device, **kwargs - ) + raise ValueError(f"Unknown task {task}. Available tasks are: {', '.join(SENTENCE_TRANSFORMERS_TASKS.keys())}") + return SENTENCE_TRANSFORMERS_TASKS[task](model_dir=model_dir, device=device, **kwargs) diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py index a5ff7aee..8dc644c7 100644 --- a/src/huggingface_inference_toolkit/utils.py +++ b/src/huggingface_inference_toolkit/utils.py @@ -134,9 +134,7 @@ def _load_repository_from_hf( # create regex to only include the framework specific weights ignore_regex = create_artifact_filter(framework) - logger.info( - f"Ignore regex pattern for files, which are not downloaded: { ', '.join(ignore_regex) }" - ) + logger.info(f"Ignore regex pattern for files, which are not downloaded: { ', '.join(ignore_regex) }") # Download the repository to the workdir and filter out non-framework # specific weights @@ -177,9 +175,7 @@ def check_and_register_custom_pipeline_from_directory(model_dir): Please update to the new format. 
See documentation for more information.""" ) - spec = importlib.util.spec_from_file_location( - "pipeline.PreTrainedPipeline", legacy_module - ) + spec = importlib.util.spec_from_file_location("pipeline.PreTrainedPipeline", legacy_module) if spec: # add the whole directory to path for submodlues sys.path.insert(0, model_dir) @@ -208,23 +204,28 @@ def get_device(): def get_pipeline( - task: str, + task: Union[str, None], model_dir: Path, **kwargs, ) -> Pipeline: """ create pipeline class for a specific task based on local saved model """ - device = get_device() - if is_optimum_neuron_available(): - logger.info("Using device Neuron") - else: - logger.info(f"Using device { 'GPU' if device == 0 else 'CPU'}") - if task is None: raise EnvironmentError( "The task for this model is not set: Please set one: https://huggingface.co/docs#how-is-a-models-type-of-inference-api-and-widget-determined" ) + + if task == "conversational": + task = "text-generation" + + if is_optimum_neuron_available(): + logger.info("Using device Neuron") + return get_optimum_neuron_pipeline(task=task, model_dir=model_dir) + + device = get_device() + logger.info(f"Using device {'GPU' if device == 0 else 'CPU'}") + # define tokenizer or feature extractor as kwargs to load it the pipeline # correctly if task in { @@ -236,41 +237,27 @@ def get_pipeline( "zero-shot-image-classification", }: kwargs["feature_extractor"] = model_dir - elif task in {"image-to-text", "text-to-image"}: - pass - elif task == "conversational": - task = "text-generation" - else: + elif task not in {"image-to-text", "text-to-image"}: kwargs["tokenizer"] = model_dir - if is_optimum_neuron_available(): - hf_pipeline = get_optimum_neuron_pipeline(task=task, model_dir=model_dir) - elif is_sentence_transformers_available() and task in [ + if is_sentence_transformers_available() and task in [ "sentence-similarity", "sentence-embeddings", "sentence-ranking", ]: - hf_pipeline = get_sentence_transformers_pipeline( - task=task, model_dir=model_dir, device=device, **kwargs - ) + hf_pipeline = get_sentence_transformers_pipeline(task=task, model_dir=model_dir, device=device, **kwargs) elif is_diffusers_available() and task == "text-to-image": - hf_pipeline = get_diffusers_pipeline( - task=task, model_dir=model_dir, device=device, **kwargs - ) + hf_pipeline = get_diffusers_pipeline(task=task, model_dir=model_dir, device=device, **kwargs) else: hf_pipeline = pipeline(task=task, model=model_dir, device=device, **kwargs) - if task == "automatic-speech-recognition" and isinstance( - hf_pipeline.model, WhisperForConditionalGeneration - ): + if task == "automatic-speech-recognition" and isinstance(hf_pipeline.model, WhisperForConditionalGeneration): # set chunk length to 30s for whisper to enable long audio files hf_pipeline._preprocess_params["chunk_length_s"] = 30 - hf_pipeline.model.config.forced_decoder_ids = ( - hf_pipeline.tokenizer.get_decoder_prompt_ids( - language="english", task="transcribe" - ) + hf_pipeline.model.config.forced_decoder_ids = hf_pipeline.tokenizer.get_decoder_prompt_ids( + language="english", task="transcribe" ) - return hf_pipeline + return hf_pipeline # type: ignore def convert_params_to_int_or_bool(params): diff --git a/tests/unit/test_handler.py b/tests/unit/test_handler.py index 052a5bfc..2935d6e7 100644 --- a/tests/unit/test_handler.py +++ b/tests/unit/test_handler.py @@ -1,4 +1,5 @@ import tempfile +from typing import Dict import pytest from transformers.testing_utils import require_tf, require_torch @@ -14,11 +15,16 @@ TASK = 
"text-classification" MODEL = "hf-internal-testing/tiny-random-distilbert" -INPUT = {"inputs": "My name is Wolfgang and I live in Berlin"} + + +# defined as fixture because it's modified on `pop` +@pytest.fixture +def input_data(): + return {"inputs": "My name is Wolfgang and I live in Berlin"} @require_torch -def test_pt_get_device(): +def test_pt_get_device() -> None: import torch with tempfile.TemporaryDirectory() as tmpdirname: @@ -32,52 +38,45 @@ def test_pt_get_device(): @require_torch -def test_pt_predict_call(): +def test_pt_predict_call(input_data: Dict[str, str]) -> None: with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="pytorch") h = HuggingFaceHandler(model_dir=str(storage_dir), task=TASK) - prediction = h(INPUT) + prediction = h(input_data) assert "label" in prediction[0] assert "score" in prediction[0] @require_torch -def test_pt_custom_pipeline(): +def test_pt_custom_pipeline(input_data: Dict[str, str]) -> None: with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( "philschmid/custom-pipeline-text-classification", tmpdirname, framework="pytorch", ) - h = get_inference_handler_either_custom_or_default_handler( - str(storage_dir), task="custom" - ) - assert h(INPUT) == INPUT + h = get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="custom") + assert h(input_data) == input_data @require_torch -def test_pt_sentence_transformers_pipeline(): +def test_pt_sentence_transformers_pipeline(input_data: Dict[str, str]) -> None: with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( "sentence-transformers/all-MiniLM-L6-v2", tmpdirname, framework="pytorch" ) - h = get_inference_handler_either_custom_or_default_handler( - str(storage_dir), task="sentence-embeddings" - ) - pred = h(INPUT) + h = get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="sentence-embeddings") + pred = h(input_data) assert isinstance(pred["embeddings"], list) @require_tf def test_tf_get_device(): - with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py - storage_dir = _load_repository_from_hf( - MODEL, tmpdirname, framework="tensorflow" - ) + storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="tensorflow") h = HuggingFaceHandler(model_dir=str(storage_dir), task=TASK) if _is_gpu_available(): assert h.pipeline.device == 0 @@ -86,33 +85,27 @@ def test_tf_get_device(): @require_tf -def test_tf_predict_call(): +def test_tf_predict_call(input_data: Dict[str, str]) -> None: with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py - storage_dir = _load_repository_from_hf( - MODEL, tmpdirname, framework="tensorflow" - ) - handler = HuggingFaceHandler( - model_dir=str(storage_dir), task=TASK, framework="tf" - ) + storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="tensorflow") + handler = HuggingFaceHandler(model_dir=str(storage_dir), task=TASK, framework="tf") - prediction = handler(INPUT) + prediction = handler(input_data) assert "label" in prediction[0] assert "score" in prediction[0] @require_tf -def test_tf_custom_pipeline(): +def test_tf_custom_pipeline(input_data: Dict[str, str]) -> None: with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( 
"philschmid/custom-pipeline-text-classification", tmpdirname, framework="tensorflow", ) - h = get_inference_handler_either_custom_or_default_handler( - str(storage_dir), task="custom" - ) - assert h(INPUT) == INPUT + h = get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="custom") + assert h(input_data) == input_data @require_tf @@ -123,6 +116,4 @@ def test_tf_sentence_transformers_pipeline(): "sentence-transformers/all-MiniLM-L6-v2", tmpdirname, framework="tensorflow" ) with pytest.raises(Exception) as _exc_info: - get_inference_handler_either_custom_or_default_handler( - str(storage_dir), task="sentence-embeddings" - ) + get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="sentence-embeddings") diff --git a/tests/unit/test_sentence_transformers.py b/tests/unit/test_sentence_transformers.py index 635f39de..e48533bc 100644 --- a/tests/unit/test_sentence_transformers.py +++ b/tests/unit/test_sentence_transformers.py @@ -1,5 +1,6 @@ import tempfile +import pytest from transformers.testing_utils import require_torch from huggingface_inference_toolkit.sentence_transformers_utils import ( @@ -15,9 +16,7 @@ @require_torch def test_get_sentence_transformers_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: - storage_dir = _load_repository_from_hf( - "sentence-transformers/all-MiniLM-L6-v2", tmpdirname - ) + storage_dir = _load_repository_from_hf("sentence-transformers/all-MiniLM-L6-v2", tmpdirname) pipe = get_pipeline("sentence-embeddings", storage_dir.as_posix()) assert isinstance(pipe, SentenceEmbeddingPipeline) @@ -25,22 +24,21 @@ def test_get_sentence_transformers_pipeline(): @require_torch def test_sentence_embedding_task(): with tempfile.TemporaryDirectory() as tmpdirname: - storage_dir = _load_repository_from_hf( - "sentence-transformers/all-MiniLM-L6-v2", tmpdirname - ) + storage_dir = _load_repository_from_hf("sentence-transformers/all-MiniLM-L6-v2", tmpdirname) pipe = get_sentence_transformers_pipeline("sentence-embeddings", storage_dir.as_posix()) - res = pipe("Lets create an embedding") + res = pipe(sentences="Lets create an embedding") + assert isinstance(res["embeddings"], list) + res = pipe(sentences=["Lets create an embedding", "Lets create another embedding"]) assert isinstance(res["embeddings"], list) + assert len(res["embeddings"]) == 2 @require_torch def test_sentence_similarity(): with tempfile.TemporaryDirectory() as tmpdirname: - storage_dir = _load_repository_from_hf( - "sentence-transformers/all-MiniLM-L6-v2", tmpdirname - ) + storage_dir = _load_repository_from_hf("sentence-transformers/all-MiniLM-L6-v2", tmpdirname) pipe = get_sentence_transformers_pipeline("sentence-similarity", storage_dir.as_posix()) - res = pipe({"source_sentence": "Lets create an embedding", "sentences": ["Lets create an embedding"]}) + res = pipe(source_sentence="Lets create an embedding", sentences=["Lets create an embedding"]) assert isinstance(res["similarities"], list) @@ -50,13 +48,67 @@ def test_sentence_ranking(): storage_dir = _load_repository_from_hf("cross-encoder/ms-marco-MiniLM-L-6-v2", tmpdirname) pipe = get_sentence_transformers_pipeline("sentence-ranking", storage_dir.as_posix()) res = pipe( - [ - ["Lets create an embedding", "Lets create an embedding"], - ["Lets create an embedding", "Lets create an embedding"], + sentences=[ + ["Lets create an embedding", "Lets create another embedding"], + ["Lets create an embedding", "Lets create another embedding"], ] ) assert isinstance(res["scores"], list) + res = 
pipe(sentences=["Lets create an embedding", "Lets create an embedding"]) + assert isinstance(res["scores"], float) + + +@require_torch +def test_sentence_ranking_tei(): + with tempfile.TemporaryDirectory() as tmpdirname: + storage_dir = _load_repository_from_hf("cross-encoder/ms-marco-MiniLM-L-6-v2", tmpdirname, framework="pytorch") + pipe = get_sentence_transformers_pipeline("sentence-ranking", storage_dir.as_posix()) res = pipe( - ["Lets create an embedding", "Lets create an embedding"], + query="Lets create an embedding", + texts=["Lets create an embedding", "I like noodles"], ) - assert isinstance(res["scores"], float) + assert isinstance(res, list) + assert all(r.keys() == {"index", "score"} for r in res) + + res = pipe( + query="Lets create an embedding", + texts=["Lets create an embedding", "I like noodles"], + return_documents=True, + ) + assert isinstance(res, list) + assert all(r.keys() == {"index", "score", "text"} for r in res) + + +@require_torch +def test_sentence_ranking_validation_errors(): + with tempfile.TemporaryDirectory() as tmpdirname: + storage_dir = _load_repository_from_hf("cross-encoder/ms-marco-MiniLM-L-6-v2", tmpdirname, framework="pytorch") + pipe = get_sentence_transformers_pipeline("sentence-ranking", storage_dir.as_posix()) + + with pytest.raises( + ValueError, + match=( + "you should provide either only 'sentences' i.e. 'inputs' " + "of both 'query' and 'texts' to run the ranking task." + ), + ): + pipe( + sentences="Lets create an embedding", + query="Lets create an embedding", + texts=["Lets create an embedding", "I like noodles"], + ) + + with pytest.raises( + ValueError, + match=( + "No inputs have been provided within the input payload, make sure that the input payload " + "contains either 'sentences' i.e. 'inputs', or both 'query' and 'texts'" + ), + ): + pipe(sentences=None, query=None, texts=None) + + with pytest.raises( + ValueError, + match=("Provided texts=None, but a list of non-empty strings should be provided instead."), + ): + pipe(sentences=None, query="Lets create an embedding", texts=None)
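Reviewer note (illustrative only, not part of the patch): a minimal sketch of how the payload normalization added to `HuggingFaceHandler.__call__` above is expected to behave for zero-shot classification, where the camelCase `candidateLabels` key is renamed to `candidate_labels` and a bare string input is wrapped as `sequences` before the pipeline call. The checkpoint name below is just an example; any zero-shot-classification model loaded the same way as in the unit tests should behave equivalently.

```python
import tempfile

from huggingface_inference_toolkit.handler import HuggingFaceHandler
from huggingface_inference_toolkit.utils import _load_repository_from_hf

# Example checkpoint (assumption): any zero-shot-classification model works the same way.
MODEL = "typeform/distilbert-base-uncased-mnli"

with tempfile.TemporaryDirectory() as tmpdirname:
    # Download the repository locally, mirroring the pattern used in tests/unit/test_handler.py.
    storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="pytorch")
    handler = HuggingFaceHandler(model_dir=str(storage_dir), task="zero-shot-classification")

    # The handler accepts the TEI/camelCase spelling and a plain string input:
    # `candidateLabels` is mapped to `candidate_labels`, and the string becomes `sequences`.
    payload = {
        "inputs": "I have a problem with my iphone that needs to be resolved asap!!",
        "parameters": {"candidateLabels": ["urgent", "not urgent", "phone", "tablet"]},
    }
    prediction = handler(payload)
    print(prediction)  # e.g. {"sequence": ..., "labels": [...], "scores": [...]}
```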