huggingface · alvarobartt · Dec 12, 2024 · Nov 12, 2024 · Nov 12, 2024 · Nov 12, 2024
diff --git a/setup.py b/setup.py
@@ -13,7 +13,7 @@
 # libavcodec-extra : libavcodec-extra  includes additional codecs for ffmpeg
 
 install_requires = [
-    "transformers[sklearn,sentencepiece,audio,vision,sentencepiece]==4.46.1",
+    "transformers[sklearn,sentencepiece,audio,vision]==4.46.1",
     "huggingface_hub[hf_transfer]==0.26.2",
     # vision
     "Pillow",

diff --git a/src/huggingface_inference_toolkit/diffusers_utils.py b/src/huggingface_inference_toolkit/diffusers_utils.py
@@ -22,9 +22,7 @@ def is_diffusers_available():
 
 
 class IEAutoPipelineForText2Image:
-    def __init__(
-        self, model_dir: str, device: Union[str, None] = None, **kwargs
-    ):  # needs "cuda" for GPU
+    def __init__(self, model_dir: str, device: Union[str, None] = None, **kwargs):  # needs "cuda" for GPU
         dtype = torch.float32
         if device == "cuda":
             dtype = torch.bfloat16 if is_torch_bf16_gpu_available() else torch.float16
@@ -36,9 +34,7 @@ def __init__(
         # try to use DPMSolverMultistepScheduler
         if isinstance(self.pipeline, StableDiffusionPipeline):
             try:
-                self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(
-                    self.pipeline.scheduler.config
-                )
+                self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(self.pipeline.scheduler.config)
             except Exception:
                 pass
 
@@ -47,6 +43,13 @@ def __call__(
         prompt,
         **kwargs,
     ):
+        if "prompt" in kwargs:
+            logger.warning(
+                "prompt has been provided twice, both via arg and kwargs, so the `prompt` arg will be used "
+                "instead, and the `prompt` in kwargs will be discarded."
+            )
+            kwargs.pop("prompt")
+
         # diffusers doesn't support seed but rather the generator kwarg
         # see: https://github.com/huggingface/api-inference-community/blob/8e577e2d60957959ba02f474b2913d84a9086b82/docker_images/diffusers/app/pipelines/text_to_image.py#L172-L176
         if "seed" in kwargs:
@@ -58,9 +61,16 @@ def __call__(
         # TODO: add support for more images (Reason is correct output)
         if "num_images_per_prompt" in kwargs:
             kwargs.pop("num_images_per_prompt")
-            logger.warning(
-                "Sending num_images_per_prompt > 1 to pipeline is not supported. Using default value 1."
-            )
+            logger.warning("Sending num_images_per_prompt > 1 to pipeline is not supported. Using default value 1.")
+
+        if "target_size" in kwargs:
+            kwargs["height"] = kwargs["target_size"].pop("height", None)
+            kwargs["width"] = kwargs["target_size"].pop("width", None)
+            kwargs.pop("target_size")
+
+        if "output_type" in kwargs and kwargs["output_type"] != "pil":
+            kwargs.pop("output_type")
+            logger.warning("The `output_type` cannot be modified, and PIL will be used by default instead.")
 
         # Call pipeline with parameters
         out = self.pipeline(prompt, num_images_per_prompt=1, **kwargs)

diff --git a/src/huggingface_inference_toolkit/handler.py b/src/huggingface_inference_toolkit/handler.py
@@ -1,8 +1,10 @@
 import os
 from pathlib import Path
-from typing import Optional, Union
+from typing import Any, Dict, Literal, Optional, Union
 
 from huggingface_inference_toolkit.const import HF_TRUST_REMOTE_CODE
+from huggingface_inference_toolkit.logging import logger
+from huggingface_inference_toolkit.sentence_transformers_utils import SENTENCE_TRANSFORMERS_TASKS
 from huggingface_inference_toolkit.utils import (
     check_and_register_custom_pipeline_from_directory,
     get_pipeline,
@@ -12,34 +14,86 @@
 class HuggingFaceHandler:
     """
     A Default Hugging Face Inference Handler which works with all
-    transformers pipelines, Sentence Transformers and Optimum.
+    Transformers, Diffusers, Sentence Transformers and Optimum pipelines.
     """
 
-    def __init__(self, model_dir: Union[str, Path], task=None, framework="pt"):
+    def __init__(
+        self, model_dir: Union[str, Path], task: Union[str, None] = None, framework: Literal["pt"] = "pt"
+    ) -> None:
         self.pipeline = get_pipeline(
-            model_dir=model_dir,
-            task=task,
+            model_dir=model_dir,  # type: ignore
+            task=task,  # type: ignore
             framework=framework,
             trust_remote_code=HF_TRUST_REMOTE_CODE,
         )
 
-    def __call__(self, data):
+    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
         """
         Handles an inference request with input data and makes a prediction.
         Args:
             :data: (obj): the raw request body data.
         :return: prediction output
         """
         inputs = data.pop("inputs", data)
-        parameters = data.pop("parameters", None)
-
-        # pass inputs with all kwargs in data
-        if parameters is not None:
-            prediction = self.pipeline(inputs, **parameters)
-        else:
-            prediction = self.pipeline(inputs)
-        # postprocess the prediction
-        return prediction
+        parameters = data.pop("parameters", {})
+
+        # sentence transformers pipelines do not expose a `task` attribute, so handle them before the task checks
+        if any(isinstance(self.pipeline, v) for v in SENTENCE_TRANSFORMERS_TASKS.values()):
+            return self.pipeline(**inputs) if isinstance(inputs, dict) else self.pipeline(inputs)  # type: ignore
+
+        if self.pipeline.task == "question-answering":
+            if not isinstance(inputs, dict):
+                raise ValueError(f"inputs must be a dict, but a `{type(inputs)}` was provided instead.")
+            if not all(k in inputs for k in {"question", "context"}):
+                raise ValueError(
+                    f"{self.pipeline.task} expects `inputs` to be a dict containing both `question` and "
+                    "`context` as the keys, both of them being either a `str` or a `List[str]`."
+                )
+
+        if self.pipeline.task == "table-question-answering":
+            if not isinstance(inputs, dict):
+                raise ValueError(f"inputs must be a dict, but a `{type(inputs)}` was provided instead.")
+            if "question" in inputs:
+                inputs["query"] = inputs.pop("question")
+            if not all(k in inputs for k in {"table", "query"}):
+                raise ValueError(
+                    f"{self.pipeline.task} expects `inputs` to be a dict containing the keys `table` and "
+                    "either `question` or `query`."
+                )
+
+        if self.pipeline.task in {"token-classification", "ner"}:
+            # stride and aggregation_strategy are defined on `pipeline` init, but in the Inference API those
+            # are provided on each request instead
+            for p in {"stride", "aggregation_strategy"}:
+                if p in parameters:
+                    parameters.pop(p)
+                    logger.warning(f"provided parameter `{p}`, but it's not supported.")
+
+        if self.pipeline.task.__contains__("translation"):
+            # `truncation` and `generate_parameters` are accepted by the Inference API, but are not available
+            # on the `TranslationPipeline.__call__` method
+            for p in {"truncation", "generate_parameters"}:
+                if p in parameters:
+                    parameters.pop(p)
+                    logger.warning(f"provided parameter `{p}`, but it's not supported.")
+
+        if self.pipeline.task.__contains__("zero-shot-classification"):
+            if "candidateLabels" in parameters:
+                parameters["candidate_labels"] = parameters.pop("candidateLabels")
+            if not isinstance(inputs, dict):
+                inputs = {"sequences": inputs}
+            if "text" in inputs:
+                inputs["sequences"] = inputs.pop("text")
+            if not all(k in inputs for k in {"sequences"}) or not all(k in parameters for k in {"candidate_labels"}):
+                raise ValueError(
+                    f"{self.pipeline.task} expects `inputs` to be either a string or a dict containing the "
+                    "key `text` or `sequences`, and `parameters` to be a dict containing either `candidate_labels` "
+                    "or `candidateLabels`."
+                )
+
+        return (
+            self.pipeline(**inputs, **parameters) if isinstance(inputs, dict) else self.pipeline(inputs, **parameters)  # type: ignore
+        )
 
 
 class VertexAIHandler(HuggingFaceHandler):
@@ -48,21 +102,21 @@ class VertexAIHandler(HuggingFaceHandler):
     Vertex AI specific logic for inference.
     """
 
-    def __init__(self, model_dir: Union[str, Path], task=None, framework="pt"):
-        super().__init__(model_dir, task, framework)
+    def __init__(
+        self, model_dir: Union[str, Path], task: Union[str, None] = None, framework: Literal["pt"] = "pt"
+    ) -> None:
+        super().__init__(model_dir=model_dir, task=task, framework=framework)
 
-    def __call__(self, data):
+    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
         """
         Handles an inference request with input data and makes a prediction.
         Args:
             :data: (obj): the raw request body data.
         :return: prediction output
         """
         if "instances" not in data:
-            raise ValueError(
-                "The request body must contain a key 'instances' with a list of instances."
-            )
-        parameters = data.pop("parameters", None)
+            raise ValueError("The request body must contain a key 'instances' with a list of instances.")
+        parameters = data.pop("parameters", {})
 
         predictions = []
         # iterate over all instances and make predictions
@@ -74,9 +128,7 @@ def __call__(self, data):
         return {"predictions": predictions}
 
 
-def get_inference_handler_either_custom_or_default_handler(
-    model_dir: Path, task: Optional[str] = None
-):
+def get_inference_handler_either_custom_or_default_handler(model_dir: Path, task: Optional[str] = None) -> Any:
     """
     Returns the appropriate inference handler based on the given model directory and task.
 
@@ -88,9 +140,10 @@ def get_inference_handler_either_custom_or_default_handler(
         InferenceHandler: The appropriate inference handler based on the given model directory and task.
     """
     custom_pipeline = check_and_register_custom_pipeline_from_directory(model_dir)
-    if custom_pipeline:
+    if custom_pipeline is not None:
         return custom_pipeline
-    elif os.environ.get("AIP_MODE", None) == "PREDICTION":
+
+    if os.environ.get("AIP_MODE", None) == "PREDICTION":
         return VertexAIHandler(model_dir=model_dir, task=task)
-    else:
-        return HuggingFaceHandler(model_dir=model_dir, task=task)
+
+    return HuggingFaceHandler(model_dir=model_dir, task=task)
diff --git a/src/huggingface_inference_toolkit/sentence_transformers_utils.py b/src/huggingface_inference_toolkit/sentence_transformers_utils.py
@@ -1,4 +1,5 @@
 import importlib.util
+from typing import Any, Dict, List, Tuple, Union
 
 _sentence_transformers = importlib.util.find_spec("sentence_transformers") is not None
 
@@ -12,33 +13,36 @@ def is_sentence_transformers_available():
 
 
 class SentenceSimilarityPipeline:
-    def __init__(self, model_dir: str, device: str = None, **kwargs):  # needs "cuda" for GPU
+    def __init__(self, model_dir: str, device: Union[str, None] = None, **kwargs: Any) -> None:
+        # `device` needs to be set to "cuda" for GPU
         self.model = SentenceTransformer(model_dir, device=device, **kwargs)
 
-    def __call__(self, inputs=None):
-        embeddings1 = self.model.encode(
-            inputs["source_sentence"], convert_to_tensor=True
-        )
-        embeddings2 = self.model.encode(inputs["sentences"], convert_to_tensor=True)
+    def __call__(self, source_sentence: str, sentences: List[str]) -> Dict[str, float]:
+        embeddings1 = self.model.encode(source_sentence, convert_to_tensor=True)
+        embeddings2 = self.model.encode(sentences, convert_to_tensor=True)
         similarities = util.pytorch_cos_sim(embeddings1, embeddings2).tolist()[0]
         return {"similarities": similarities}
 
 
 class SentenceEmbeddingPipeline:
-    def __init__(self, model_dir: str, device: str = None, **kwargs):  # needs "cuda" for GPU
+    def __init__(self, model_dir: str, device: Union[str, None] = None, **kwargs: Any) -> None:
+        # `device` needs to be set to "cuda" for GPU
         self.model = SentenceTransformer(model_dir, device=device, **kwargs)
 
-    def __call__(self, inputs):
-        embeddings = self.model.encode(inputs).tolist()
+    def __call__(self, sentences: Union[str, List[str]]) -> Dict[str, List[float]]:
+        embeddings = self.model.encode(sentences).tolist()
         return {"embeddings": embeddings}
 
 
 class RankingPipeline:
-    def __init__(self, model_dir: str, device: str = None, **kwargs):  # needs "cuda" for GPU
+    def __init__(self, model_dir: str, device: Union[str, None] = None, **kwargs: Any) -> None:
+        # `device` needs to be set to "cuda" for GPU
         self.model = CrossEncoder(model_dir, device=device, **kwargs)
 
-    def __call__(self, inputs):
-        scores = self.model.predict(inputs).tolist()
+    def __call__(
+        self, sentences: Union[Tuple[str, str], List[str], List[List[str]], List[Tuple[str, str]]]
+    ) -> Dict[str, List[float]]:
+        scores = self.model.predict(sentences).tolist()
         return {"scores": scores}
 
 
@@ -56,9 +60,5 @@ def get_sentence_transformers_pipeline(task=None, model_dir=None, device=-1, **k
     kwargs.pop("framework", None)
 
     if task not in SENTENCE_TRANSFORMERS_TASKS:
-        raise ValueError(
-            f"Unknown task {task}. Available tasks are: {', '.join(SENTENCE_TRANSFORMERS_TASKS.keys())}"
-        )
-    return SENTENCE_TRANSFORMERS_TASKS[task](
-        model_dir=model_dir, device=device, **kwargs
-    )
+        raise ValueError(f"Unknown task {task}. Available tasks are: {', '.join(SENTENCE_TRANSFORMERS_TASKS.keys())}")
+    return SENTENCE_TRANSFORMERS_TASKS[task](model_dir=model_dir, device=device, **kwargs)