diff --git a/setup.py b/setup.py index 63414346..7d8c4bb6 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ # We don't declare our dependency on transformers here because we build with # different packages for different variants -VERSION = "0.5.2" +VERSION = "0.5.3" # Ubuntu packages # libsndfile1-dev: torchaudio requires the development version of the libsndfile package which can be installed via a system package manager. On Ubuntu it can be installed as follows: apt install libsndfile1-dev @@ -13,7 +13,7 @@ # libavcodec-extra : libavcodec-extra includes additional codecs for ffmpeg install_requires = [ - "transformers[sklearn,sentencepiece,audio,vision,sentencepiece]==4.46.1", + "transformers[sklearn,sentencepiece,audio,vision]==4.47.0", "huggingface_hub[hf_transfer]==0.26.2", # vision "Pillow", @@ -31,11 +31,11 @@ extras = {} -extras["st"] = ["sentence_transformers==3.2.1"] -extras["diffusers"] = ["diffusers==0.31.0", "accelerate==1.0.1"] +extras["st"] = ["sentence_transformers==3.3.1"] +extras["diffusers"] = ["diffusers==0.31.0", "accelerate==1.1.0"] # Includes `peft` as PEFT requires `torch` so having `peft` as a core dependency # means that `torch` will be installed even if the `torch` extra is not specified. -extras["torch"] = ["torch==2.3.1", "torchvision", "torchaudio", "peft==0.13.2"] +extras["torch"] = ["torch==2.3.1", "torchvision", "torchaudio", "peft==0.14.0"] extras["test"] = [ "pytest==7.2.1", "pytest-xdist", diff --git a/src/huggingface_inference_toolkit/diffusers_utils.py b/src/huggingface_inference_toolkit/diffusers_utils.py index 70b683ab..47ddf390 100644 --- a/src/huggingface_inference_toolkit/diffusers_utils.py +++ b/src/huggingface_inference_toolkit/diffusers_utils.py @@ -22,9 +22,7 @@ def is_diffusers_available(): class IEAutoPipelineForText2Image: - def __init__( - self, model_dir: str, device: Union[str, None] = None, **kwargs - ): # needs "cuda" for GPU + def __init__(self, model_dir: str, device: Union[str, None] = None, **kwargs): # needs "cuda" for GPU dtype = torch.float32 if device == "cuda": dtype = torch.bfloat16 if is_torch_bf16_gpu_available() else torch.float16 @@ -36,9 +34,7 @@ def __init__( # try to use DPMSolverMultistepScheduler if isinstance(self.pipeline, StableDiffusionPipeline): try: - self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config( - self.pipeline.scheduler.config - ) + self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(self.pipeline.scheduler.config) except Exception: pass @@ -47,6 +43,13 @@ def __call__( prompt, **kwargs, ): + if "prompt" in kwargs: + logger.warning( + "prompt has been provided twice, both via arg and kwargs, so the `prompt` arg will be used " + "instead, and the `prompt` in kwargs will be discarded." + ) + kwargs.pop("prompt") + # diffusers doesn't support seed but rather the generator kwarg # see: https://github.com/huggingface/api-inference-community/blob/8e577e2d60957959ba02f474b2913d84a9086b82/docker_images/diffusers/app/pipelines/text_to_image.py#L172-L176 if "seed" in kwargs: @@ -58,9 +61,16 @@ def __call__( # TODO: add support for more images (Reason is correct output) if "num_images_per_prompt" in kwargs: kwargs.pop("num_images_per_prompt") - logger.warning( - "Sending num_images_per_prompt > 1 to pipeline is not supported. Using default value 1." - ) + logger.warning("Sending num_images_per_prompt > 1 to pipeline is not supported. 
Using default value 1.") + + if "target_size" in kwargs: + kwargs["height"] = kwargs["target_size"].pop("height", None) + kwargs["width"] = kwargs["target_size"].pop("width", None) + kwargs.pop("target_size") + + if "output_type" in kwargs and kwargs["output_type"] != "pil": + kwargs.pop("output_type") + logger.warning("The `output_type` cannot be modified, and PIL will be used by default instead.") # Call pipeline with parameters out = self.pipeline(prompt, num_images_per_prompt=1, **kwargs) diff --git a/src/huggingface_inference_toolkit/handler.py b/src/huggingface_inference_toolkit/handler.py index 636f185b..66f4acdc 100644 --- a/src/huggingface_inference_toolkit/handler.py +++ b/src/huggingface_inference_toolkit/handler.py @@ -1,8 +1,9 @@ import os from pathlib import Path -from typing import Optional, Union +from typing import Any, Dict, Literal, Optional, Union from huggingface_inference_toolkit.const import HF_TRUST_REMOTE_CODE +from huggingface_inference_toolkit.sentence_transformers_utils import SENTENCE_TRANSFORMERS_TASKS from huggingface_inference_toolkit.utils import ( check_and_register_custom_pipeline_from_directory, get_pipeline, @@ -12,18 +13,20 @@ class HuggingFaceHandler: """ A Default Hugging Face Inference Handler which works with all - transformers pipelines, Sentence Transformers and Optimum. + Transformers, Diffusers, Sentence Transformers and Optimum pipelines. """ - def __init__(self, model_dir: Union[str, Path], task=None, framework="pt"): + def __init__( + self, model_dir: Union[str, Path], task: Union[str, None] = None, framework: Literal["pt"] = "pt" + ) -> None: self.pipeline = get_pipeline( - model_dir=model_dir, - task=task, + model_dir=model_dir, # type: ignore + task=task, # type: ignore framework=framework, trust_remote_code=HF_TRUST_REMOTE_CODE, ) - def __call__(self, data): + def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]: """ Handles an inference request with input data and makes a prediction. Args: @@ -31,15 +34,66 @@ def __call__(self, data): :return: prediction output """ inputs = data.pop("inputs", data) - parameters = data.pop("parameters", None) - - # pass inputs with all kwargs in data - if parameters is not None: - prediction = self.pipeline(inputs, **parameters) - else: - prediction = self.pipeline(inputs) - # postprocess the prediction - return prediction + parameters = data.pop("parameters", {}) + + # sentence transformers pipelines do not have the `task` arg + if any(isinstance(self.pipeline, v) for v in SENTENCE_TRANSFORMERS_TASKS.values()): + return self.pipeline(**inputs) if isinstance(inputs, dict) else self.pipeline(inputs) # type: ignore + + if self.pipeline.task == "question-answering": + if not isinstance(inputs, dict): + raise ValueError(f"inputs must be a dict, but a `{type(inputs)}` was provided instead.") + if not all(k in inputs for k in {"question", "context"}): + raise ValueError( + f"{self.pipeline.task} expects `inputs` to be a dict containing both `question` and " + "`context` as the keys, both of them being either a `str` or a `List[str]`." + ) + + if self.pipeline.task == "table-question-answering": + if not isinstance(inputs, dict): + raise ValueError(f"inputs must be a dict, but a `{type(inputs)}` was provided instead.") + if "question" in inputs: + inputs["query"] = inputs.pop("question") + if not all(k in inputs for k in {"table", "query"}): + raise ValueError( + f"{self.pipeline.task} expects `inputs` to be a dict containing the keys `table` and " + "either `question` or `query`." 
+ ) + + if self.pipeline.task.__contains__("translation") or self.pipeline.task in { + "text-generation", + "image-to-text", + "automatic-speech-recognition", + "text-to-audio", + "text-to-speech", + }: + # `generate_kwargs` needs to be a dict, `generation_parameters` is here for forward compatibility + if "generation_parameters" in parameters: + parameters["generate_kwargs"] = parameters.pop("generation_parameters") + + if self.pipeline.task.__contains__("translation") or self.pipeline.task in {"text-generation"}: + # flatten the values of `generate_kwargs` as it's not supported as is, but via top-level parameters + generate_kwargs = parameters.pop("generate_kwargs", {}) + for key, value in generate_kwargs.items(): + parameters[key] = value + + if self.pipeline.task.__contains__("zero-shot-classification"): + if "candidateLabels" in parameters: + parameters["candidate_labels"] = parameters.pop("candidateLabels") + if not isinstance(inputs, dict): + inputs = {"sequences": inputs} + if "text" in inputs: + inputs["sequences"] = inputs.pop("text") + if not all(k in inputs for k in {"sequences"}) or not all(k in parameters for k in {"candidate_labels"}): + raise ValueError( + f"{self.pipeline.task} expects `inputs` to be either a string or a dict containing the " + "key `text` or `sequences`, and `parameters` to be a dict containing either `candidate_labels` " + "or `candidateLabels`." + ) + + return ( + self.pipeline(**inputs, **parameters) if isinstance(inputs, dict) else self.pipeline(inputs, **parameters) # type: ignore + ) class VertexAIHandler(HuggingFaceHandler): @@ -48,10 +102,12 @@ class VertexAIHandler(HuggingFaceHandler): Vertex AI specific logic for inference. """ - def __init__(self, model_dir: Union[str, Path], task=None, framework="pt"): - super().__init__(model_dir, task, framework) + def __init__( + self, model_dir: Union[str, Path], task: Union[str, None] = None, framework: Literal["pt"] = "pt" + ) -> None: + super().__init__(model_dir=model_dir, task=task, framework=framework) - def __call__(self, data): + def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]: """ Handles an inference request with input data and makes a prediction. Args: @@ -59,10 +115,8 @@ def __call__(self, data): :return: prediction output """ if "instances" not in data: - raise ValueError( - "The request body must contain a key 'instances' with a list of instances." - ) - parameters = data.pop("parameters", None) + raise ValueError("The request body must contain a key 'instances' with a list of instances.") + parameters = data.pop("parameters", {}) predictions = [] # iterate over all instances and make predictions @@ -74,9 +128,7 @@ def __call__(self, data): return {"predictions": predictions} -def get_inference_handler_either_custom_or_default_handler( - model_dir: Path, task: Optional[str] = None -): +def get_inference_handler_either_custom_or_default_handler(model_dir: Path, task: Optional[str] = None) -> Any: """ Returns the appropriate inference handler based on the given model directory and task. @@ -88,9 +140,10 @@ def get_inference_handler_either_custom_or_default_handler( InferenceHandler: The appropriate inference handler based on the given model directory and task. 
""" custom_pipeline = check_and_register_custom_pipeline_from_directory(model_dir) - if custom_pipeline: + if custom_pipeline is not None: return custom_pipeline - elif os.environ.get("AIP_MODE", None) == "PREDICTION": + + if os.environ.get("AIP_MODE", None) == "PREDICTION": return VertexAIHandler(model_dir=model_dir, task=task) - else: - return HuggingFaceHandler(model_dir=model_dir, task=task) + + return HuggingFaceHandler(model_dir=model_dir, task=task) diff --git a/src/huggingface_inference_toolkit/sentence_transformers_utils.py b/src/huggingface_inference_toolkit/sentence_transformers_utils.py index 6b55ae76..0d648420 100644 --- a/src/huggingface_inference_toolkit/sentence_transformers_utils.py +++ b/src/huggingface_inference_toolkit/sentence_transformers_utils.py @@ -1,4 +1,10 @@ import importlib.util +from typing import Any, Dict, List, Tuple, Union + +try: + from typing import Literal +except ImportError: + from typing_extensions import Literal _sentence_transformers = importlib.util.find_spec("sentence_transformers") is not None @@ -12,40 +18,73 @@ def is_sentence_transformers_available(): class SentenceSimilarityPipeline: - def __init__(self, model_dir: str, device: str = None, **kwargs): # needs "cuda" for GPU + def __init__(self, model_dir: str, device: Union[str, None] = None, **kwargs: Any) -> None: + # `device` needs to be set to "cuda" for GPU self.model = SentenceTransformer(model_dir, device=device, **kwargs) - def __call__(self, inputs=None): - embeddings1 = self.model.encode( - inputs["source_sentence"], convert_to_tensor=True - ) - embeddings2 = self.model.encode(inputs["sentences"], convert_to_tensor=True) + def __call__(self, source_sentence: str, sentences: List[str]) -> Dict[str, float]: + embeddings1 = self.model.encode(source_sentence, convert_to_tensor=True) + embeddings2 = self.model.encode(sentences, convert_to_tensor=True) similarities = util.pytorch_cos_sim(embeddings1, embeddings2).tolist()[0] return {"similarities": similarities} class SentenceEmbeddingPipeline: - def __init__(self, model_dir: str, device: str = None, **kwargs): # needs "cuda" for GPU + def __init__(self, model_dir: str, device: Union[str, None] = None, **kwargs: Any) -> None: + # `device` needs to be set to "cuda" for GPU self.model = SentenceTransformer(model_dir, device=device, **kwargs) - def __call__(self, inputs): - embeddings = self.model.encode(inputs).tolist() + def __call__(self, sentences: Union[str, List[str]]) -> Dict[str, List[float]]: + embeddings = self.model.encode(sentences).tolist() return {"embeddings": embeddings} -class RankingPipeline: - def __init__(self, model_dir: str, device: str = None, **kwargs): # needs "cuda" for GPU +class SentenceRankingPipeline: + def __init__(self, model_dir: str, device: Union[str, None] = None, **kwargs: Any) -> None: + # `device` needs to be set to "cuda" for GPU self.model = CrossEncoder(model_dir, device=device, **kwargs) - def __call__(self, inputs): - scores = self.model.predict(inputs).tolist() - return {"scores": scores} + def __call__( + self, + sentences: Union[Tuple[str, str], List[str], List[List[str]], List[Tuple[str, str]], None] = None, + query: Union[str, None] = None, + texts: Union[List[str], None] = None, + return_documents: bool = False, + ) -> Union[Dict[str, List[float]], List[Dict[Literal["index", "score", "text"], Any]]]: + if all(x is not None for x in [sentences, query, texts]): + raise ValueError( + f"The provided payload contains {sentences=} (i.e. 
'inputs'), {query=}, and {texts=}" + " but all of those cannot be provided, you should provide either only 'sentences' i.e. 'inputs'" + " of both 'query' and 'texts' to run the ranking task." + ) + + if all(x is None for x in [sentences, query, texts]): + raise ValueError( + "No inputs have been provided within the input payload, make sure that the input payload" + " contains either 'sentences' i.e. 'inputs', or both 'query' and 'texts' to run the ranking task." + ) + + if sentences is not None: + scores = self.model.predict(sentences).tolist() + return {"scores": scores} + + if query is None or not isinstance(query, str): + raise ValueError(f"Provided {query=} but a non-empty string should be provided instead.") + + if texts is None or not isinstance(texts, list) or not all(isinstance(text, str) for text in texts): + raise ValueError(f"Provided {texts=}, but a list of non-empty strings should be provided instead.") + + scores = self.model.rank(query, texts, return_documents=return_documents) + # rename "corpus_id" key to "index" for all scores to match TEI + for score in scores: + score["index"] = score.pop("corpus_id") # type: ignore + return scores # type: ignore SENTENCE_TRANSFORMERS_TASKS = { "sentence-similarity": SentenceSimilarityPipeline, "sentence-embeddings": SentenceEmbeddingPipeline, - "sentence-ranking": RankingPipeline, + "sentence-ranking": SentenceRankingPipeline, } @@ -56,9 +95,5 @@ def get_sentence_transformers_pipeline(task=None, model_dir=None, device=-1, **k kwargs.pop("framework", None) if task not in SENTENCE_TRANSFORMERS_TASKS: - raise ValueError( - f"Unknown task {task}. Available tasks are: {', '.join(SENTENCE_TRANSFORMERS_TASKS.keys())}" - ) - return SENTENCE_TRANSFORMERS_TASKS[task]( - model_dir=model_dir, device=device, **kwargs - ) + raise ValueError(f"Unknown task {task}. Available tasks are: {', '.join(SENTENCE_TRANSFORMERS_TASKS.keys())}") + return SENTENCE_TRANSFORMERS_TASKS[task](model_dir=model_dir, device=device, **kwargs) diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py index a5ff7aee..8dc644c7 100644 --- a/src/huggingface_inference_toolkit/utils.py +++ b/src/huggingface_inference_toolkit/utils.py @@ -134,9 +134,7 @@ def _load_repository_from_hf( # create regex to only include the framework specific weights ignore_regex = create_artifact_filter(framework) - logger.info( - f"Ignore regex pattern for files, which are not downloaded: { ', '.join(ignore_regex) }" - ) + logger.info(f"Ignore regex pattern for files, which are not downloaded: { ', '.join(ignore_regex) }") # Download the repository to the workdir and filter out non-framework # specific weights @@ -177,9 +175,7 @@ def check_and_register_custom_pipeline_from_directory(model_dir): Please update to the new format. 
See documentation for more information.""" ) - spec = importlib.util.spec_from_file_location( - "pipeline.PreTrainedPipeline", legacy_module - ) + spec = importlib.util.spec_from_file_location("pipeline.PreTrainedPipeline", legacy_module) if spec: # add the whole directory to path for submodlues sys.path.insert(0, model_dir) @@ -208,23 +204,28 @@ def get_device(): def get_pipeline( - task: str, + task: Union[str, None], model_dir: Path, **kwargs, ) -> Pipeline: """ create pipeline class for a specific task based on local saved model """ - device = get_device() - if is_optimum_neuron_available(): - logger.info("Using device Neuron") - else: - logger.info(f"Using device { 'GPU' if device == 0 else 'CPU'}") - if task is None: raise EnvironmentError( "The task for this model is not set: Please set one: https://huggingface.co/docs#how-is-a-models-type-of-inference-api-and-widget-determined" ) + + if task == "conversational": + task = "text-generation" + + if is_optimum_neuron_available(): + logger.info("Using device Neuron") + return get_optimum_neuron_pipeline(task=task, model_dir=model_dir) + + device = get_device() + logger.info(f"Using device {'GPU' if device == 0 else 'CPU'}") + # define tokenizer or feature extractor as kwargs to load it the pipeline # correctly if task in { @@ -236,41 +237,27 @@ def get_pipeline( "zero-shot-image-classification", }: kwargs["feature_extractor"] = model_dir - elif task in {"image-to-text", "text-to-image"}: - pass - elif task == "conversational": - task = "text-generation" - else: + elif task not in {"image-to-text", "text-to-image"}: kwargs["tokenizer"] = model_dir - if is_optimum_neuron_available(): - hf_pipeline = get_optimum_neuron_pipeline(task=task, model_dir=model_dir) - elif is_sentence_transformers_available() and task in [ + if is_sentence_transformers_available() and task in [ "sentence-similarity", "sentence-embeddings", "sentence-ranking", ]: - hf_pipeline = get_sentence_transformers_pipeline( - task=task, model_dir=model_dir, device=device, **kwargs - ) + hf_pipeline = get_sentence_transformers_pipeline(task=task, model_dir=model_dir, device=device, **kwargs) elif is_diffusers_available() and task == "text-to-image": - hf_pipeline = get_diffusers_pipeline( - task=task, model_dir=model_dir, device=device, **kwargs - ) + hf_pipeline = get_diffusers_pipeline(task=task, model_dir=model_dir, device=device, **kwargs) else: hf_pipeline = pipeline(task=task, model=model_dir, device=device, **kwargs) - if task == "automatic-speech-recognition" and isinstance( - hf_pipeline.model, WhisperForConditionalGeneration - ): + if task == "automatic-speech-recognition" and isinstance(hf_pipeline.model, WhisperForConditionalGeneration): # set chunk length to 30s for whisper to enable long audio files hf_pipeline._preprocess_params["chunk_length_s"] = 30 - hf_pipeline.model.config.forced_decoder_ids = ( - hf_pipeline.tokenizer.get_decoder_prompt_ids( - language="english", task="transcribe" - ) + hf_pipeline.model.config.forced_decoder_ids = hf_pipeline.tokenizer.get_decoder_prompt_ids( + language="english", task="transcribe" ) - return hf_pipeline + return hf_pipeline # type: ignore def convert_params_to_int_or_bool(params): diff --git a/tests/unit/test_handler.py b/tests/unit/test_handler.py index 052a5bfc..2935d6e7 100644 --- a/tests/unit/test_handler.py +++ b/tests/unit/test_handler.py @@ -1,4 +1,5 @@ import tempfile +from typing import Dict import pytest from transformers.testing_utils import require_tf, require_torch @@ -14,11 +15,16 @@ TASK = 
"text-classification" MODEL = "hf-internal-testing/tiny-random-distilbert" -INPUT = {"inputs": "My name is Wolfgang and I live in Berlin"} + + +# defined as fixture because it's modified on `pop` +@pytest.fixture +def input_data(): + return {"inputs": "My name is Wolfgang and I live in Berlin"} @require_torch -def test_pt_get_device(): +def test_pt_get_device() -> None: import torch with tempfile.TemporaryDirectory() as tmpdirname: @@ -32,52 +38,45 @@ def test_pt_get_device(): @require_torch -def test_pt_predict_call(): +def test_pt_predict_call(input_data: Dict[str, str]) -> None: with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="pytorch") h = HuggingFaceHandler(model_dir=str(storage_dir), task=TASK) - prediction = h(INPUT) + prediction = h(input_data) assert "label" in prediction[0] assert "score" in prediction[0] @require_torch -def test_pt_custom_pipeline(): +def test_pt_custom_pipeline(input_data: Dict[str, str]) -> None: with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( "philschmid/custom-pipeline-text-classification", tmpdirname, framework="pytorch", ) - h = get_inference_handler_either_custom_or_default_handler( - str(storage_dir), task="custom" - ) - assert h(INPUT) == INPUT + h = get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="custom") + assert h(input_data) == input_data @require_torch -def test_pt_sentence_transformers_pipeline(): +def test_pt_sentence_transformers_pipeline(input_data: Dict[str, str]) -> None: with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( "sentence-transformers/all-MiniLM-L6-v2", tmpdirname, framework="pytorch" ) - h = get_inference_handler_either_custom_or_default_handler( - str(storage_dir), task="sentence-embeddings" - ) - pred = h(INPUT) + h = get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="sentence-embeddings") + pred = h(input_data) assert isinstance(pred["embeddings"], list) @require_tf def test_tf_get_device(): - with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py - storage_dir = _load_repository_from_hf( - MODEL, tmpdirname, framework="tensorflow" - ) + storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="tensorflow") h = HuggingFaceHandler(model_dir=str(storage_dir), task=TASK) if _is_gpu_available(): assert h.pipeline.device == 0 @@ -86,33 +85,27 @@ def test_tf_get_device(): @require_tf -def test_tf_predict_call(): +def test_tf_predict_call(input_data: Dict[str, str]) -> None: with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py - storage_dir = _load_repository_from_hf( - MODEL, tmpdirname, framework="tensorflow" - ) - handler = HuggingFaceHandler( - model_dir=str(storage_dir), task=TASK, framework="tf" - ) + storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="tensorflow") + handler = HuggingFaceHandler(model_dir=str(storage_dir), task=TASK, framework="tf") - prediction = handler(INPUT) + prediction = handler(input_data) assert "label" in prediction[0] assert "score" in prediction[0] @require_tf -def test_tf_custom_pipeline(): +def test_tf_custom_pipeline(input_data: Dict[str, str]) -> None: with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( 
"philschmid/custom-pipeline-text-classification", tmpdirname, framework="tensorflow", ) - h = get_inference_handler_either_custom_or_default_handler( - str(storage_dir), task="custom" - ) - assert h(INPUT) == INPUT + h = get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="custom") + assert h(input_data) == input_data @require_tf @@ -123,6 +116,4 @@ def test_tf_sentence_transformers_pipeline(): "sentence-transformers/all-MiniLM-L6-v2", tmpdirname, framework="tensorflow" ) with pytest.raises(Exception) as _exc_info: - get_inference_handler_either_custom_or_default_handler( - str(storage_dir), task="sentence-embeddings" - ) + get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="sentence-embeddings") diff --git a/tests/unit/test_sentence_transformers.py b/tests/unit/test_sentence_transformers.py index 635f39de..e48533bc 100644 --- a/tests/unit/test_sentence_transformers.py +++ b/tests/unit/test_sentence_transformers.py @@ -1,5 +1,6 @@ import tempfile +import pytest from transformers.testing_utils import require_torch from huggingface_inference_toolkit.sentence_transformers_utils import ( @@ -15,9 +16,7 @@ @require_torch def test_get_sentence_transformers_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: - storage_dir = _load_repository_from_hf( - "sentence-transformers/all-MiniLM-L6-v2", tmpdirname - ) + storage_dir = _load_repository_from_hf("sentence-transformers/all-MiniLM-L6-v2", tmpdirname) pipe = get_pipeline("sentence-embeddings", storage_dir.as_posix()) assert isinstance(pipe, SentenceEmbeddingPipeline) @@ -25,22 +24,21 @@ def test_get_sentence_transformers_pipeline(): @require_torch def test_sentence_embedding_task(): with tempfile.TemporaryDirectory() as tmpdirname: - storage_dir = _load_repository_from_hf( - "sentence-transformers/all-MiniLM-L6-v2", tmpdirname - ) + storage_dir = _load_repository_from_hf("sentence-transformers/all-MiniLM-L6-v2", tmpdirname) pipe = get_sentence_transformers_pipeline("sentence-embeddings", storage_dir.as_posix()) - res = pipe("Lets create an embedding") + res = pipe(sentences="Lets create an embedding") + assert isinstance(res["embeddings"], list) + res = pipe(sentences=["Lets create an embedding", "Lets create another embedding"]) assert isinstance(res["embeddings"], list) + assert len(res["embeddings"]) == 2 @require_torch def test_sentence_similarity(): with tempfile.TemporaryDirectory() as tmpdirname: - storage_dir = _load_repository_from_hf( - "sentence-transformers/all-MiniLM-L6-v2", tmpdirname - ) + storage_dir = _load_repository_from_hf("sentence-transformers/all-MiniLM-L6-v2", tmpdirname) pipe = get_sentence_transformers_pipeline("sentence-similarity", storage_dir.as_posix()) - res = pipe({"source_sentence": "Lets create an embedding", "sentences": ["Lets create an embedding"]}) + res = pipe(source_sentence="Lets create an embedding", sentences=["Lets create an embedding"]) assert isinstance(res["similarities"], list) @@ -50,13 +48,67 @@ def test_sentence_ranking(): storage_dir = _load_repository_from_hf("cross-encoder/ms-marco-MiniLM-L-6-v2", tmpdirname) pipe = get_sentence_transformers_pipeline("sentence-ranking", storage_dir.as_posix()) res = pipe( - [ - ["Lets create an embedding", "Lets create an embedding"], - ["Lets create an embedding", "Lets create an embedding"], + sentences=[ + ["Lets create an embedding", "Lets create another embedding"], + ["Lets create an embedding", "Lets create another embedding"], ] ) assert isinstance(res["scores"], list) + res = 
pipe(sentences=["Lets create an embedding", "Lets create an embedding"]) + assert isinstance(res["scores"], float) + + +@require_torch +def test_sentence_ranking_tei(): + with tempfile.TemporaryDirectory() as tmpdirname: + storage_dir = _load_repository_from_hf("cross-encoder/ms-marco-MiniLM-L-6-v2", tmpdirname, framework="pytorch") + pipe = get_sentence_transformers_pipeline("sentence-ranking", storage_dir.as_posix()) res = pipe( - ["Lets create an embedding", "Lets create an embedding"], + query="Lets create an embedding", + texts=["Lets create an embedding", "I like noodles"], ) - assert isinstance(res["scores"], float) + assert isinstance(res, list) + assert all(r.keys() == {"index", "score"} for r in res) + + res = pipe( + query="Lets create an embedding", + texts=["Lets create an embedding", "I like noodles"], + return_documents=True, + ) + assert isinstance(res, list) + assert all(r.keys() == {"index", "score", "text"} for r in res) + + +@require_torch +def test_sentence_ranking_validation_errors(): + with tempfile.TemporaryDirectory() as tmpdirname: + storage_dir = _load_repository_from_hf("cross-encoder/ms-marco-MiniLM-L-6-v2", tmpdirname, framework="pytorch") + pipe = get_sentence_transformers_pipeline("sentence-ranking", storage_dir.as_posix()) + + with pytest.raises( + ValueError, + match=( + "you should provide either only 'sentences' i.e. 'inputs' " + "of both 'query' and 'texts' to run the ranking task." + ), + ): + pipe( + sentences="Lets create an embedding", + query="Lets create an embedding", + texts=["Lets create an embedding", "I like noodles"], + ) + + with pytest.raises( + ValueError, + match=( + "No inputs have been provided within the input payload, make sure that the input payload " + "contains either 'sentences' i.e. 'inputs', or both 'query' and 'texts'" + ), + ): + pipe(sentences=None, query=None, texts=None) + + with pytest.raises( + ValueError, + match=("Provided texts=None, but a list of non-empty strings should be provided instead."), + ): + pipe(sentences=None, query="Lets create an embedding", texts=None)
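Reviewer note (illustrative only, not part of the patch): a minimal sketch of how the payload normalization added to `HuggingFaceHandler.__call__` above is expected to behave for zero-shot classification, where the camelCase `candidateLabels` key is renamed to `candidate_labels` and a bare string input is wrapped as `sequences` before the pipeline call. The checkpoint name below is just an example; any zero-shot-classification model loaded the same way as in the unit tests should behave equivalently.

```python
import tempfile

from huggingface_inference_toolkit.handler import HuggingFaceHandler
from huggingface_inference_toolkit.utils import _load_repository_from_hf

# Example checkpoint (assumption): any zero-shot-classification model works the same way.
MODEL = "typeform/distilbert-base-uncased-mnli"

with tempfile.TemporaryDirectory() as tmpdirname:
    # Download the repository locally, mirroring the pattern used in tests/unit/test_handler.py.
    storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="pytorch")
    handler = HuggingFaceHandler(model_dir=str(storage_dir), task="zero-shot-classification")

    # The handler accepts the TEI/camelCase spelling and a plain string input:
    # `candidateLabels` is mapped to `candidate_labels`, and the string becomes `sequences`.
    payload = {
        "inputs": "I have a problem with my iphone that needs to be resolved asap!!",
        "parameters": {"candidateLabels": ["urgent", "not urgent", "phone", "tablet"]},
    }
    prediction = handler(payload)
    print(prediction)  # e.g. {"sequence": ..., "labels": [...], "scores": [...]}
```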