@@ -117,7 +117,7 @@ def _ensure_token_limit(self, prompt: Union[str, List[Dict[str, str]]]) -> Union
 
 
 def instruction_following_models() -> List[str]:
-    return ["flan", "mt0", "bloomz", "davinci"]
+    return ["flan", "mt0", "bloomz", "davinci", "opt-iml"]
 
 
 class StopWordsCriteria(StoppingCriteria):
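Entries in this list act as substrings matched against full model names, which is why the short token "opt-iml" covers all OPT-IML variants. A minimal sketch of such a check (the caller below is hypothetical, not part of this diff):

# Hypothetical usage: substring match against a full model name.
model_name_or_path = "facebook/opt-iml-max-1.3b"
follows_instructions = any(m in model_name_or_path for m in instruction_following_models())
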
@@ -220,7 +220,7 @@ def __init__(
 
         if len(model_input_kwargs) > 0:
             logger.info("Using model input kwargs %s in %s", model_input_kwargs, self.__class__.__name__)
-
+        self.task_name = get_task(model_name_or_path, use_auth_token=use_auth_token)
         self.pipe = pipeline(
             model=model_name_or_path,
             device=self.devices[0] if "device_map" not in model_input_kwargs else None,
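get_task comes from transformers.pipelines and resolves the model's pipeline tag from the Hugging Face Hub; forwarding use_auth_token lets the lookup succeed for private or gated models. A minimal sketch (the model name is illustrative):

from transformers.pipelines import get_task

# Queries the Hub for the pipeline tag recorded on the model card,
# e.g. "text2text-generation" for FLAN-T5 models.
task_name = get_task("google/flan-t5-base", use_auth_token=None)
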
@@ -237,23 +237,34 @@ def invoke(self, *args, **kwargs):
         It takes a prompt and returns a list of generated text using the local Hugging Face transformers model
         :return: A list of generated text.
 
-        Note: Only kwargs relevant to Text2TextGenerationPipeline are passed to Hugging Face as model_input_kwargs.
-        Other kwargs are ignored.
+        Note: Only kwargs relevant to Text2TextGenerationPipeline and TextGenerationPipeline are passed to
+        Hugging Face as model_input_kwargs. Other kwargs are ignored.
         """
         output: List[Dict[str, str]] = []
         stop_words = kwargs.pop("stop_words", None)
         top_k = kwargs.pop("top_k", None)
         if kwargs and "prompt" in kwargs:
             prompt = kwargs.pop("prompt")
 
-            # Consider only Text2TextGenerationPipeline relevant, ignore others
-            # For more details refer to Hugging Face Text2TextGenerationPipeline documentation
+            # Consider only Text2TextGenerationPipeline and TextGenerationPipeline relevant, ignore others
+            # For more details refer to the Hugging Face Text2TextGenerationPipeline and TextGenerationPipeline
+            # documentation
             # TODO resolve these kwargs from the pipeline signature
             model_input_kwargs = {
                 key: kwargs[key]
-                for key in ["return_tensors", "return_text", "clean_up_tokenization_spaces", "truncation"]
+                for key in [
+                    "return_tensors",
+                    "return_text",
+                    "return_full_text",
+                    "clean_up_tokenization_spaces",
+                    "truncation",
+                ]
                 if key in kwargs
             }
+            # Prefer return_full_text=False for text-generation (unless explicitly set)
+            # so that only the generated text is returned (excluding the prompt)
+            if self.task_name == "text-generation" and "return_full_text" not in model_input_kwargs:
+                model_input_kwargs["return_full_text"] = False
             if stop_words:
                 sw = StopWordsCriteria(tokenizer=self.pipe.tokenizer, stop_words=stop_words)
                 model_input_kwargs["stopping_criteria"] = StoppingCriteriaList([sw])
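For context on the new default: with return_full_text=False, TextGenerationPipeline returns only the completion instead of prompt + completion, matching how text2text-generation models already behave. A minimal sketch (the model choice is illustrative):

from transformers import pipeline

generator = pipeline("text-generation", model="gpt2")
# return_full_text=False strips the prompt from "generated_text";
# by default the prompt would be echoed back at the start.
result = generator("Hello, my name is", max_new_tokens=10, return_full_text=False)
print(result[0]["generated_text"])
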
@@ -302,7 +313,7 @@ def _ensure_token_limit(self, prompt: Union[str, List[Dict[str, str]]]) -> Union
     def supports(cls, model_name_or_path: str, **kwargs) -> bool:
         task_name: Optional[str] = None
         try:
-            task_name = get_task(model_name_or_path)
+            task_name = get_task(model_name_or_path, use_auth_token=kwargs.get("use_auth_token", None))
         except RuntimeError:
             # This will fail for all non-HF models
             return False
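With the token forwarded, the support check also works for private models; get_task raises RuntimeError for names that don't resolve on the Hub, which is what routes non-HF models to return False. A sketch, assuming the enclosing class is the local Hugging Face invocation layer (its name does not appear in these hunks):

# Class name assumed from context; substitute the class this diff modifies.
HFLocalInvocationLayer.supports("my-org/private-flan-t5", use_auth_token="hf_xxx")  # True if the Hub task matches
HFLocalInvocationLayer.supports("text-davinci-003")  # False: not resolvable on the Hub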