
Commit 074b768
Author: Rossdan Craig <rossdan@lastmileai.dev>
[HF][streaming][1/n] Text Summarization

TSIA. Adds streaming functionality to the text summarization model parser.

## Test Plan

Rebase onto and test with 11ace0a. Follow the README from AIConfig Editor (https://github.com/lastmile-ai/aiconfig/tree/main/python/src/aiconfig/editor#dev), then run these commands:

```bash
aiconfig_path=/Users/rossdancraig/Projects/aiconfig/cookbooks/Gradio/huggingface.aiconfig.json
parsers_path=/Users/rossdancraig/Projects/aiconfig/cookbooks/Gradio/hf_model_parsers.py
alias aiconfig="python3 -m 'aiconfig.scripts.aiconfig_cli'"
aiconfig edit --aiconfig-path=$aiconfig_path --server-port=8080 --server-mode=debug_servers --parsers-module-path=$parsers_path
```

Then in AIConfig Editor, run the prompt (it will use the streaming format by default):
https://github.com/lastmile-ai/aiconfig/assets/151060367/e91a1d8b-a3e9-459c-9eb1-2d8e5ec58e73
Parent: 0c9e704

File tree: 2 files changed (+10, -3)


extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_generation.py (1 addition, 1 deletion)

```diff
@@ -251,7 +251,7 @@ async def run_inference(
         not "stream" in completion_data or completion_data.get("stream") != False
     )
     if should_stream:
-        tokenizer : AutoTokenizer = AutoTokenizer.from_pretrained(model_name)
+        tokenizer: AutoTokenizer = AutoTokenizer.from_pretrained(model_name)
         streamer = TextIteratorStreamer(tokenizer)
         completion_data["streamer"] = streamer
```

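The `should_stream` guard visible in the context lines above reads: stream only when the runtime options request it and the config does not explicitly disable it. A minimal sketch of that predicate, pulled out into a hypothetical standalone helper for clarity (the parser computes this expression inline; the function name and flat `options_stream` parameter are illustrative, not the extension's API):

```python
def should_stream(options_stream: bool, completion_data: dict) -> bool:
    """Stream only when the runtime requested it AND the config does not
    explicitly set stream=False (an absent or truthy "stream" key allows it).
    Hypothetical helper mirroring the inline expression in the parser."""
    return options_stream and (
        "stream" not in completion_data or completion_data.get("stream") != False
    )
```

Note the asymmetry this encodes: the config key can veto streaming (`stream: false`), but only the runtime options can turn it on.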
extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_summarization.py (9 additions, 2 deletions)

```diff
@@ -128,13 +128,18 @@ def construct_stream_output(
             "metadata": {},
         }
     )
+
     accumulated_message = ""
     for new_text in streamer:
         if isinstance(new_text, str):
+            # For some reason these symbols aren't filtered out by the streamer
+            new_text = new_text.replace("</s>", "")
+            new_text = new_text.replace("<s>", "")
+
             accumulated_message += new_text
             options.stream_callback(new_text, accumulated_message, 0)
-
     output.data = accumulated_message
+
     return output


@@ -245,7 +250,9 @@ async def run_inference(self, prompt: Prompt, aiconfig: "AIConfigRuntime", optio

     # if stream enabled in runtime options and config, then stream. Otherwise don't stream.
     streamer = None
-    should_stream = (options.stream if options else False) and (not "stream" in completion_data or completion_data.get("stream") != False)
+    should_stream = (options.stream if options else False) and (
+        not "stream" in completion_data or completion_data.get("stream") != False
+    )
     if should_stream:
         tokenizer: AutoTokenizer = AutoTokenizer.from_pretrained(model_name)
         streamer = TextIteratorStreamer(tokenizer)
```
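The new `replace()` calls above strip the `<s>`/`</s>` sentinel tokens that the summarization streamer passes through verbatim. A minimal, self-contained sketch of that accumulation loop, using a plain list of chunks as a stand-in for transformers' `TextIteratorStreamer` (the `consume_stream` name and callback signature are illustrative, not the extension's API):

```python
def consume_stream(streamer, stream_callback):
    """Accumulate streamed text chunks, stripping the <s>/</s> sentinels
    the streamer does not filter out, and report each delta plus the
    running total to the callback. Illustrative stand-in for the loop
    in construct_stream_output."""
    accumulated_message = ""
    for new_text in streamer:
        if isinstance(new_text, str):
            # Sentinel tokens arrive verbatim from the streamer; drop them.
            new_text = new_text.replace("</s>", "").replace("<s>", "")
            accumulated_message += new_text
            stream_callback(new_text, accumulated_message, 0)
    return accumulated_message

# A plain list stands in for TextIteratorStreamer here.
deltas = []
final = consume_stream(
    ["<s>A short ", "summary.</s>"],
    lambda delta, acc, idx: deltas.append(delta),
)
```

As an aside, `TextIteratorStreamer` forwards extra keyword arguments to `tokenizer.decode`, so passing `skip_special_tokens=True` at construction may achieve the same filtering without the manual `replace()` calls; this is an alternative, not what the commit does.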
