
Commit 0cebcd3

fix issue: stream generation is slow (#80)
1 parent c2c9c6a commit 0cebcd3

File tree: 1 file changed (+5 −1 lines)

llmserve/backend/llm/pipelines/default_pipeline.py

Lines changed: 5 additions & 1 deletion
@@ -169,7 +169,11 @@ def postprocess(self, model_outputs, **postprocess_kwargs) -> List[Response]:
     def streamGenerate(self, prompt: str, **generate_kwargs) -> Generator[str, None, None]:
         logger.info(f"DefaultPipeline.streamGenerate with generate_kwargs: {generate_kwargs}")
-        streamer = TextIteratorStreamer(self.tokenizer, timeout=0, skip_prompt=True, skip_special_tokens=True)
+        # timeout=0 dramatically slows down the generator; the root cause is still unknown
+        streamer = TextIteratorStreamer(self.tokenizer,
+                                        # timeout=0,
+                                        skip_prompt=True,
+                                        skip_special_tokens=True)
         input_ids = self.tokenizer([prompt], return_tensors="pt")
         # generation_kwargs = dict(input_ids, streamer=streamer, max_new_tokens=20)
         max_new_tokens = 256
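
For context, TextIteratorStreamer from Hugging Face transformers is normally consumed by running model.generate on a background thread while the calling thread iterates over the streamer; the streamer's timeout argument controls how long each read of its internal token queue may block. The sketch below is a minimal, hypothetical reconstruction of how a stream-generation method could drive the streamer after this commit. The function name stream_generate and the explicit model/tokenizer parameters are illustrative assumptions; max_new_tokens=256 and the omission of timeout follow the diff above.

from threading import Thread
from typing import Generator

from transformers import TextIteratorStreamer


def stream_generate(model, tokenizer, prompt: str, **generate_kwargs) -> Generator[str, None, None]:
    # Leave timeout at its default (block until the next token arrives)
    # rather than passing timeout=0, mirroring the change in this commit.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer([prompt], return_tensors="pt")
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=256, **generate_kwargs)
    # generate() blocks until completion, so it runs on a background thread
    # while this generator yields text chunks as the streamer emits them.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    for text in streamer:
        yield text
    thread.join()

Per the commit message, passing timeout=0 dramatically slowed streaming for reasons not yet diagnosed, so the sketch simply omits the argument and lets queue reads block until the next token is ready.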
