
Commit cc0e11c

fix bug for stream generate of llamacpp (#74)
1 parent fd76c83 commit cc0e11c


1 file changed: +9 -5 lines changed


llmserve/backend/llm/pipelines/llamacpp/llamacpp_pipeline.py

Lines changed: 9 additions & 5 deletions
@@ -241,9 +241,8 @@ def streamGenerate(self, prompt: str, **generate_kwargs) -> Generator[str, None,
 
         logger.info(f"stream generate_kwargs: {generate_kwargs}")
         logger.info(f"model inputs: {inputs}")
-
+        generate_kwargs.pop('stopping_sequences', None)
         if chat_completion:
-            generate_kwargs.pop('stopping_sequences', None)
             logger.info(f"chat generate_kwargs: {generate_kwargs}")
             output = self.model.create_chat_completion(messages=inputs[0], stream=True, **generate_kwargs)
             for chunk in output:
@@ -256,12 +255,17 @@ def streamGenerate(self, prompt: str, **generate_kwargs) -> Generator[str, None,
                     val = delta['content']
                     yield val
         else:
-            input_ids = self.model.tokenizer(inputs)
+            generate_kwargs.pop('max_tokens', None)
+            input_ids = self.tokenizer.encode(inputs[0])
+            # logger.info(f"model generate : {input_ids}")
+            logger.info(f"generate_kwargs: {generate_kwargs}")
             output = self.model.generate(tokens=input_ids, **generate_kwargs)
             for token in output:
                 val = self.model.detokenize([token])
-                logger.info(f'LlamaCppPipeline -> generate -> Yield -> "{val}" -> "{type(val)}"')
-                yield val
+                # logger.info(f'LlamaCppPipeline -> generate -> Yield -> "{val}" -> "{type(val)}"')
+                chunk = val.decode('utf-8')
+                logger.info(f'LlamaCppPipeline -> generate -> Yield -> "{chunk}"')
+                yield chunk
 
         # streaming sample for test
         # start = 0
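
For context on what the changed calls do, below is a minimal sketch of the two llama-cpp-python streaming paths exercised by streamGenerate; the model path, prompt, messages, and sampling kwargs are illustrative placeholders, not values from this repository. The points the fix relies on: Llama.generate() consumes and yields token ids rather than text, and Llama.detokenize() returns bytes, so each chunk has to be decoded to UTF-8 before being yielded as a str; kwargs that generate() does not accept (such as stopping_sequences and max_tokens) are popped beforehand.

# A minimal, self-contained sketch of the two llama-cpp-python streaming
# paths this commit touches; model path, prompt, messages, and sampling
# kwargs are placeholders.
from llama_cpp import Llama

llm = Llama(model_path="./model.gguf")  # hypothetical model file

# Chat path: create_chat_completion(stream=True) yields chunks whose
# choices[0]["delta"] may carry a "content" piece (the first chunk only
# carries the role), hence the membership check before reading it.
messages = [{"role": "user", "content": "Hello"}]
for chunk in llm.create_chat_completion(messages=messages, stream=True):
    delta = chunk["choices"][0]["delta"]
    if "content" in delta:
        print(delta["content"], end="", flush=True)

# Raw path: generate() consumes and yields token ids and does not stop on
# its own, so the prompt is tokenized first, the loop breaks on EOS, and
# each id is detokenized. detokenize() returns bytes, which is why the fix
# decodes to UTF-8 before yielding a str chunk.
input_ids = llm.tokenize(b"Hello, world")
for token in llm.generate(tokens=input_ids, temp=0.7, top_p=0.9):
    if token == llm.token_eos():
        break
    piece = llm.detokenize([token]).decode("utf-8")  # per-token decode, as in the commit
    print(piece, end="", flush=True)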
