
Commit 00ec9b3

Fix stream without prompt format (#75)
* fix bug for stream generate of llamacpp
* fix stream without prompt format
* update log
1 parent cc0e11c commit 00ec9b3

File tree

2 files changed: +5 −9 lines changed


llmserve/backend/llm/pipelines/llamacpp/llamacpp_pipeline.py

Lines changed: 3 additions & 7 deletions
@@ -255,15 +255,11 @@ def streamGenerate(self, prompt: str, **generate_kwargs) -> Generator[str, None, None]:
                 val = delta['content']
                 yield val
         else:
-            generate_kwargs.pop('max_tokens', None)
-            input_ids = self.tokenizer.encode(inputs[0])
-            # logger.info(f"model generate : {input_ids}")
             logger.info(f"generate_kwargs: {generate_kwargs}")
-            output = self.model.generate(tokens=input_ids, **generate_kwargs)
+            output = self.model(inputs[0], stream=True, **generate_kwargs)
             for token in output:
-                val = self.model.detokenize([token])
-                # logger.info(f'LlamaCppPipeline -> generate -> Yield -> "{val}" -> "{type(val)}"')
-                chunk = val.decode('utf-8')
+                # logger.info(f'LlamaCppPipeline -> generate -> Yield -> "{token}" -> "{type(token)}"')
+                chunk = token["choices"][0]["text"].replace("\u200b", "")
                 logger.info(f'LlamaCppPipeline -> generate -> Yield -> "{chunk}"')
                 yield chunk
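The change replaces the manual encode/generate/detokenize loop with llama-cpp-python's high-level streaming call, which yields OpenAI-style completion chunks; the text is read from chunk["choices"][0]["text"] instead of decoding raw token bytes per token (a likely failure mode, since decoding one token at a time can split multi-byte UTF-8 sequences). A minimal sketch of the pattern the new branch relies on, with a placeholder model path:

    # Minimal sketch of the streaming pattern this commit adopts,
    # using llama-cpp-python directly; the model path is a placeholder.
    from llama_cpp import Llama

    llm = Llama(model_path="/path/to/Qwen1.5-72B-Chat.gguf")

    # With stream=True the call yields OpenAI-style completion chunks;
    # the generated text lives in chunk["choices"][0]["text"].
    for chunk in llm("Hello", max_tokens=32, stream=True):
        text = chunk["choices"][0]["text"].replace("\u200b", "")
        print(text, end="", flush=True)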

models/text-generation--Qwen1.5-72B-Chat-GGUF.yaml

Lines changed: 2 additions & 2 deletions
@@ -16,7 +16,7 @@ model_config:
   warmup: True
   model_task: text-generation
   model_id: Qwen/Qwen1.5-72B-Chat-GGUF
-  max_input_words: 512
+  max_input_words: 1024
   initialization:
     # s3_mirror_config:
     #   bucket_uri: /data/models/Qwen1.5-72B-Chat-GGUF/
@@ -31,7 +31,7 @@ model_config:
     max_batch_size: 1
     batch_wait_timeout_s: 0
     generate_kwargs:
-      max_tokens: 512
+      max_tokens: 1024
     prompt_format: '[{{"role": "system", "content": "You are a helpful assistant."}},{{"role": "user", "content": "{instruction}"}}]'
     stopping_sequences: ["<|im_end|>"]
     scaling_config:
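Since the fixed branch now forwards generate_kwargs straight into the model call, the raised max_tokens: 1024 applies directly to the llama-cpp-python invocation. A hedged sketch of that flow; the dict literal stands in for llmserve's YAML loading, mapping stopping_sequences to llama-cpp-python's stop parameter is an assumption about the plumbing, and `llm` is the Llama instance from the earlier sketch:

    # The dict mirrors generate_kwargs from the YAML above; the config
    # loading and the stopping_sequences -> stop mapping are assumed,
    # not taken from this diff.
    generate_kwargs = {"max_tokens": 1024}

    for chunk in llm("Why is the sky blue?", stream=True,
                     stop=["<|im_end|>"], **generate_kwargs):
        print(chunk["choices"][0]["text"], end="", flush=True)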
