
Commit 6013d57

enable chat template applied for vllm integration (#65)
1 parent b9bfcbe commit 6013d57

File tree

  llmserve/backend/llm/engines/vllm/vllm.py

1 file changed: +15 −1 lines changed

llmserve/backend/llm/engines/vllm/vllm.py

Lines changed: 15 additions & 1 deletion
@@ -16,6 +16,7 @@
 import uuid
 import ray
 from llmserve.backend.server.utils import render_gradio_params
+import json


 from vllm.outputs import RequestOutput
@@ -163,9 +164,22 @@ async def predict(

         st = time.monotonic()
         request_id = str(uuid.uuid4())
+        tokenizer = self.engine.engine.tokenizer
+        prompt_text = inputs[0]
+
+        try:
+            prompt_text_bak = prompt_text
+            prompt_text = json.loads(prompt_text, strict=False)
+            prompt_text = tokenizer.apply_chat_template(prompt_text, tokenize=False, add_generation_prompt=True)
+        except Exception as ex:
+            logger.warn(f"Exception apply_chat_template: {ex}")
+            logger.info("Seems no chat template from user or the model donot has a 'chat template'")
+            prompt_text = prompt_text_bak
+
+        logger.info(f"final prompt is: {prompt_text}")
         # Construct a results generator from VLLM
         results_generator: AsyncIterator[RequestOutput] = self.engine.generate(
-            inputs[0],
+            prompt_text,
             self._parse_sampling_params(sampling_params),
             request_id,
         )
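
For reference, below is a minimal standalone sketch of the behavior this change introduces: the incoming prompt is treated as a JSON-encoded list of chat messages and rendered through the tokenizer's chat template, falling back to the raw prompt string when parsing or templating fails. This is not the repository's code, and the model name used here is only an illustrative assumption; any Hugging Face tokenizer that ships a chat template would behave the same way.

# Sketch of the chat-template path added in this commit (illustrative only).
# Model name is an assumption; any tokenizer with a chat template works.
import json

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B-Chat")

# Prompts are expected to arrive as a JSON-encoded list of chat messages.
prompt_text = '[{"role": "user", "content": "What is Ray Serve?"}]'

try:
    messages = json.loads(prompt_text, strict=False)
    # Render the messages into a single prompt string using the model's chat template.
    prompt_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
except Exception:
    # Plain-text prompts, or tokenizers without a chat template, fall back to the raw string.
    pass

print(prompt_text)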
