Skip to content

Commit c757cf5

Browse files
authored
fix openai stream & update chat template (#710)
Co-authored-by: baishihao <[email protected]>
1 parent ef2201d commit c757cf5

File tree

3 files changed

+11
-2
lines changed

3 files changed

+11
-2
lines changed

lightllm/server/api_models.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,10 @@ class ChatCompletionRequest(BaseModel):
2424
# Additional parameters supported by LightLLM
2525
do_sample: Optional[bool] = False
2626
top_k: Optional[int] = -1
27+
repetition_penalty: Optional[float] = 1.0
2728
ignore_eos: Optional[bool] = False
29+
role_settings: Optional[Dict[str, str]] = None
30+
character_settings: Optional[List[Dict[str, str]]] = None
2831

2932

3033
class UsageInfo(BaseModel):

lightllm/server/api_server.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ async def chat_completions(request: ChatCompletionRequest, raw_request: Request)
206206
do_sample=request.do_sample,
207207
presence_penalty=request.presence_penalty,
208208
frequency_penalty=request.frequency_penalty,
209+
repetition_penalty=request.repetition_penalty,
209210
temperature=request.temperature,
210211
top_p=request.top_p,
211212
top_k=request.top_k,
@@ -283,7 +284,7 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
283284
model=request.model,
284285
choices=[stream_choice],
285286
)
286-
yield ("data: " + stream_resp.json(ensure_ascii=False) + "\n\n").encode("utf-8")
287+
yield ("data: " + json.dumps(stream_resp.dict(), ensure_ascii=False) + "\n\n").encode("utf-8")
287288

288289
background_tasks = BackgroundTasks()
289290
return StreamingResponse(stream_results(), media_type="text/event-stream", background=background_tasks)

lightllm/server/build_prompt.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,10 @@ def init_tokenizer(args):
1111
async def build_prompt(request) -> str:
1212
global tokenizer
1313
messages = request.messages
14-
input_str = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
14+
kwargs = {"conversation": messages}
15+
if request.character_settings:
16+
kwargs["character_settings"] = request.character_settings
17+
if request.role_settings:
18+
kwargs["role_setting"] = request.role_settings
19+
input_str = tokenizer.apply_chat_template(**kwargs, tokenize=False, add_generation_prompt=True)
1520
return input_str

0 commit comments

Comments (0)