Commit e10e433

[Feat] Add reasoning parser for OpenAI API (#1154)
1 parent f6e7aef commit e10e433

6 files changed: +1153 additions, −24 deletions

lightllm/server/api_cli.py

Lines changed: 21 additions & 0 deletions
@@ -132,6 +132,27 @@ def make_argument_parser() -> argparse.ArgumentParser:
         default=None,
         help="tool call parser type",
     )
+    parser.add_argument(
+        "--reasoning_parser",
+        type=str,
+        choices=[
+            "deepseek-r1",
+            "deepseek-v3",
+            "glm45",
+            "gpt-oss",
+            "kimi",
+            "kimi_k2",
+            "qwen3",
+            "qwen3-thinking",
+            "minimax",
+            "minimax-append-think",
+            "step3",
+            "nano_v3",
+            "interns1",
+        ],
+        default=None,
+        help="reasoning parser type",
+    )
     parser.add_argument(
         "--chat_template",
         type=str,
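With this flag in place, a launch command that enables the parser might look like the sketch below (the model path is a placeholder; `python -m lightllm.server.api_server` is LightLLM's usual entry point):

```
python -m lightllm.server.api_server \
    --model_dir /path/to/DeepSeek-R1 \
    --reasoning_parser deepseek-r1
```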

lightllm/server/api_models.py

Lines changed: 11 additions & 5 deletions
@@ -73,7 +73,7 @@ class CompletionRequest(BaseModel):
     # prompt: string or tokens
     prompt: Union[str, List[str], List[int], List[List[int]]]
     suffix: Optional[str] = None
-    max_tokens: Optional[int] = 16
+    max_tokens: Optional[int] = 8192
     temperature: Optional[float] = 1.0
     top_p: Optional[float] = 1.0
     n: Optional[int] = 1
@@ -145,7 +145,7 @@ class ChatCompletionRequest(BaseModel):
     stream: Optional[bool] = False
     stream_options: Optional[StreamOptions] = None
     stop: Optional[Union[str, List[str]]] = None
-    max_tokens: Optional[int] = 16
+    max_tokens: Optional[int] = 8192
     presence_penalty: Optional[float] = 0.0
     frequency_penalty: Optional[float] = 0.0
     logit_bias: Optional[Dict[str, float]] = None
@@ -166,14 +166,18 @@ class ChatCompletionRequest(BaseModel):
     )  # noqa
     parallel_tool_calls: Optional[bool] = True

+    # OpenAI parameters for reasoning and others
+    chat_template_kwargs: Optional[Dict] = None
+    separate_reasoning: Optional[bool] = True
+    stream_reasoning: Optional[bool] = False
+
     # Additional parameters supported by LightLLM
     do_sample: Optional[bool] = True
     top_k: Optional[int] = -1
     repetition_penalty: Optional[float] = 1.0
     ignore_eos: Optional[bool] = False
     role_settings: Optional[Dict[str, str]] = None
     character_settings: Optional[List[Dict[str, str]]] = None
-    chat_template_kwargs: Optional[Dict[str, bool]] = None

     # Class variables to store loaded default values
     _loaded_defaults: ClassVar[Dict[str, Any]] = {}
@@ -255,8 +259,9 @@ class UsageInfo(BaseModel):


 class ChatMessage(BaseModel):
-    role: str
-    content: str
+    role: Optional[str] = None
+    content: Optional[str] = None
+    reasoning_content: Optional[str] = None
     tool_calls: Optional[List[ToolCall]] = Field(default=None, examples=[None])


@@ -283,6 +288,7 @@ class DeltaMessage(BaseModel):
     role: Optional[str] = None
     content: Optional[str] = None
     tool_calls: Optional[List[ToolCall]] = Field(default=None, examples=[None])
+    reasoning_content: Optional[str] = None


 class ChatCompletionStreamResponseChoice(BaseModel):
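Taken together, these model changes surface reasoning through the standard chat-completions schema. A minimal client sketch, assuming a server started with a reasoning parser configured (host, port, and model name are placeholders):

```python
import requests

# Placeholders: adjust host/port and model name to your deployment.
resp = requests.post(
    "http://localhost:8080/v1/chat/completions",
    json={
        "model": "deepseek-r1",
        "messages": [{"role": "user", "content": "Why is the sky blue?"}],
        "separate_reasoning": True,   # default; split thinking from the answer
        "stream_reasoning": False,
        "chat_template_kwargs": {"enable_thinking": True},
    },
)
message = resp.json()["choices"][0]["message"]
print(message.get("reasoning_content"))  # chain-of-thought text, if any
print(message.get("content"))            # final answer
```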

lightllm/server/api_openai.py

Lines changed: 88 additions & 9 deletions
@@ -9,6 +9,8 @@
 import pickle
 import uuid

+from lightllm.server.reasoning_parser import ReasoningParser
+
 from .function_call_parser import TOOLS_TAG_LIST, FunctionCallParser, ToolCallItem
 from .build_prompt import build_prompt, init_tokenizer

@@ -17,7 +19,7 @@
 from http import HTTPStatus
 from PIL import Image
 import multiprocessing as mp
-from typing import AsyncGenerator, Union, List, Dict
+from typing import Any, AsyncGenerator, Optional, Union, List, Dict
 from typing import Callable
 from lightllm.server import TokenLoad
 from fastapi import BackgroundTasks, FastAPI, Request, WebSocket, WebSocketDisconnect
@@ -109,6 +111,38 @@ def _get_history_tool_calls_cnt(request: ChatCompletionRequest) -> int:
     return idx


+def _get_reasoning_from_request(request: ChatCompletionRequest) -> bool:
+    """Judge whether the request needs reasoning"""
+    reasoning_parser = get_env_start_args().reasoning_parser
+    if not reasoning_parser:
+        return False
+    if reasoning_parser in ["deepseek-v3"]:
+        return request.chat_template_kwargs is not None and request.chat_template_kwargs.get("thinking") is True
+    if reasoning_parser in ["qwen3", "glm45", "nano_v3", "interns1"]:
+        # qwen3, glm45, nano_v3, and interns1 are reasoning by default
+        return not request.chat_template_kwargs or request.chat_template_kwargs.get("enable_thinking", True) is True
+    return True  # default
+
+
+def _process_reasoning_stream(
+    index: int,
+    delta: str,
+    reasoning_parser_dict: Dict[int, ReasoningParser],
+    content: Dict[str, Any],
+    request: ChatCompletionRequest,
+) -> tuple[Optional[str], str]:
+    """Process reasoning content in streaming response"""
+    if index not in reasoning_parser_dict:
+        request_enable_reasoning = _get_reasoning_from_request(request)
+        reasoning_parser_dict[index] = ReasoningParser(
+            get_env_start_args().reasoning_parser,
+            request.stream_reasoning,
+            request_enable_reasoning,
+        )
+    reasoning_parser = reasoning_parser_dict[index]
+    return reasoning_parser.parse_stream_chunk(delta)
+
+
 async def chat_completions_impl(request: ChatCompletionRequest, raw_request: Request) -> Response:
     from .api_http import g_objs

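The `ReasoningParser` class itself lives in `lightllm/server/reasoning_parser.py`, one of the six changed files but not shown in this excerpt. As a rough mental model only (a hypothetical sketch, not the committed implementation), non-stream parsing for a tag-delimited model such as deepseek-r1 amounts to splitting the finished text on the think tags:

```python
# Hypothetical sketch, NOT the committed ReasoningParser: illustrates what
# parse_non_stream conceptually returns for tag-delimited reasoning models.
from typing import Optional, Tuple

def parse_non_stream_sketch(
    text: str, start_tag: str = "<think>", end_tag: str = "</think>"
) -> Tuple[Optional[str], str]:
    """Split a finished generation into (reasoning_text, final_text)."""
    if end_tag not in text:
        return None, text  # no reasoning section was emitted
    head, _, tail = text.partition(end_tag)
    reasoning = head.replace(start_tag, "", 1).strip()
    return reasoning, tail.strip()
```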
@@ -226,10 +260,30 @@ async def chat_completions_impl(request: ChatCompletionRequest, raw_request: Req

         finish_reason = finish_reason_dict[sub_req_id]
         text = "".join(final_output_dict[sub_req_id])
+
+        # Handle reasoning content
+        reasoning_text = None
+        reasoning_parser = get_env_start_args().reasoning_parser
+        if reasoning_parser and request.separate_reasoning:
+            request_enable_reasoning = _get_reasoning_from_request(request)
+            try:
+                parser = ReasoningParser(
+                    model_type=reasoning_parser,
+                    stream_reasoning=False,
+                    force_reasoning=request_enable_reasoning,
+                )
+                reasoning_text, text = parser.parse_non_stream(text)
+            except Exception as e:
+                logger.error(f"Reasoning parsing error: {e}")
+                return create_error_response(
+                    HTTPStatus.BAD_REQUEST,
+                    "Failed to parse fc related info to json format!",
+                )
+
+        # Handle tool_calls parsing
         tool_calls = None
         tool_choice = request.tool_choice
         tools = request.tools
-
         if tool_choice != "none" and any([i in text for i in TOOLS_TAG_LIST]):
             if finish_reason == "stop":
                 finish_reason = "tool_calls"
@@ -257,7 +311,12 @@ async def chat_completions_impl(request: ChatCompletionRequest, raw_request: Req
             )
         if finish_reason == "tool_calls":
             text = ""
-        chat_message = ChatMessage(role="assistant", content=text, tool_calls=tool_calls)
+        chat_message = ChatMessage(
+            role="assistant",
+            content=text if text else "",
+            tool_calls=tool_calls,
+            reasoning_content=reasoning_text if reasoning_text else "",
+        )
         choice = ChatCompletionResponseChoice(
             index=i,
             message=chat_message,
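With `reasoning_content` populated, a non-streaming choice now has roughly this shape (illustrative values only):

```python
# Illustrative response fragment; field values are made up.
{
    "index": 0,
    "message": {
        "role": "assistant",
        "reasoning_content": "The user asks about Rayleigh scattering, so...",
        "content": "The sky looks blue because shorter wavelengths scatter more.",
        "tool_calls": None,
    },
    "finish_reason": "stop",
}
```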
@@ -273,6 +332,7 @@ async def chat_completions_impl(request: ChatCompletionRequest, raw_request: Req
         return create_error_response(HTTPStatus.BAD_REQUEST, "stream api only support n = 1")

     parser_dict = {}
+    reasoning_parser_dict = {}

     # Streaming case
     async def stream_results() -> AsyncGenerator[bytes, None]:
@@ -284,12 +344,31 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
         async for sub_req_id, request_output, metadata, finish_status in results_generator:
             prompt_tokens = metadata["prompt_tokens"]
             completion_tokens += 1
-            if request.tool_choice != "none" and request.tools:
-                delta = request_output
-                group_request_id = convert_sub_id_to_group_id(sub_req_id)
-                index = sub_req_id
-                finish_reason = finish_status.get_finish_reason()
+            group_request_id = convert_sub_id_to_group_id(sub_req_id)
+            index = sub_req_id
+            delta = request_output
+            finish_reason = finish_status.get_finish_reason()
+
+            # Handle reasoning content
+            if get_env_start_args().reasoning_parser and request.separate_reasoning:
+                reasoning_text, delta = _process_reasoning_stream(
+                    index, delta, reasoning_parser_dict, request_output, request
+                )
+                if reasoning_text:
+                    choice_data = ChatCompletionStreamResponseChoice(
+                        index=0,
+                        delta=DeltaMessage(reasoning_content=reasoning_text),
+                        finish_reason=None,
+                    )
+                    chunk = ChatCompletionStreamResponse(
+                        id=group_request_id,
+                        created=created_time,
+                        choices=[choice_data],
+                        model=request.model,
+                    )
+                    yield f"data: {chunk.model_dump_json()}\n\n"

+            if request.tool_choice != "none" and request.tools:
                 if index not in parser_dict:
                     # Provide a default value for tool_call_parser
                     tool_parser = getattr(g_objs.args, "tool_call_parser", None) or "llama3"
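On the client side, reasoning deltas then arrive as separate SSE chunks whose `delta` carries `reasoning_content` instead of `content`. A hedged consumption sketch (URL and model name are placeholders; the `[DONE]` check is defensive):

```python
import json
import requests

# Placeholders: adjust host/port and model name to your deployment.
with requests.post(
    "http://localhost:8080/v1/chat/completions",
    json={
        "model": "deepseek-r1",
        "messages": [{"role": "user", "content": "What is 17 * 24?"}],
        "stream": True,
        "separate_reasoning": True,
        "stream_reasoning": True,  # also stream the reasoning deltas
    },
    stream=True,
) as resp:
    for line in resp.iter_lines():
        if not line or not line.startswith(b"data: "):
            continue
        payload = line[len(b"data: "):]
        if payload.strip() == b"[DONE]":
            break
        delta = json.loads(payload)["choices"][0]["delta"]
        if delta.get("reasoning_content"):
            print("[think]", delta["reasoning_content"], end="", flush=True)
        elif delta.get("content"):
            print(delta["content"], end="", flush=True)
```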
@@ -368,7 +447,7 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
             else:
                 group_request_id = convert_sub_id_to_group_id(sub_req_id)

-                delta_message = DeltaMessage(role="assistant", content=request_output)
+                delta_message = DeltaMessage(role="assistant", content=delta)
                 if finish_status.is_finished():
                     finish_reason = finish_status.get_finish_reason()
                     stream_choice = ChatCompletionStreamResponseChoice(

lightllm/server/core/objs/start_args_type.py

Lines changed: 20 additions & 0 deletions
@@ -33,6 +33,26 @@ class StartArgs:
     tool_call_parser: Optional[str] = field(
         default=None, metadata={"choices": ["llama3", "qwen25", "mistral", "deepseekv3", "kimi_k2", "qwen"]}
     )
+    reasoning_parser: Optional[str] = field(
+        default=None,
+        metadata={
+            "choices": [
+                "deepseek-r1",
+                "deepseek-v3",
+                "glm45",
+                "gpt-oss",
+                "kimi",
+                "kimi_k2",
+                "qwen3",
+                "qwen3-thinking",
+                "minimax",
+                "minimax-append-think",
+                "step3",
+                "nano_v3",
+                "interns1",
+            ]
+        },
+    )
     chat_template: Optional[str] = field(default=None)
     running_max_req_size: int = field(default=1000)
     tp: int = field(default=1)
