Skip to content

Commit f69ba04

Browse files
lfr-0531JunyiXu-nv
authored and committed
[TRTLLM-9677][feat] Support DeepSeek-V3.2 tool parser (NVIDIA#10126)
Signed-off-by: Fanrong Li <23290157+lfr-0531@users.noreply.github.com>
1 parent 9f66169 commit f69ba04

File tree

4 files changed

+444
-0
lines changed

4 files changed

+444
-0
lines changed

tensorrt_llm/serve/openai_server.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,8 @@ def __init__(self,
157157
self.tool_call_id_type = "random" # default tool call id type is random
158158
if self.model_config.model_type == "kimi_k2":
159159
self.tool_call_id_type = "kimi_k2"
160+
elif self.model_config.model_type == "deepseek_v32":
161+
self.tool_call_id_type = "deepseek_v32"
160162

161163
# as disagg-worker
162164
self.disagg_cluster_storage = None
Lines changed: 296 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,296 @@
1+
# Adapted from https://github.com/sgl-project/sglang/blob/0071fe9c407ad59f2803cc319e1bcaa3ac2021f1/python/sglang/srt/function_call/deepseekv32_detector.py
2+
import json
3+
import re
4+
from typing import List
5+
6+
from tensorrt_llm.logger import logger
7+
8+
from ..openai_protocol import ChatCompletionToolsParam as Tool
9+
from .base_tool_parser import BaseToolParser
10+
from .core_types import StreamingParseResult, StructureInfo, ToolCallItem, _GetInfoFunc
11+
12+
13+
class DeepSeekV32Parser(BaseToolParser):
    """Tool parser for DeepSeek V3.2 model function call format.

    The DeepSeek V3.2 format uses XML-like DSML tags to delimit function calls.
    Supports two parameter formats:

    Format 1 - XML Parameter Tags:
    ```
    <|DSML|function_calls>
    <|DSML|invoke name="function_name">
    <|DSML|parameter name="param_name" string="true">value</|DSML|parameter>
    ...
    </|DSML|invoke>
    </|DSML|function_calls>
    ```

    Format 2 - Direct JSON:
    ```
    <|DSML|function_calls>
    <|DSML|invoke name="function_name">
    {
    "param_name": "value"
    }
    </|DSML|invoke>
    </|DSML|function_calls>
    ```

    Examples:
    ```
    <|DSML|function_calls>
    <|DSML|invoke name="get_favorite_tourist_spot">
    <|DSML|parameter name="city" string="true">San Francisco</|DSML|parameter>
    </|DSML|invoke>
    </|DSML|function_calls>

    <|DSML|function_calls>
    <|DSML|invoke name="get_favorite_tourist_spot">
    { "city": "San Francisco" }
    </|DSML|invoke>
    </|DSML|function_calls>
    ```

    Key Components:
    - Tool Calls Section: Wrapped between `<|DSML|function_calls>` and `</|DSML|function_calls>`
    - Individual Tool Call: Wrapped between `<|DSML|invoke name="...">` and `</|DSML|invoke>`
    - Parameters: Either XML tags or direct JSON format
    - Supports multiple tool calls

    Reference: DeepSeek V3.2 format specification
    """

    def __init__(self):
        """Set up the DSML delimiters, the derived regex patterns and streaming state."""
        super().__init__()
        self.bot_token = "<|DSML|function_calls>"  # nosec B105
        self.eot_token = "</|DSML|function_calls>"  # nosec B105
        self.invoke_end_token = "</|DSML|invoke>"  # nosec B105

        # The delimiters are literal text. Any regex metacharacter they contain
        # (notably "|", which is alternation) must be escaped, otherwise the
        # pattern degenerates into a set of unrelated alternatives. Every
        # pattern below is therefore assembled from re.escape()d literals.
        self.invoke_begin_regex = re.escape("<|DSML|invoke") + r'\s+name="([^"]+)"\s*>'
        self.parameter_regex = (
            re.escape("<|DSML|parameter")
            + r'\s+name="([^"]+)"\s+string="([^"]+)"\s*>(.*?)'
            + re.escape("</|DSML|parameter>")
        )

        # Whole tool-call section: captures everything between the outer tags.
        self._function_calls_re = re.compile(
            re.escape(self.bot_token) + r"(.*?)" + re.escape(self.eot_token),
            re.DOTALL,
        )
        # One complete invoke block: captures (function name, body).
        self._invoke_block_re = re.compile(
            self.invoke_begin_regex + r"(.*?)" + re.escape(self.invoke_end_token),
            re.DOTALL,
        )
        # Possibly unterminated invoke block for streaming: captures
        # (function name, body, closing tag or "" when the text ends first).
        self._invoke_stream_re = re.compile(
            self.invoke_begin_regex + r"(.*?)(" + re.escape(self.invoke_end_token) + r"|$)",
            re.DOTALL,
        )

        self._last_arguments = ""
        self.current_tool_id = -1

    def has_tool_call(self, text: str) -> bool:
        """Check if the text contains a deepseek v32 format tool call."""
        return self.bot_token in text

    def _parse_parameters_from_xml(self, invoke_content: str) -> dict:
        """Parse parameters from either XML-like format or JSON format to dict.

        Supports two formats:
        1. XML parameter tags: <|DSML|parameter name="..." string="...">value</|DSML|parameter>
        2. Direct JSON: { "key": "value" }

        :param invoke_content: The text found between the invoke open and close tags.
        :return: Mapping of parameter name to value; empty when nothing could be parsed.
        """
        # First, try to parse as direct JSON (new format)
        invoke_content_stripped = invoke_content.strip()

        if invoke_content_stripped.startswith("{") and invoke_content_stripped.endswith("}"):
            try:
                parameters = json.loads(invoke_content_stripped)
                if isinstance(parameters, dict):
                    return parameters
            except (json.JSONDecodeError, ValueError):
                # If JSON parsing fails, fall through to XML parsing
                pass

        # Fall back to XML parameter tag parsing (original format)
        parameters = {}
        param_matches = re.findall(self.parameter_regex, invoke_content, re.DOTALL)
        for param_name, param_type, param_value in param_matches:
            value = param_value.strip()
            if param_type == "true":
                # string="true": keep the raw text as the value.
                parameters[param_name] = value
            else:
                # Any other type: try JSON, keep the raw text if that fails.
                try:
                    parameters[param_name] = json.loads(value)
                except (json.JSONDecodeError, ValueError):
                    parameters[param_name] = value
        return parameters

    def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
        """One-time parsing: Detects and parses tool calls in the provided text.

        :param text: The complete text to parse.
        :param tools: List of available tools.
        :return: ParseResult indicating success or failure, consumed text, leftover text, and parsed calls.
        """
        idx = text.find(self.bot_token)
        if idx == -1:
            # No tool-call section at all: everything is normal text.
            return StreamingParseResult(normal_text=text, calls=[])

        # Text before the tool-call section is returned as normal text.
        normal_text = text[:idx].strip()

        calls = []
        try:
            # Extract content between function_calls tags
            function_calls_match = self._function_calls_re.search(text)
            if not function_calls_match:
                # Opening tag without its closing tag: no calls can be extracted.
                return StreamingParseResult(normal_text=normal_text, calls=[])

            function_calls_content = function_calls_match.group(1)

            # Find all invoke blocks
            for func_name, invoke_content in self._invoke_block_re.findall(function_calls_content):
                func_args = self._parse_parameters_from_xml(invoke_content)
                # construct match_result for parse_base_json
                match_result = {"name": func_name, "parameters": func_args}
                calls.extend(self.parse_base_json(match_result, tools))

            return StreamingParseResult(normal_text=normal_text, calls=calls)
        except Exception as e:
            logger.error(f"Error in detect_and_parse: {e}")
            # return the normal text if parsing fails
            return StreamingParseResult(normal_text=text)

    def parse_streaming_increment(self, new_text: str, tools: List[Tool]) -> StreamingParseResult:
        """Streaming incremental parsing tool calls for DeepSeekV32 format.

        Supports multiple consecutive invoke blocks. A tool call is emitted only
        once its closing invoke tag has arrived, as two items: one carrying the
        name and one carrying the complete JSON-encoded arguments.

        :param new_text: The newly generated chunk to append to the internal buffer.
        :param tools: List of available tools.
        :return: Normal text to forward and/or the tool calls completed by this chunk.
        """
        self._buffer += new_text
        current_text = self._buffer

        # Check if we have a tool call or any DSML-related content
        # Key insight: DSML tags contain distinctive markers like "|DSML|"
        # If we see these markers anywhere, we should keep buffering
        has_tool_call = self.bot_token in current_text or "<|DSML|invoke" in current_text

        # Check if buffer contains any DSML markers (handles partial/streaming DSML content)
        dsml_markers = ("|DSML|", "<|", "</|")
        potentially_dsml = any(marker in current_text for marker in dsml_markers)

        # Also check if text ends with start of a tag (to handle "<" arriving separately)
        ends_with_prefix = current_text.rstrip().endswith(("<", "<|", "</", "</|"))

        if not has_tool_call and not potentially_dsml and not ends_with_prefix:
            # Plain text. Flush the WHOLE buffer rather than just this chunk:
            # earlier chunks may have been held back (e.g. one ending in "<")
            # and would otherwise be dropped. No end-token stripping is needed
            # here because both end tokens contain the "|DSML|" marker, which
            # this branch has just ruled out.
            self._buffer = ""
            return StreamingParseResult(normal_text=current_text)

        if not hasattr(self, "_tool_indices"):
            self._tool_indices = self._get_tool_indices(tools)

        all_calls: list[ToolCallItem] = []
        try:
            # Loop to handle multiple consecutive invoke blocks
            while True:
                # Try to match an invoke block (may be partial)
                invoke_match = self._invoke_stream_re.search(current_text)
                if not invoke_match:
                    break

                func_name = invoke_match.group(1).strip()
                invoke_content = invoke_match.group(2)
                # group(3) is either "</|DSML|invoke>" (complete) or "" (incomplete, matched with $)
                is_tool_end = bool(invoke_match.group(3))

                # Initialize state if this is the first tool call
                if self.current_tool_id == -1:
                    self.current_tool_id = 0
                    self.prev_tool_call_arr = []
                    self.streamed_args_for_tool = [""]

                if not is_tool_end:
                    # Tool call not complete yet, don't return anything
                    # Wait for more chunks until we see </|DSML|invoke>
                    break

                # Parse current parameters from XML/JSON.
                # Note: invoke_content can be empty for functions with no
                # parameters; this is valid and must NOT be skipped.
                current_params = self._parse_parameters_from_xml(invoke_content)
                current_args_json = json.dumps(current_params, ensure_ascii=False)

                # Only emit the tool call when it's complete, so each function
                # is returned at most once: first the name, then the arguments
                # as complete JSON (always sent, even if empty).
                calls_for_this_invoke: list[ToolCallItem] = [
                    ToolCallItem(
                        tool_index=self.current_tool_id,
                        name=func_name,
                        parameters="",
                    ),
                    ToolCallItem(
                        tool_index=self.current_tool_id,
                        name=None,
                        parameters=current_args_json,
                    ),
                ]

                # Grow the bookkeeping arrays only now that the call is
                # complete; pre-allocating would let
                # _check_for_unstreamed_tool_args send incomplete calls.
                while len(self.prev_tool_call_arr) <= self.current_tool_id:
                    self.prev_tool_call_arr.append({})
                while len(self.streamed_args_for_tool) <= self.current_tool_id:
                    self.streamed_args_for_tool.append("")

                # Update the stored arguments
                self.prev_tool_call_arr[self.current_tool_id] = {
                    "name": func_name,
                    "arguments": current_params,
                }
                self.streamed_args_for_tool[self.current_tool_id] = current_args_json

                # Remove the completed tool call from buffer. This also drops
                # any buffered text that preceded the matched invoke block.
                self._buffer = current_text[invoke_match.end() :]
                current_text = self._buffer  # Update for next iteration

                all_calls.extend(calls_for_this_invoke)

                # Move to next tool call
                self.current_tool_id += 1
                self._last_arguments = ""
                self.current_tool_name_sent = False

            # No more complete invoke blocks found
            return StreamingParseResult(normal_text="", calls=all_calls)

        except Exception as e:
            logger.error(f"Error in parse_streaming_increment: {e}")
            return StreamingParseResult(normal_text=current_text)

    def structure_info(self) -> _GetInfoFunc:
        """Return a factory mapping a tool name to its invoke begin/end/trigger strings."""
        return lambda name: StructureInfo(
            begin=f'<|DSML|invoke name="{name}">',
            end="</|DSML|invoke>",
            trigger=f'<|DSML|invoke name="{name}">',
        )

tensorrt_llm/serve/tool_parser/tool_parser_factory.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from .base_tool_parser import BaseToolParser
44
from .deepseekv3_parser import DeepSeekV3Parser
55
from .deepseekv31_parser import DeepSeekV31Parser
6+
from .deepseekv32_parser import DeepSeekV32Parser
67
from .kimi_k2_tool_parser import KimiK2ToolParser
78
from .qwen3_coder_parser import Qwen3CoderToolParser
89
from .qwen3_tool_parser import Qwen3ToolParser
@@ -15,6 +16,7 @@ class ToolParserFactory:
1516
"kimi_k2": KimiK2ToolParser,
1617
"deepseekv3": DeepSeekV3Parser,
1718
"deepseekv3.1": DeepSeekV31Parser,
19+
"deepseek_v32": DeepSeekV32Parser,
1820
}
1921

2022
@staticmethod

0 commit comments

Comments
 (0)