1
+ """
1
2
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
2
3
#
3
4
# Licensed under the Apache License, Version 2.0 (the "License");
11
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
13
# See the License for the specific language governing permissions and
13
14
# limitations under the License.
15
+ """
14
16
15
17
import json
16
18
import re
@@ -97,37 +99,37 @@ def extract_tool_calls(self, model_output: str, request: ChatCompletionRequest)
97
99
remaining_text = model_output
98
100
99
101
while True :
100
- # 查找下一个tool_call块
102
+ # Find the next <tool_call>
101
103
tool_call_pos = remaining_text .find ("<tool_call>" )
102
104
if tool_call_pos == - 1 :
103
105
break
104
106
105
- # 提取tool_call开始位置后的内容
107
+ # Extract content after <tool_call>
106
108
tool_content_start = tool_call_pos + len ("<tool_call>" )
107
109
tool_content_end = remaining_text .find ("</tool_call>" , tool_content_start )
108
110
109
111
tool_json = ""
110
112
if tool_content_end == - 1 :
111
- # 处理未闭合的tool_call块(截断情况)
113
+ # Processing unclosed tool_call block (truncated case)
112
114
tool_json = remaining_text [tool_content_start :].strip ()
113
- remaining_text = "" # 没有更多内容需要处理
115
+ remaining_text = "" # No more content to process
114
116
else :
115
- # 处理完整的tool_call块
117
+ # Processing closed </tool_call> block
116
118
tool_json = remaining_text [tool_content_start :tool_content_end ].strip ()
117
119
remaining_text = remaining_text [tool_content_end + len ("</tool_call>" ) :]
118
120
119
121
if not tool_json :
120
122
continue
121
123
122
- # 处理JSON内容
124
+ # Process tool_json
123
125
tool_json = tool_json .strip ()
124
126
if not tool_json .startswith ("{" ):
125
127
tool_json = "{" + tool_json
126
128
if not tool_json .endswith ("}" ):
127
129
tool_json = tool_json + "}"
128
130
129
131
try :
130
- # 首先尝试标准JSON解析
132
+ # Parsing strategy: First try standard json.loads
131
133
try :
132
134
tool_data = json .loads (tool_json )
133
135
@@ -136,26 +138,26 @@ def extract_tool_calls(self, model_output: str, request: ChatCompletionRequest)
136
138
{
137
139
"name" : tool_data ["name" ],
138
140
"arguments" : tool_data ["arguments" ],
139
- "_is_complete" : True , # 明确标记为完整解析
141
+ "_is_complete" : True , # Mark as complete
140
142
}
141
143
)
142
144
continue
143
145
except json .JSONDecodeError :
144
146
pass
145
147
146
- # 标准解析失败时尝试partial_json_parser
148
+ # Try partial_json_parser when standard parsing fails
147
149
from partial_json_parser .core .options import Allow
148
150
149
151
try :
150
152
tool_data = {}
151
153
flags = Allow .ALL & ~ Allow .STR
152
154
153
- # 解析name字段
155
+ # Parse the name field
154
156
name_match = re .search (r'"name"\s*:\s*"([^"]*)"' , tool_json )
155
157
if name_match :
156
158
tool_data ["name" ] = name_match .group (1 )
157
159
158
- # 解析arguments字段
160
+ # Parse the arguments field
159
161
args_match = re .search (r'"arguments"\s*:\s*(\{.*)' , tool_json )
160
162
if args_match :
161
163
try :
@@ -168,7 +170,7 @@ def extract_tool_calls(self, model_output: str, request: ChatCompletionRequest)
168
170
{
169
171
"name" : tool_data .get ("name" , "" ),
170
172
"arguments" : tool_data .get ("arguments" , {}),
171
- "_is_partial" : True , # 标记为部分解析
173
+ "_is_partial" : True , # Mark as partial
172
174
}
173
175
)
174
176
except Exception as e :
@@ -183,18 +185,18 @@ def extract_tool_calls(self, model_output: str, request: ChatCompletionRequest)
183
185
return ExtractedToolCallInformation (tools_called = False , content = model_output )
184
186
185
187
tool_calls = []
186
- all_complete = True # 初始设为True,只要有一个不完整就变为False
188
+ all_complete = True # Initialize as all complete
187
189
188
190
for tool_call in function_call_arr :
189
- # 记录工具调用解析状态
191
+ # Read the parse-status flags recorded for this tool call
190
192
is_complete = tool_call .get ("_is_complete" , False )
191
193
is_partial = tool_call .get ("_is_partial" , False )
192
194
193
- # 只要有一个不完整就认为整体不完整
195
+ # If any tool call is incomplete or partial, mark all_complete as False
194
196
if not is_complete or is_partial :
195
197
all_complete = False
196
198
197
- # 处理参数序列化
199
+ # Process arguments
198
200
tool_args = tool_call .get ("arguments" , {})
199
201
if not isinstance (tool_args , dict ):
200
202
tool_args = {}
@@ -215,7 +217,7 @@ def extract_tool_calls(self, model_output: str, request: ChatCompletionRequest)
215
217
)
216
218
)
217
219
218
- # 只有当所有工具调用都明确标记为complete时才返回tools_called =True
220
+ # Only return tools_called =True if all tool calls are complete
219
221
return ExtractedToolCallInformation (
220
222
tools_called = all_complete , tool_calls = tool_calls if tool_calls else None , content = ""
221
223
)
@@ -237,16 +239,16 @@ def extract_tool_calls_streaming(
237
239
238
240
if self .tool_call_start_token_id not in current_token_ids :
239
241
return DeltaMessage (content = delta_text )
240
- # 忽略空chunk
242
+ # Skip empty chunks
241
243
if len (delta_text .strip ()) == 0 :
242
244
return None
243
245
244
246
try :
245
247
delta = None
246
- # 使用buffer累积delta_text内容
248
+ # Use buffer to accumulate delta_text content
247
249
self .buffer += delta_text
248
250
249
- # 处理增量中的新tool_call开始
251
+ # Handle the start of a new <tool_call> in this delta
250
252
if "<tool_call>" in delta_text :
251
253
self .current_tool_id = (
252
254
max (self .current_tool_id , 0 ) if self .current_tool_id == - 1 else self .current_tool_id + 1
@@ -256,7 +258,7 @@ def extract_tool_calls_streaming(
256
258
self .streamed_args_for_tool .append ("" )
257
259
data_processor_logger .debug (f"New tool call started with ID: { self .current_tool_id } " )
258
260
259
- # 1. 尝试解析name字段
261
+ # 1. Try to parse the name field
260
262
if not self .current_tool_name_sent and '"name"' in self .buffer :
261
263
name_match = re .search (r'"name"\s*:\s*"([^"]*)"' , self .buffer )
262
264
if name_match :
@@ -272,32 +274,31 @@ def extract_tool_calls_streaming(
272
274
)
273
275
]
274
276
)
275
- # 删除已处理的name部分
277
+ # Delete the processed name part from the buffer
276
278
self .buffer = self .buffer [name_match .end () :]
277
279
self .current_tool_name_sent = True
278
280
return delta
279
- # 2. 尝试解析arguments字段
281
+ # 2. Processing arguments field
280
282
if '"arguments"' in self .buffer :
281
283
args_match = re .search (r'"arguments"\s*:\s*(\{.*)' , self .buffer )
282
284
if args_match :
283
285
args_content = args_match .group (1 )
284
286
try :
285
- # 检查是否到达arguments结尾(括号完全匹配)
287
+ # Check if arguments field is complete by bracket matching
286
288
if "}}" in args_content :
287
- # 逐个字符检查括号匹配状态
288
289
matched_pos = - 1
289
290
for i , ch in enumerate (delta_text ):
290
291
if ch == "{" :
291
292
self .bracket_counts ["total_l" ] += 1
292
293
elif ch == "}" :
293
294
self .bracket_counts ["total_r" ] += 1
294
295
295
- if self .bracket_counts ["total_l" ] == self .bracket_counts ["total_r" ]: # 括号完全匹配
296
+ if self .bracket_counts ["total_l" ] == self .bracket_counts ["total_r" ]:
296
297
matched_pos = i
297
298
break
298
299
299
300
if matched_pos >= 0 :
300
- # 找到匹配点,清理buffer并返回
301
+ # Match found: truncate the delta at the matching brace, trim the buffer and return
301
302
truncate_text = delta_text [: matched_pos + 1 ]
302
303
delta = DeltaMessage (
303
304
tool_calls = [
@@ -312,10 +313,10 @@ def extract_tool_calls_streaming(
312
313
self .buffer = self .buffer [args_match .end () :]
313
314
return delta
314
315
else :
315
- # 没有完全匹配,继续累积
316
+ # No complete match yet
316
317
return None
317
318
else :
318
- # 增量返回当前可解析的部分
319
+ # Return partial arguments
319
320
for ch in delta_text :
320
321
if ch == "{" :
321
322
self .bracket_counts ["total_l" ] += 1
@@ -337,7 +338,6 @@ def extract_tool_calls_streaming(
337
338
end_pos = self .buffer .find ("</tool_call>" )
338
339
self .buffer = self .buffer [end_pos + len ("</tool_call>" ) :]
339
340
340
- # 完成当前工具调用处理
341
341
self .streamed_args_for_tool .append ("" )
342
342
343
343
return delta
0 commit comments