Commit bd192b2

fix parser
1 parent bfdec9f commit bd192b2

File tree

4 files changed: +134 -96 lines changed

fastdeploy/entrypoints/openai/tool_parsers/ernie_x1_tool_parser.py

Lines changed: 1 addition & 1 deletion
@@ -58,7 +58,7 @@ def __init__(self, tokenizer):
         self.tool_call_start_token_id = self.vocab.get(self.tool_call_start_token)
         self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)
         if self.tool_call_start_token_id is None or self.tool_call_end_token_id is None:
-            raise RuntimeError("Ernie x1 Tool parser could not locate tool call start/end " "tokens in the tokenizer!")
+            raise RuntimeError("Ernie x1 Tool parser could not locate tool call start/end tokens in the tokenizer!")
 
         if not self.model_tokenizer:
             raise ValueError(
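
The only change here joins two implicitly concatenated string literals into one. For context, the guard works because `dict.get` returns `None` for a missing key; a minimal sketch of the pattern, with hypothetical token strings and ids standing in for the real ones:

```python
# Sketch of the missing-token guard, assuming a plain dict vocab.
# The token strings and ids below are placeholders, not values from the file.
vocab = {"<tool_call>": 100, "</tool_call>": 101}

tool_call_start_token = "<tool_call>"  # assumed
tool_call_end_token = "</tool_call>"   # assumed

start_id = vocab.get(tool_call_start_token)  # None when the token is absent
end_id = vocab.get(tool_call_end_token)
if start_id is None or end_id is None:
    raise RuntimeError("Ernie x1 Tool parser could not locate tool call start/end tokens in the tokenizer!")
```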

fastdeploy/reasoning/ernie_vl_reasoning_parsers.py

Lines changed: 1 addition & 1 deletion
@@ -53,7 +53,7 @@ def __init__(self, tokenizer):
 
         if missing_tokens:
             raise RuntimeError(
-                f"Could not find the following token ids in tokenizer vocabulary: {', '.join(missing_tokens)}"
+                f"ernie vl reasoning parser could not find the following token ids in tokenizer vocabulary: {', '.join(missing_tokens)}"
             )
         self.token_status_mapping = {
             self.think_start_token_id: "think_start",

fastdeploy/reasoning/ernie_x1_reasoning_parsers.py

Lines changed: 37 additions & 30 deletions
@@ -54,11 +54,11 @@ def __init__(self, tokenizer):
             token_id = self.vocab.get(token_value)
             setattr(self, f"{name}_id", token_id)
             if token_id is None:
-                missing_tokens.append(f"{name.replace('_', ' ')} token")
+                missing_tokens.append(token_value)
 
         if missing_tokens:
             raise RuntimeError(
-                f"Could not find the following token ids in tokenizer vocabulary: {', '.join(missing_tokens)}"
+                f"ernie x1 reasoning parser could not find the following token ids in tokenizer vocabulary: {', '.join(missing_tokens)}"
             )
 
         self.token_status_mapping = {
@@ -106,22 +106,33 @@ def extract_reasoning_content_streaming(
             return None
 
         if model_status == "think_start":
-            if self.think_end_token_id not in current_token_ids:
-                return DeltaMessage(reasoning_content=delta_text)
-            else:
+            if self.think_end_token_id in delta_token_ids:
+                reasoning_content = ""
+                response_content = ""
+                end_index = delta_text.find(self.think_end_token)
+                reasoning_content = delta_text[:end_index]
+                response_start_pos = delta_text.find(self.response_start_token)
+                if response_start_pos != -1:
+                    response_content = self._extract_response_content(
+                        delta_text[response_start_pos + len(self.response_start_token) :]
+                    )
+                return DeltaMessage(reasoning_content=reasoning_content, content=response_content)
+            elif self.think_end_token_id in previous_token_ids:
                 if (
-                    self.response_start_token_id in current_token_ids
-                    and self.response_end_token_id not in current_token_ids
+                    self.response_start_token_id in previous_token_ids
+                    and self.response_end_token_id not in previous_token_ids
                 ):
                     return DeltaMessage(content=delta_text)
+                else:
+                    return DeltaMessage(reasoning_content=delta_text)
         elif model_status == "think_end":
             if (
-                self.response_start_token_id in current_token_ids
+                self.response_start_token_id in previous_token_ids
                 and self.response_end_token_id not in current_token_ids
             ):
                 return DeltaMessage(content=delta_text)
         elif model_status == "response_start":
-            if self.response_end_token_id not in current_token_ids:
+            if self.response_end_token_id not in previous_token_ids:
                 return DeltaMessage(content=delta_text)
 
         return None
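
The new `think_start` branch splits a delta that contains the think-end marker into reasoning and response parts. A hedged standalone sketch of that split, using literal tag strings in place of `self.think_end_token`/`self.response_start_token` (assumed values) and a plain slice in place of `_extract_response_content`:

```python
THINK_END = "</think>"         # assumed value of self.think_end_token
RESPONSE_START = "<response>"  # assumed value of self.response_start_token

def split_on_think_end(delta_text: str) -> tuple[str, str]:
    """Split one streamed delta containing </think> into (reasoning, response)."""
    end_index = delta_text.find(THINK_END)
    reasoning = delta_text[:end_index]
    response = ""
    pos = delta_text.find(RESPONSE_START)
    if pos != -1:
        # the real code runs this slice through self._extract_response_content
        response = delta_text[pos + len(RESPONSE_START):]
    return reasoning, response

assert split_on_think_end("add them</think>\n<response>42") == ("add them", "42")
```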
@@ -130,33 +141,29 @@ def extract_reasoning_content(
         self, model_output: str, request: ChatCompletionRequest, model_status: str
     ) -> Tuple[str, str]:
         """
-        Optimized batch version of the enhanced parser.
-        Preserves newlines in both reasoning and response content,
-        only removing the single newline before closing tags.
+        Optimized parser. Preserves newlines in reasoning and response content,
+        removing only the single newline before closing tags.
         """
         reasoning_content = ""
         response_content = ""
 
-        if model_status == "think_start":
-            think_end_pos = model_output.find(self.think_end_token)
-            if think_end_pos != -1:
-                reasoning_content = model_output[:think_end_pos]
-                remaining = model_output[think_end_pos + len(self.think_end_token) :].lstrip("\n")
-
-                # Determine if remaining content is a response or tool call
-                if remaining.startswith(self.response_start_token):
-                    response_start_len = len(self.response_start_token)
-                    response_content = self._extract_response_content(remaining[response_start_len:])
-                elif remaining.startswith(self.tool_call_start_token):
-                    pass  # No response content
+        if model_status in ["think_start", "think_end"]:
+            if model_status == "think_start":
+                think_end_pos = model_output.find(self.think_end_token)
+                if think_end_pos != -1:
+                    reasoning_content = model_output[:think_end_pos]
+                    remaining = model_output[think_end_pos + len(self.think_end_token) :].lstrip("\n")
+                else:
+                    reasoning_content = model_output
+                    remaining = ""
             else:
-                reasoning_content = model_output
+                remaining = model_output.lstrip("\n")
 
-        elif model_status == "think_end":
-            remaining = model_output.lstrip("\n")
-            if remaining.startswith(self.response_start_token):
-                response_start_len = len(self.response_start_token)
-                response_content = self._extract_response_content(remaining[response_start_len:])
+            response_start_pos = remaining.find(self.response_start_token)
+            if response_start_pos != -1:
+                response_content = self._extract_response_content(
+                    remaining[response_start_pos + len(self.response_start_token) :]
+                )
 
         elif model_status == "response_start":
             response_content = self._extract_response_content(model_output)
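
For the batch path, the commit folds the former `think_start`/`think_end` branches into one: only the computation of `remaining` differs, after which the response is located with `find` instead of `startswith`, so the response tag no longer has to sit at the very start of `remaining`. A rough standalone mirror under the same assumed tag strings, again with a plain slice standing in for `_extract_response_content`:

```python
THINK_END = "</think>"         # assumed token strings, as above
RESPONSE_START = "<response>"

def extract(model_output: str, model_status: str) -> tuple[str, str]:
    reasoning, response = "", ""
    if model_status in ("think_start", "think_end"):
        if model_status == "think_start":
            pos = model_output.find(THINK_END)
            if pos != -1:
                reasoning = model_output[:pos]
                remaining = model_output[pos + len(THINK_END):].lstrip("\n")
            else:
                reasoning, remaining = model_output, ""  # unterminated thinking
        else:
            remaining = model_output.lstrip("\n")  # prompt already closed </think>
        start = remaining.find(RESPONSE_START)
        if start != -1:
            response = remaining[start + len(RESPONSE_START):]
    return reasoning, response

print(extract("sum it</think>\n<response>42", "think_start"))  # ('sum it', '42')
print(extract("\n<response>42", "think_end"))                  # ('', '42')
```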

fastdeploy/reasoning/qwen3_reasoning_parsers.py

Lines changed: 95 additions & 64 deletions
@@ -35,24 +35,49 @@ class Qwen3ReasoningParser(ReasoningParser):
 
     def __init__(self, tokenizer):
         super().__init__(tokenizer)
-        self.think_start_token = "<think>"
-        self.think_end_token = "</think>"
+
+        # Define all tokens that need to be checked
+        token_definitions = {
+            "think_start_token": "<think>",
+            "think_end_token": "</think>",
+        }
 
         if not self.model_tokenizer:
-            raise ValueError(
-                "The model tokenizer must be passed to the ReasoningParser " "constructor during construction."
+            raise ValueError("The model tokenizer must be passed to the ReasoningParser constructor.")
+
+        missing_tokens = []
+        for name, token_value in token_definitions.items():
+            setattr(self, name, token_value)
+            token_id = self.vocab.get(token_value)
+            setattr(self, f"{name}_id", token_id)
+            if token_id is None:
+                missing_tokens.append(token_value)
+
+        if missing_tokens:
+            raise RuntimeError(
+                f"Qwen3 reasoning parser could not find the following token ids in tokenizer vocabulary: {', '.join(missing_tokens)}"
             )
-
-        self.think_start_token_id = self.vocab.get(self.think_start_token)
-        self.think_end_token_id = self.vocab.get(self.think_end_token)
-        if self.think_end_token_id is None:
-            raise RuntimeError("Qwen3 reasoning parser could not locate think end " "tokens in the tokenizer!")
+        self.token_status_mapping = {
+            self.think_start_token_id: "think_start",
+            self.think_end_token_id: "think_end",
+        }
 
     def is_reasoning_end(self, input_ids: list[int]) -> bool:
         return self.think_end_token_id in input_ids
 
+    def find_last_special_token(self, prompt_token_ids: list[int]) -> int:
+        for i in range(len(prompt_token_ids) - 1, -1, -1):
+            if prompt_token_ids[i] in self.token_status_mapping:
+                return prompt_token_ids[i]
+        return -1
+
     def get_model_status(self, prompt_token_ids: list[int]):
-        return "think_start"
+        special_token_id = self.find_last_special_token(prompt_token_ids)
+
+        if special_token_id == -1:
+            return "think_start"
+
+        return self.token_status_mapping[special_token_id]
 
     def extract_reasoning_content_streaming(
         self,
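
`get_model_status` previously hard-coded `"think_start"`; it now scans the prompt token ids from the end for the most recent `<think>`/`</think>` marker. A hedged illustration with made-up token ids (the real ids come from the tokenizer vocabulary):

```python
# Hypothetical ids standing in for vocab["<think>"] / vocab["</think>"]
token_status_mapping = {151667: "think_start", 151668: "think_end"}

def get_model_status(prompt_token_ids: list[int]) -> str:
    # walk right-to-left so the *last* marker wins
    for token_id in reversed(prompt_token_ids):
        if token_id in token_status_mapping:
            return token_status_mapping[token_id]
    return "think_start"  # no marker yet: treat as still thinking

print(get_model_status([1, 151667, 9, 9]))          # think_start
print(get_model_status([1, 151667, 9, 151668, 4]))  # think_end
```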
@@ -75,36 +100,39 @@ def extract_reasoning_content_streaming(
         if len(delta_token_ids) == 1 and (delta_token_ids[0] in [self.think_start_token_id, self.think_end_token_id]):
             return None
 
-        # </think> in delta
-        if self.think_end_token_id in delta_token_ids:
-            # <think> in delta, </think> in delta, extract reasoning content
-            if self.think_start_token_id in delta_token_ids:
+        if model_status == "think_start":
+            # </think> in delta
+            if self.think_end_token_id in delta_token_ids:
+                # <think> in delta, </think> in delta, extract reasoning content
+                if self.think_start_token_id in delta_token_ids:
+                    start_index = delta_text.find(self.think_start_token)
+                    end_index = delta_text.find(self.think_end_token)
+                    reasoning_content = delta_text[start_index + len(self.think_start_token) : end_index]
+                    content = delta_text[end_index + len(self.think_end_token) :]
+                    return DeltaMessage(reasoning_content=reasoning_content, content=content)
+                # <think> in previous, </think> in delta,
+                else:
+                    end_index = delta_text.find(self.think_end_token)
+                    reasoning_content = delta_text[:end_index]
+                    content = delta_text[end_index + len(self.think_end_token) :]
+                    content = content if content else None
+                    return DeltaMessage(reasoning_content=reasoning_content, content=content)
+            # </think> in previous, reasoning content continues
+            elif self.think_end_token_id in previous_token_ids:
+                return DeltaMessage(content=delta_text)
+            # <think> in previous
+            elif self.think_start_token_id in previous_token_ids:
+                return DeltaMessage(reasoning_content=delta_text)
+            # <think> in delta
+            elif self.think_start_token_id in delta_token_ids:
                 start_index = delta_text.find(self.think_start_token)
-                end_index = delta_token_ids.find(self.think_end_token)
-                reasoning_content = delta_text[start_index + len(self.think_start_token) : end_index]
-                content = delta_text[end_index + len(self.think_end_token) :]
+                reasoning_content = delta_text[start_index + len(self.think_start_token) :]
+                content = ""
                 return DeltaMessage(reasoning_content=reasoning_content, content=content)
-            # <think> in previous, </think> in delta,
             else:
-                end_index = delta_text.find(self.think_end_token)
-                reasoning_content = delta_text[:end_index]
-                content = delta_text[end_index + len(self.think_end_token) :]
-                content = content if content else None
-                return DeltaMessage(reasoning_content=reasoning_content, content=content)
-        # </think> in previous reasoning content continues
-        elif self.think_end_token_id in previous_token_ids:
-            return DeltaMessage(content=delta_text)
-        # <think> in previous
-        elif self.think_start_token_id in previous_token_ids:
-            return DeltaMessage(reasoning_content=delta_text)
-        # <think> in delta
-        elif self.think_start_token_id in delta_token_ids:
-            start_index = delta_text.find(self.think_start_token)
-            reasoning_content = delta_text[start_index + len(self.think_start_token) :]
-            content = ""
-            return DeltaMessage(reasoning_content=reasoning_content, content=content)
+                return DeltaMessage(reasoning_content=delta_text)
         else:
-            return DeltaMessage(reasoning_content=delta_text)
+            return DeltaMessage(content=delta_text)
 
     def extract_reasoning_content(
         self, model_output: str, request: ChatCompletionRequest, model_status: str
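
The streaming rework wraps the old branch chain in a `model_status` check: in `think_end` status every delta is plain content, while in `think_start` status the routing depends on where the markers have shown up so far. The priority order of that chain, condensed into a hedged helper (booleans stand in for the token-id membership tests):

```python
def route_delta(delta_has_end: bool, prev_has_end: bool,
                prev_has_start: bool, delta_has_start: bool) -> str:
    """Summarize the think_start branch above as a priority chain."""
    if delta_has_end:
        return "split delta into reasoning + content at </think>"
    if prev_has_end:
        return "all of delta is content"
    if prev_has_start:
        return "all of delta is reasoning"
    if delta_has_start:
        return "reasoning starts after <think> in this delta"
    return "all of delta is reasoning"

# e.g. </think> already seen in previous tokens:
print(route_delta(False, True, False, False))  # all of delta is content
```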
@@ -120,36 +148,39 @@ def extract_reasoning_content(
             tuple[Optional[str], Optional[str]]: reasoning content and content
         """
 
-        # Check whether the end tag is present
-        if self.think_end_token not in model_output:
-            return None, model_output
-
-        # Check whether the start tag is present
-        if self.think_start_token in model_output:
-            # Standard format: <think>content</think>answer
-            if self.think_start_token not in model_output or self.think_end_token not in model_output:
-                return None, model_output
-            # Check if the <think> is present in the model output, remove it
-            # if it is present.
-            model_output_parts = model_output.partition(self.think_start_token)
-            model_output = model_output_parts[2] if model_output_parts[1] else model_output_parts[0]
-            # Check if the model output contains the </think> tokens.
-            # If the end token is not found, return the model output as is.
+        if model_status == "think_start":
+            # Check whether the end tag is present
             if self.think_end_token not in model_output:
                 return None, model_output
 
-        # Extract reasoning content from the model output.
-        reasoning_content, _, content = model_output.partition(self.think_end_token)
-
-        final_content = content or None
-        return reasoning_content, final_content
-    else:
-        # Format missing the start tag: content</think>answer
-        parts = model_output.split(self.think_end_token, 1)
-
-        if len(parts) == 2:
-            reasoning_content = parts[0].strip()
-            final_content = parts[1].strip() if parts[1].strip() else None
+            # Check whether the start tag is present
+            if self.think_start_token in model_output:
+                # Standard format: <think>content</think>answer
+                if self.think_start_token not in model_output or self.think_end_token not in model_output:
+                    return None, model_output
+                # Check if the <think> is present in the model output, remove it
+                # if it is present.
+                model_output_parts = model_output.partition(self.think_start_token)
+                model_output = model_output_parts[2] if model_output_parts[1] else model_output_parts[0]
+                # Check if the model output contains the </think> tokens.
+                # If the end token is not found, return the model output as is.
+                if self.think_end_token not in model_output:
+                    return None, model_output
+
+                # Extract reasoning content from the model output.
+                reasoning_content, _, content = model_output.partition(self.think_end_token)
+
+                final_content = content or None
                 return reasoning_content, final_content
+            else:
+                # Format missing the start tag: content</think>answer
+                parts = model_output.split(self.think_end_token, 1)
 
-            return None, model_output
+                if len(parts) == 2:
+                    reasoning_content = parts[0].strip()
+                    final_content = parts[1].strip() if parts[1].strip() else None
+                    return reasoning_content, final_content
+
+                return None, model_output
+        else:
+            return None, model_output
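
The batch extractor keeps its old tag handling but only inside `think_start` status; any other status now returns the whole output as content. A rough standalone mirror of the `think_start` path (simplified: it strips whitespace in both branches, where the real code strips only in the missing-start-tag branch):

```python
START, END = "<think>", "</think>"  # the literal tags from the diff

def extract(model_output: str, model_status: str):
    if model_status != "think_start":
        return None, model_output   # already past thinking: all content
    if END not in model_output:
        return None, model_output   # unterminated reasoning
    if START in model_output:
        model_output = model_output.partition(START)[2]  # drop the open tag
    reasoning, _, content = model_output.partition(END)
    return reasoning.strip(), (content.strip() or None)

print(extract("<think>step 1</think>answer", "think_start"))  # ('step 1', 'answer')
print(extract("step 1</think>answer", "think_start"))         # ('step 1', 'answer')
print(extract("still thinking", "think_start"))               # (None, 'still thinking')
```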
