@@ -35,24 +35,49 @@ class Qwen3ReasoningParser(ReasoningParser):
35
35
36
36
def __init__(self, tokenizer):
    """Initialize the Qwen3 reasoning parser and resolve think-token ids.

    Args:
        tokenizer: The model tokenizer; required so the `<think>` /
            `</think>` token ids can be looked up in its vocabulary.

    Raises:
        ValueError: If no tokenizer was supplied to the base constructor.
        RuntimeError: If any think token is missing from the vocabulary.
    """
    super().__init__(tokenizer)

    # All special tokens whose ids must be present in the vocabulary.
    token_definitions = {
        "think_start_token": "<think>",
        "think_end_token": "</think>",
    }

    if not self.model_tokenizer:
        raise ValueError("The model tokenizer must be passed to the ReasoningParser constructor.")

    # Store each token string and its vocabulary id as attributes
    # (e.g. self.think_start_token / self.think_start_token_id),
    # collecting any tokens the vocabulary does not know about.
    missing_tokens = []
    for name, token_value in token_definitions.items():
        setattr(self, name, token_value)
        token_id = self.vocab.get(token_value)
        setattr(self, f"{name}_id", token_id)
        if token_id is None:
            missing_tokens.append(token_value)

    if missing_tokens:
        raise RuntimeError(
            f"Qwen3 reasoning parser could not find the following token ids in tokenizer vocabulary: "
            f"{', '.join(missing_tokens)}"
        )

    # Map token id -> status label consumed by get_model_status().
    self.token_status_mapping = {
        self.think_start_token_id: "think_start",
        self.think_end_token_id: "think_end",
    }
50
64
51
65
def is_reasoning_end(self, input_ids: list[int]) -> bool:
    """Report whether the reasoning section has been closed.

    True once the `</think>` token id occurs anywhere in *input_ids*.
    """
    return any(token_id == self.think_end_token_id for token_id in input_ids)
53
67
68
def find_last_special_token(self, prompt_token_ids: list[int]) -> int:
    """Return the last think-control token id in *prompt_token_ids*.

    Scans from the end of the prompt; returns -1 when neither the
    think-start nor the think-end token id is present.
    """
    for token_id in reversed(prompt_token_ids):
        if token_id in self.token_status_mapping:
            return token_id
    return -1
73
+
54
74
def get_model_status(self, prompt_token_ids: list[int]):
    """Classify the prompt by its most recent think-control token.

    Returns "think_start" or "think_end" according to the last special
    token found; defaults to "think_start" when the prompt contains no
    think tokens at all.
    """
    last_special = self.find_last_special_token(prompt_token_ids)
    return "think_start" if last_special == -1 else self.token_status_mapping[last_special]
56
81
57
82
def extract_reasoning_content_streaming(
    self,
    # NOTE(review): the parameter list below was reconstructed from the
    # body's variable usage and the standard vLLM streaming-parser
    # interface — confirm against the ReasoningParser base class.
    previous_text,
    current_text,
    delta_text,
    previous_token_ids,
    current_token_ids,
    delta_token_ids,
    model_status,
):
    """Incrementally split a streamed delta into reasoning vs. content.

    Args:
        delta_text: Text generated in this streaming step.
        previous_token_ids / delta_token_ids: Token ids seen before /
            produced in this step, used to locate the think tokens.
        model_status: "think_start" when the model may still emit a
            reasoning block; any other status means reasoning is over
            and the whole delta is answer content.

    Returns:
        A DeltaMessage carrying reasoning_content and/or content, or
        None when the delta is just a lone think-control token.
    """
    # A delta that is exactly one think-control token carries no text.
    if len(delta_token_ids) == 1 and (delta_token_ids[0] in [self.think_start_token_id, self.think_end_token_id]):
        return None

    if model_status == "think_start":
        # </think> in delta
        if self.think_end_token_id in delta_token_ids:
            # <think> in delta, </think> in delta, extract reasoning content
            if self.think_start_token_id in delta_token_ids:
                start_index = delta_text.find(self.think_start_token)
                # BUG FIX: the end tag must be located in delta_text —
                # the previous code called .find() on the token-id list,
                # which raises AttributeError at runtime.
                end_index = delta_text.find(self.think_end_token)
                reasoning_content = delta_text[start_index + len(self.think_start_token) : end_index]
                content = delta_text[end_index + len(self.think_end_token) :]
                return DeltaMessage(reasoning_content=reasoning_content, content=content)
            # <think> in previous, </think> in delta
            else:
                end_index = delta_text.find(self.think_end_token)
                reasoning_content = delta_text[:end_index]
                content = delta_text[end_index + len(self.think_end_token) :]
                content = content if content else None
                return DeltaMessage(reasoning_content=reasoning_content, content=content)
        # </think> in previous: answer content continues
        elif self.think_end_token_id in previous_token_ids:
            return DeltaMessage(content=delta_text)
        # <think> in previous: reasoning content continues
        elif self.think_start_token_id in previous_token_ids:
            return DeltaMessage(reasoning_content=delta_text)
        # <think> in delta: reasoning starts mid-delta
        elif self.think_start_token_id in delta_token_ids:
            start_index = delta_text.find(self.think_start_token)
            reasoning_content = delta_text[start_index + len(self.think_start_token) :]
            content = ""
            return DeltaMessage(reasoning_content=reasoning_content, content=content)
        # No think tokens seen yet: treat text as implicit reasoning.
        else:
            return DeltaMessage(reasoning_content=delta_text)
    else:
        # Reasoning already closed before this request: plain content.
        return DeltaMessage(content=delta_text)
108
136
109
137
def extract_reasoning_content(
    self, model_output: str, request: "ChatCompletionRequest", model_status: str
) -> tuple[Optional[str], Optional[str]]:
    """Split a complete model output into (reasoning_content, content).

    Args:
        model_output: Full text generated by the model.
        request: The originating chat-completion request (unused here;
            kept for interface compatibility).
        model_status: "think_start" when the model may have produced a
            reasoning block; any other status means the whole output is
            answer content.

    Returns:
        tuple[Optional[str], Optional[str]]: reasoning content and content.
    """
    if model_status != "think_start":
        # Reasoning was already closed (or never opened) by the prompt.
        return None, model_output

    # No closing tag at all: the whole output is answer content.
    if self.think_end_token not in model_output:
        return None, model_output

    if self.think_start_token in model_output:
        # Standard format: <think>reasoning</think>answer.
        # Drop everything up to and including the first <think>.
        # (The previous code re-checked both tokens' presence here,
        # which was dead code — both were already established above.)
        before, sep, after = model_output.partition(self.think_start_token)
        remainder = after if sep else before
        # </think> may have occurred only before <think>; if so, return
        # the stripped remainder as plain content.
        if self.think_end_token not in remainder:
            return None, remainder
        reasoning_content, _, content = remainder.partition(self.think_end_token)
        return reasoning_content, content or None
    else:
        # Missing start tag: reasoning</think>answer.
        parts = model_output.split(self.think_end_token, 1)
        if len(parts) == 2:
            reasoning_content = parts[0].strip()
            final_content = parts[1].strip() or None
            return reasoning_content, final_content
        return None, model_output
0 commit comments