@@ -212,6 +212,9 @@ async def _stream_generator(self, stream, request_id: str) -> AsyncGenerator[str
212212 }
213213 finish_reason = "null"
214214
215+ full_text = ""
216+ full_reasoning = ""
217+
215218 async for chunk in stream :
216219 if chunk .usage :
217220 accumulated_usage ["total_tokens" ] = chunk .usage .total_tokens
@@ -220,11 +223,19 @@ async def _stream_generator(self, stream, request_id: str) -> AsyncGenerator[str
220223 details = getattr (chunk .usage , "completion_tokens_details" , None )
221224 if details :
222225 accumulated_usage ["output_tokens_details" ]["reasoning_tokens" ] = getattr (details , "reasoning_tokens" , 0 )
223- accumulated_usage ["output_tokens_details" ]["text_tokens" ] = accumulated_usage ["output_tokens" ] - accumulated_usage ["output_tokens_details" ]["reasoning_tokens" ]
226+ accumulated_usage ["output_tokens_details" ]["text_tokens" ] = (
227+ accumulated_usage ["output_tokens" ] - accumulated_usage ["output_tokens_details" ]["reasoning_tokens" ]
228+ )
224229
225230 delta = chunk .choices [0 ].delta if chunk .choices else None
226- content = delta .content if delta and delta .content else ""
227- reasoning = (getattr (delta , "reasoning_content" , "" ) or "" ) if delta else ""
231+ delta_content = delta .content if delta and delta .content else ""
232+ delta_reasoning = (getattr (delta , "reasoning_content" , "" ) or "" ) if delta else ""
233+
234+ # Accumulate the full content across chunks
235+ if delta_content :
236+ full_text += delta_content
237+ if delta_reasoning :
238+ full_reasoning += delta_reasoning
228239
229240 tool_calls = None
230241 if delta and delta .tool_calls :
@@ -233,16 +244,31 @@ async def _stream_generator(self, stream, request_id: str) -> AsyncGenerator[str
233244 if chunk .choices and chunk .choices [0 ].finish_reason :
234245 finish_reason = chunk .choices [0 ].finish_reason
235246
236- message_body = {"role" : "assistant" , "content" : content , "reasoning_content" : reasoning }
237- if tool_calls : message_body ["tool_calls" ] = tool_calls
247+ # Key point: the stop packet emits the full accumulated content, so that an empty final chunk does not leave the aggregated result empty
248+ if finish_reason != "null" :
249+ content_to_send = full_text
250+ reasoning_to_send = full_reasoning
251+ else :
252+ content_to_send = delta_content
253+ reasoning_to_send = delta_reasoning
254+
255+ message_body = {
256+ "role" : "assistant" ,
257+ "content" : content_to_send ,
258+ "reasoning_content" : reasoning_to_send
259+ }
260+ if tool_calls :
261+ message_body ["tool_calls" ] = tool_calls
238262
239263 response_body = {
240264 "output" : {"choices" : [{"message" : message_body , "finish_reason" : finish_reason }]},
241265 "usage" : accumulated_usage ,
242266 "request_id" : request_id
243267 }
244268 yield f"data: { json .dumps (response_body , ensure_ascii = False )} \n \n "
245- if finish_reason != "null" : break
269+
270+ if finish_reason != "null" :
271+ break
246272
247273 def _format_unary_response (self , completion , request_id : str ):
248274 choice = completion .choices [0 ]
0 commit comments