@@ -591,17 +591,14 @@ async def _stream_generator(
591591
592592 # --- Stop Logic Buffer State ---
593593 content_buffer = ""
594- # Determine the maximum length of any stop sequence to know how much safe buffer to keep
595594 max_stop_len = max ([len (s ) for s in stop_sequences ]) if stop_sequences else 0
596595 stop_triggered = False
597596
598597 async for chunk in stream :
599598 if stop_triggered :
600- continue # Consume stream but do not yield
599+ continue
601600
602601 if chunk .usage :
603- # Note: Usage might be slightly higher than what user sees if we cut it off,
604- # but we pass upstream usage for accuracy of billing/computation.
605602 accumulated_usage ["total_tokens" ] = chunk .usage .total_tokens
606603 accumulated_usage ["input_tokens" ] = chunk .usage .prompt_tokens
607604 accumulated_usage ["output_tokens" ] = chunk .usage .completion_tokens
@@ -617,13 +614,13 @@ async def _stream_generator(
617614
618615 delta = chunk .choices [0 ].delta if chunk .choices else None
619616
620- # Reasoning Content
617+ # --- 1. Reasoning Content Handling ---
621618 delta_reasoning = (
622619 (getattr (delta , "reasoning_content" , "" ) or "" ) if delta else ""
623620 )
624621 if delta_reasoning :
625622 full_reasoning += delta_reasoning
626- # Reasoning is yielded immediately regardless of buffering state of content
623+ # 修复逻辑:无论是否 incremental,收到推理内容时都应输出
627624 if is_incremental :
628625 yield self ._build_stream_response (
629626 content = "" ,
@@ -633,49 +630,47 @@ async def _stream_generator(
633630 usage = accumulated_usage ,
634631 request_id = request_id ,
635632 )
633+ else :
634+ # 非增量模式下,输出当前累积的全量推理内容 + 当前累积的全量文本
635+ yield self ._build_stream_response (
636+ content = full_text ,
637+ reasoning_content = full_reasoning ,
638+ tool_calls = None ,
639+ finish_reason = "null" ,
640+ usage = accumulated_usage ,
641+ request_id = request_id ,
642+ )
636643
637- # Content
644+ # --- 2. Content Handling ---
638645 delta_content = delta .content if delta and delta .content else ""
639-
640646 content_to_yield = ""
641647
642648 if delta_content :
643649 if not stop_sequences :
644- # No stop logic, pass through
645650 content_to_yield = delta_content
646651 full_text += delta_content
647652 else :
648- # Buffer logic
649653 content_buffer += delta_content
650-
651- # Check if buffer contains any stop sequence
652654 earliest_idx , _ = self ._find_earliest_stop (
653655 content_buffer , stop_sequences
654656 )
655657
656658 if earliest_idx != - 1 :
657- # Stop found
658659 stop_triggered = True
659660 finish_reason = "stop"
660-
661- # Yield everything up to the stop sequence
662661 final_chunk = content_buffer [:earliest_idx ]
663662 content_to_yield = final_chunk
664663 full_text += final_chunk
665- content_buffer = "" # Clear buffer
666- # We break the loop later after sending this final chunk
664+ content_buffer = ""
667665 else :
668- # No stop found yet.
669- # We can safely yield the part of the buffer that is "safe"
670- # Simplest heuristic: Keep the last N characters where N is max_stop_len
671666 if len (content_buffer ) > max_stop_len :
672667 safe_chars = len (content_buffer ) - max_stop_len
673668 chunk_safe = content_buffer [:safe_chars ]
674669 content_to_yield = chunk_safe
675670 full_text += chunk_safe
676671 content_buffer = content_buffer [safe_chars :]
677672
678- # Tool calls logic
673+ # --- 3. Tool Calls Handling ---
679674 current_tool_calls_payload = None
680675 if delta and delta .tool_calls :
681676 if is_incremental :
@@ -715,32 +710,48 @@ async def _stream_generator(
715710 if upstream_finish != "null" :
716711 finish_reason = upstream_finish
717712
718- # Yield Content if we have something ready to go
719- if is_incremental :
720- if (
721- content_to_yield
722- or current_tool_calls_payload
723- or (stop_triggered and finish_reason == "stop" )
724- ):
713+ # --- 4. Yield Content Logic (Fixed) ---
714+
715+ # 判断是否有实质内容需要推送
716+ has_content_update = (
717+ content_to_yield
718+ or current_tool_calls_payload
719+ or (stop_triggered and finish_reason == "stop" )
720+ )
721+
722+ if has_content_update :
723+ if is_incremental :
724+ # 增量模式:只推 delta
725725 yield self ._build_stream_response (
726726 content = content_to_yield ,
727- reasoning_content = "" , # Already sent above
727+ reasoning_content = "" , # 推理已在上方单独处理
728728 tool_calls = current_tool_calls_payload ,
729- finish_reason = (
730- finish_reason if stop_triggered else "null"
731- ), # Don't send upstream finish yet unless stopped
729+ finish_reason = (finish_reason if stop_triggered else "null" ),
730+ usage = accumulated_usage ,
731+ request_id = request_id ,
732+ )
733+ else :
734+ # 非增量模式(全量模式):推送 full_text
735+ # 注意:非增量模式下,Tool Calls 通常建议在结束时统一发送,或者累积发送
736+ # 这里为了保持简洁,暂不实时推送未完成的 Tool Calls 结构,除非你需要
737+ yield self ._build_stream_response (
738+ content = full_text ,
739+ reasoning_content = full_reasoning ,
740+ tool_calls = None , # 全量模式通常不流式传输部分工具调用,只传输文本
741+ finish_reason = (finish_reason if stop_triggered else "null" ),
732742 usage = accumulated_usage ,
733743 request_id = request_id ,
734744 )
735745
736746 if stop_triggered :
737747 break
738748
739- # End of Stream
749+ # --- End of Stream Handling ---
740750
741- # If we have leftover buffer and weren't stopped, flush it now
751+ # Flush leftover buffer if not stopped
742752 if not stop_triggered and content_buffer and stop_sequences :
743753 full_text += content_buffer
754+ # 如果还有缓冲区剩余,根据模式推送
744755 if is_incremental :
745756 yield self ._build_stream_response (
746757 content = content_buffer ,
@@ -750,10 +761,19 @@ async def _stream_generator(
750761 usage = accumulated_usage ,
751762 request_id = request_id ,
752763 )
764+ else :
765+ yield self ._build_stream_response (
766+ content = full_text ,
767+ reasoning_content = full_reasoning ,
768+ tool_calls = None ,
769+ finish_reason = "null" ,
770+ usage = accumulated_usage ,
771+ request_id = request_id ,
772+ )
753773
774+ # Final Finish Handling
754775 if not is_incremental :
755- # For non-incremental (but stream=True upstream), we yield the whole thing at the end
756- # Applying stop logic to the full text collected
776+ # 非增量模式的最后一包,包含 finish_reason 和完整的 Tool Calls
757777 if stop_sequences :
758778 earliest_idx , _ = self ._find_earliest_stop (full_text , stop_sequences )
759779 if earliest_idx != - 1 :
@@ -775,7 +795,7 @@ async def _stream_generator(
775795 request_id = request_id ,
776796 )
777797 else :
778- # Send final finish reason for incremental mode if not already sent via stop trigger
798+ # 增量模式,如果没有通过 stop 触发结束,需要发一个空的结束包
779799 if not stop_triggered and finish_reason != "null" :
780800 yield self ._build_stream_response (
781801 content = "" ,
0 commit comments