Skip to content

Commit b21475f

Browse files
committed
Update mock_server.py
1 parent 884fcff commit b21475f

File tree

1 file changed

+57
-37
lines changed

1 file changed

+57
-37
lines changed

tests/mock_server.py

Lines changed: 57 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -591,17 +591,14 @@ async def _stream_generator(
591591

592592
# --- Stop Logic Buffer State ---
593593
content_buffer = ""
594-
# Determine the maximum length of any stop sequence to know how much safe buffer to keep
595594
max_stop_len = max([len(s) for s in stop_sequences]) if stop_sequences else 0
596595
stop_triggered = False
597596

598597
async for chunk in stream:
599598
if stop_triggered:
600-
continue # Consume stream but do not yield
599+
continue
601600

602601
if chunk.usage:
603-
# Note: Usage might be slightly higher than what user sees if we cut it off,
604-
# but we pass upstream usage for accuracy of billing/computation.
605602
accumulated_usage["total_tokens"] = chunk.usage.total_tokens
606603
accumulated_usage["input_tokens"] = chunk.usage.prompt_tokens
607604
accumulated_usage["output_tokens"] = chunk.usage.completion_tokens
@@ -617,13 +614,13 @@ async def _stream_generator(
617614

618615
delta = chunk.choices[0].delta if chunk.choices else None
619616

620-
# Reasoning Content
617+
# --- 1. Reasoning Content Handling ---
621618
delta_reasoning = (
622619
(getattr(delta, "reasoning_content", "") or "") if delta else ""
623620
)
624621
if delta_reasoning:
625622
full_reasoning += delta_reasoning
626-
# Reasoning is yielded immediately regardless of buffering state of content
623+
# 修复逻辑:无论是否 incremental,收到推理内容时都应输出
627624
if is_incremental:
628625
yield self._build_stream_response(
629626
content="",
@@ -633,49 +630,47 @@ async def _stream_generator(
633630
usage=accumulated_usage,
634631
request_id=request_id,
635632
)
633+
else:
634+
# 非增量模式下,输出当前累积的全量推理内容 + 当前累积的全量文本
635+
yield self._build_stream_response(
636+
content=full_text,
637+
reasoning_content=full_reasoning,
638+
tool_calls=None,
639+
finish_reason="null",
640+
usage=accumulated_usage,
641+
request_id=request_id,
642+
)
636643

637-
# Content
644+
# --- 2. Content Handling ---
638645
delta_content = delta.content if delta and delta.content else ""
639-
640646
content_to_yield = ""
641647

642648
if delta_content:
643649
if not stop_sequences:
644-
# No stop logic, pass through
645650
content_to_yield = delta_content
646651
full_text += delta_content
647652
else:
648-
# Buffer logic
649653
content_buffer += delta_content
650-
651-
# Check if buffer contains any stop sequence
652654
earliest_idx, _ = self._find_earliest_stop(
653655
content_buffer, stop_sequences
654656
)
655657

656658
if earliest_idx != -1:
657-
# Stop found
658659
stop_triggered = True
659660
finish_reason = "stop"
660-
661-
# Yield everything up to the stop sequence
662661
final_chunk = content_buffer[:earliest_idx]
663662
content_to_yield = final_chunk
664663
full_text += final_chunk
665-
content_buffer = "" # Clear buffer
666-
# We break the loop later after sending this final chunk
664+
content_buffer = ""
667665
else:
668-
# No stop found yet.
669-
# We can safely yield the part of the buffer that is "safe"
670-
# Simplest heuristic: Keep the last N characters where N is max_stop_len
671666
if len(content_buffer) > max_stop_len:
672667
safe_chars = len(content_buffer) - max_stop_len
673668
chunk_safe = content_buffer[:safe_chars]
674669
content_to_yield = chunk_safe
675670
full_text += chunk_safe
676671
content_buffer = content_buffer[safe_chars:]
677672

678-
# Tool calls logic
673+
# --- 3. Tool Calls Handling ---
679674
current_tool_calls_payload = None
680675
if delta and delta.tool_calls:
681676
if is_incremental:
@@ -715,32 +710,48 @@ async def _stream_generator(
715710
if upstream_finish != "null":
716711
finish_reason = upstream_finish
717712

718-
# Yield Content if we have something ready to go
719-
if is_incremental:
720-
if (
721-
content_to_yield
722-
or current_tool_calls_payload
723-
or (stop_triggered and finish_reason == "stop")
724-
):
713+
# --- 4. Yield Content Logic (Fixed) ---
714+
715+
# 判断是否有实质内容需要推送
716+
has_content_update = (
717+
content_to_yield
718+
or current_tool_calls_payload
719+
or (stop_triggered and finish_reason == "stop")
720+
)
721+
722+
if has_content_update:
723+
if is_incremental:
724+
# 增量模式:只推 delta
725725
yield self._build_stream_response(
726726
content=content_to_yield,
727-
reasoning_content="", # Already sent above
727+
reasoning_content="", # 推理已在上方单独处理
728728
tool_calls=current_tool_calls_payload,
729-
finish_reason=(
730-
finish_reason if stop_triggered else "null"
731-
), # Don't send upstream finish yet unless stopped
729+
finish_reason=(finish_reason if stop_triggered else "null"),
730+
usage=accumulated_usage,
731+
request_id=request_id,
732+
)
733+
else:
734+
# 非增量模式(全量模式):推送 full_text
735+
# 注意:非增量模式下,Tool Calls 通常建议在结束时统一发送,或者累积发送
736+
# 这里为了保持简洁,暂不实时推送未完成的 Tool Calls 结构,除非你需要
737+
yield self._build_stream_response(
738+
content=full_text,
739+
reasoning_content=full_reasoning,
740+
tool_calls=None, # 全量模式通常不流式传输部分工具调用,只传输文本
741+
finish_reason=(finish_reason if stop_triggered else "null"),
732742
usage=accumulated_usage,
733743
request_id=request_id,
734744
)
735745

736746
if stop_triggered:
737747
break
738748

739-
# End of Stream
749+
# --- End of Stream Handling ---
740750

741-
# If we have leftover buffer and weren't stopped, flush it now
751+
# Flush leftover buffer if not stopped
742752
if not stop_triggered and content_buffer and stop_sequences:
743753
full_text += content_buffer
754+
# 如果还有缓冲区剩余,根据模式推送
744755
if is_incremental:
745756
yield self._build_stream_response(
746757
content=content_buffer,
@@ -750,10 +761,19 @@ async def _stream_generator(
750761
usage=accumulated_usage,
751762
request_id=request_id,
752763
)
764+
else:
765+
yield self._build_stream_response(
766+
content=full_text,
767+
reasoning_content=full_reasoning,
768+
tool_calls=None,
769+
finish_reason="null",
770+
usage=accumulated_usage,
771+
request_id=request_id,
772+
)
753773

774+
# Final Finish Handling
754775
if not is_incremental:
755-
# For non-incremental (but stream=True upstream), we yield the whole thing at the end
756-
# Applying stop logic to the full text collected
776+
# 非增量模式的最后一包,包含 finish_reason 和完整的 Tool Calls
757777
if stop_sequences:
758778
earliest_idx, _ = self._find_earliest_stop(full_text, stop_sequences)
759779
if earliest_idx != -1:
@@ -775,7 +795,7 @@ async def _stream_generator(
775795
request_id=request_id,
776796
)
777797
else:
778-
# Send final finish reason for incremental mode if not already sent via stop trigger
798+
# 增量模式,如果没有通过 stop 触发结束,需要发一个空的结束包
779799
if not stop_triggered and finish_reason != "null":
780800
yield self._build_stream_response(
781801
content="",

0 commit comments

Comments
 (0)