@@ -74,11 +74,13 @@ def get_parts(self) -> list[ModelResponsePart]:
7474 return [p for p in self ._parts if not isinstance (p , ToolCallPartDelta )]
7575
7676 def finalize (self ) -> Generator [ModelResponseStreamEvent , None , None ]:
77- """Flush any buffered content as text parts .
77+ """Flush any buffered content, appending to ThinkingParts or creating TextParts .
7878
7979 This should be called when streaming is complete to ensure no content is lost.
80- Any content buffered in _thinking_tag_buffer that hasn't been processed will be
81- treated as regular text and emitted.
80+ Any content buffered in _thinking_tag_buffer will be appended to its corresponding
81+ ThinkingPart if one exists, otherwise it will be emitted as a TextPart.
82+
83+ The only possible buffered content to append to ThinkingParts are incomplete closing tags like `</th`
8284
8385 Yields:
8486 ModelResponseStreamEvent for any buffered content that gets flushed.
@@ -102,19 +104,27 @@ def finalize(self) -> Generator[ModelResponseStreamEvent, None, None]:
102104 yield PartStartEvent (index = part_index , part = text_part )
103105 self ._started_part_indices .add (part_index )
104106
105- # flush any remaining buffered content (partial tags like '<thi')
107+ # flush any remaining buffered content
106108 for vendor_part_id , buffered_content in list (self ._thinking_tag_buffer .items ()):
107109 if buffered_content :
108- # Remove the vendor_part_id mapping to avoid trying to update existing parts
109- # This ensures buffered partial tags create new TextParts
110- self ._vendor_id_to_part_index .pop (vendor_part_id , None )
111- yield from self ._handle_text_delta_simple (
112- vendor_part_id = vendor_part_id ,
113- content = buffered_content ,
114- id = None ,
115- thinking_tags = None ,
116- ignore_leading_whitespace = False ,
117- )
110+ part_index = self ._vendor_id_to_part_index .get (vendor_part_id )
111+
112+ # If buffered content belongs to a ThinkingPart, append it to the ThinkingPart
113+ # (for orphaned buffers like '</th')
114+ if part_index is not None and isinstance (self ._parts [part_index ], ThinkingPart ):
115+ yield from self .handle_thinking_delta (vendor_part_id = vendor_part_id , content = buffered_content )
116+ self ._vendor_id_to_part_index .pop (vendor_part_id )
117+ else :
118+ # Otherwise flush as TextPart
119+ # (for orphaned buffers like '<thi')
120+ self ._vendor_id_to_part_index .pop (vendor_part_id , None )
121+ yield from self ._handle_text_delta_simple (
122+ vendor_part_id = vendor_part_id ,
123+ content = buffered_content ,
124+ id = None ,
125+ thinking_tags = None ,
126+ ignore_leading_whitespace = False ,
127+ )
118128
119129 self ._thinking_tag_buffer .clear ()
120130
0 commit comments