1313
1414from __future__ import annotations as _annotations
1515
16- from collections .abc import Hashable
16+ from collections .abc import Generator , Hashable
1717from dataclasses import dataclass , field , replace
1818from typing import Any
1919
@@ -58,6 +58,8 @@ class ModelResponsePartsManager:
5858 """A list of parts (text or tool calls) that make up the current state of the model's response."""
5959 _vendor_id_to_part_index : dict [VendorId , int ] = field (default_factory = dict , init = False )
6060 """Maps a vendor's "part" ID (if provided) to the index in `_parts` where that part resides."""
61+ _tag_buffer : dict [VendorId , str ] = field (default_factory = dict , init = False )
62+ """Buffers partial content when thinking tags might be split across chunks."""
6163
6264 def get_parts (self ) -> list [ModelResponsePart ]:
6365 """Return only model response parts that are complete (i.e., not ToolCallPartDelta's).
@@ -75,82 +77,159 @@ def handle_text_delta(
7577 id : str | None = None ,
7678 thinking_tags : tuple [str , str ] | None = None ,
7779 ignore_leading_whitespace : bool = False ,
78- ) -> ModelResponseStreamEvent | None :
80+ ) -> Generator [ ModelResponseStreamEvent , None , None ] :
7981 """Handle incoming text content, creating or updating a TextPart in the manager as appropriate.
8082
8183 When `vendor_part_id` is None, the latest part is updated if it exists and is a TextPart;
8284 otherwise, a new TextPart is created. When a non-None ID is specified, the TextPart corresponding
8385 to that vendor ID is either created or updated.
8486
87+ Thinking tags may be split across multiple chunks. When `thinking_tags` is provided and
88+ `vendor_part_id` is not None, this method buffers content that could be the start of a
89+ thinking tag appearing at the beginning of the current chunk.
90+
8591 Args:
8692 vendor_part_id: The ID the vendor uses to identify this piece
8793 of text. If None, a new part will be created unless the latest part is already
8894 a TextPart.
8995 content: The text content to append to the appropriate TextPart.
9096 id: An optional id for the text part.
9197 thinking_tags: If provided, will handle content between the thinking tags as thinking parts.
98+ Buffering for split tags requires a non-None vendor_part_id.
9299 ignore_leading_whitespace: If True, will ignore leading whitespace in the content.
93100
94- Returns :
95- - A `PartStartEvent` if a new part was created.
96- - A `PartDeltaEvent` if an existing part was updated.
97- - `None` if no new event is emitted (e.g., the first text part was all whitespace) .
101+ Yields :
102+ - `PartStartEvent` if a new part was created.
103+ - `PartDeltaEvent` if an existing part was updated.
104+ May yield multiple events from a single call if buffered content is flushed .
98105
99106 Raises:
100107 UnexpectedModelBehavior: If attempting to apply text content to a part that is not a TextPart.
101108 """
109+ if thinking_tags and vendor_part_id is not None :
110+ yield from self ._handle_text_delta_with_thinking_tags (
111+ vendor_part_id = vendor_part_id ,
112+ content = content ,
113+ id = id ,
114+ thinking_tags = thinking_tags ,
115+ ignore_leading_whitespace = ignore_leading_whitespace ,
116+ )
117+ else :
118+ yield from self ._handle_text_delta_simple (
119+ vendor_part_id = vendor_part_id ,
120+ content = content ,
121+ id = id ,
122+ thinking_tags = thinking_tags ,
123+ ignore_leading_whitespace = ignore_leading_whitespace ,
124+ )
125+
def _handle_text_delta_simple(
    self,
    *,
    vendor_part_id: VendorId | None,
    content: str,
    id: str | None,
    thinking_tags: tuple[str, str] | None,
    ignore_leading_whitespace: bool,
) -> Generator[ModelResponseStreamEvent, None, None]:
    """Apply a text delta without any buffering of partially received thinking tags.

    A thinking tag is only recognised here when `content` is exactly equal to the
    start or end tag.

    Args:
        vendor_part_id: The vendor's ID for this piece of text. When None, the latest part
            is updated if it is a TextPart; otherwise a new TextPart is created.
        content: The text content to append.
        id: An optional id given to a newly created TextPart.
        thinking_tags: Optional `(start_tag, end_tag)` pair; content between the tags is
            routed to `handle_thinking_delta` instead of a TextPart.
        ignore_leading_whitespace: If True, empty or whitespace-only content does not
            create a new TextPart.

    Yields:
        - `PartStartEvent` if a new TextPart was created.
        - `PartDeltaEvent` if an existing TextPart was updated.
        - Whatever `handle_thinking_delta` returns while a thinking part is being built.

    Raises:
        UnexpectedModelBehavior: If the part registered for `vendor_part_id` is neither a
            TextPart nor (with `thinking_tags` set) a ThinkingPart.
    """
    target: tuple[TextPart, int] | None = None

    if vendor_part_id is not None:
        # Look up a part previously registered under this vendor ID.
        known_index = self._vendor_id_to_part_index.get(vendor_part_id)
        if known_index is not None:
            existing_part = self._parts[known_index]

            if thinking_tags and isinstance(existing_part, ThinkingPart):
                # A start tag was seen earlier, so this ID currently maps to a thinking part.
                if content != thinking_tags[1]:
                    yield self.handle_thinking_delta(vendor_part_id=vendor_part_id, content=content)
                else:
                    # End tag: forget the mapping so the next text delta starts a new part.
                    self._vendor_id_to_part_index.pop(vendor_part_id)
                return

            if not isinstance(existing_part, TextPart):
                raise UnexpectedModelBehavior(f'Cannot apply a text delta to {existing_part=}')
            target = existing_part, known_index
    elif self._parts:
        # No vendor ID: only the most recent part is a candidate for updating.
        last_index = len(self._parts) - 1
        last_part = self._parts[last_index]
        if isinstance(last_part, TextPart):
            target = last_part, last_index

    if thinking_tags and content == thinking_tags[0]:
        # A start tag arriving as a whole chunk opens a fresh (empty) thinking part.
        self._vendor_id_to_part_index.pop(vendor_part_id, None)
        yield self.handle_thinking_delta(vendor_part_id=vendor_part_id, content='')
        return

    if target is not None:
        # Append the delta to the TextPart we found.
        current_part, current_index = target
        delta = TextPartDelta(content_delta=content)
        self._parts[current_index] = delta.apply(current_part)
        yield PartDeltaEvent(index=current_index, delta=delta)
        return

    # Workaround for models that emit `<think>\n</think>\n\n` or an empty text part ahead of
    # tool calls: don't let pure whitespace become a text part of its own.
    if ignore_leading_whitespace and (len(content) == 0 or content.isspace()):
        return

    # Nothing to update, so start a new TextPart and remember where it lives.
    created_index = len(self._parts)
    created_part = TextPart(content=content, id=id)
    if vendor_part_id is not None:
        self._vendor_id_to_part_index[vendor_part_id] = created_index
    self._parts.append(created_part)
    yield PartStartEvent(index=created_index, part=created_part)
181+
def _handle_text_delta_with_thinking_tags(
    self,
    *,
    vendor_part_id: VendorId,
    content: str,
    id: str | None,
    thinking_tags: tuple[str, str],
    ignore_leading_whitespace: bool,
) -> Generator[ModelResponseStreamEvent, None, None]:
    """Handle a text delta, buffering content that may be a thinking tag split across chunks.

    Content held back from earlier chunks for `vendor_part_id` is prepended to `content`
    before any decision is made. While the combined text is a non-empty proper prefix of
    the relevant tag (the end tag while a thinking part is open, otherwise the start tag)
    it is kept in `_tag_buffer` and nothing is emitted. Once it either completes the tag
    or can no longer become it, the buffer is cleared and the combined text is processed.

    NOTE(review): buffered text is only released by a later call for the same
    `vendor_part_id`; this method has no end-of-stream flush.

    Args:
        vendor_part_id: The vendor's ID for this piece of text; used as the buffer key.
        content: The newly received text chunk.
        id: An optional id for a newly created text part.
        thinking_tags: The `(start_tag, end_tag)` pair delimiting thinking content.
        ignore_leading_whitespace: Forwarded to `_handle_text_delta_simple`.

    Yields:
        The events produced by `handle_thinking_delta` or `_handle_text_delta_simple`;
        nothing while content is being buffered or when an end tag is consumed.

    Raises:
        UnexpectedModelBehavior: Propagated from `_handle_text_delta_simple`.
    """
    start_tag, end_tag = thinking_tags

    # Take the pending buffer out up front; it is written back only if we keep buffering.
    combined_content = self._tag_buffer.pop(vendor_part_id, '') + content

    part_index = self._vendor_id_to_part_index.get(vendor_part_id)
    existing_part = self._parts[part_index] if part_index is not None else None

    if isinstance(existing_part, ThinkingPart):
        if combined_content == end_tag:
            # Thinking is finished; the next text delta for this ID needs a new part.
            self._vendor_id_to_part_index.pop(vendor_part_id)
            return
        if combined_content and self._could_be_tag_start(combined_content, end_tag):
            # Possibly the first piece of a split end tag: wait for more before
            # appending it to the thinking content.
            self._tag_buffer[vendor_part_id] = combined_content
            return
        yield self.handle_thinking_delta(vendor_part_id=vendor_part_id, content=combined_content)
        return

    if combined_content == start_tag:
        # Complete start tag (possibly assembled from several chunks): open a thinking part.
        self._vendor_id_to_part_index.pop(vendor_part_id, None)
        yield self.handle_thinking_delta(vendor_part_id=vendor_part_id, content='')
        return

    # Decide on the *combined* text, not just the newest chunk, so a tag split over three
    # or more chunks (e.g. '<th', 'ink', '>') keeps accumulating instead of being flushed.
    if combined_content and self._could_be_tag_start(combined_content, start_tag):
        self._tag_buffer[vendor_part_id] = combined_content
        return

    # Not a tag after all: emit everything that was held back as ordinary text.
    yield from self._handle_text_delta_simple(
        vendor_part_id=vendor_part_id,
        content=combined_content,
        id=id,
        thinking_tags=thinking_tags,
        ignore_leading_whitespace=ignore_leading_whitespace,
    )
227+
228+ def _could_be_tag_start (self , content : str , tag : str ) -> bool :
229+ """Check if content could be the start of a tag."""
230+ if len (content ) >= len (tag ):
231+ return False
232+ return tag .startswith (content )
154233
155234 def handle_thinking_delta (
156235 self ,
0 commit comments