Fix AgentStream.stream_output and StreamedRunResult.stream_structured with output tools (#2314)

DouweM · claude[bot] · web-flow · commit 168680aed867 · 2025-07-28T17:03:00.000-06:00
Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com>
Co-authored-by: Douwe Maan <DouweM@users.noreply.github.com>
diff --git a/pydantic_ai_slim/pydantic_ai/_tool_manager.py b/pydantic_ai_slim/pydantic_ai/_tool_manager.py
@@ -54,20 +54,25 @@ def get_tool_def(self, name: str) -> ToolDefinition | None:
         except KeyError:
             return None
 
-    async def handle_call(self, call: ToolCallPart, allow_partial: bool = False) -> Any:
+    async def handle_call(
+        self, call: ToolCallPart, allow_partial: bool = False, wrap_validation_errors: bool = True
+    ) -> Any:
         """Handle a tool call by validating the arguments, calling the tool, and handling retries.
 
         Args:
             call: The tool call part to handle.
             allow_partial: Whether to allow partial validation of the tool arguments.
+            wrap_validation_errors: Whether to wrap validation errors in a retry prompt part.
         """
         if (tool := self.tools.get(call.tool_name)) and tool.tool_def.kind == 'output':
             # Output tool calls are not traced
-            return await self._call_tool(call, allow_partial)
+            return await self._call_tool(call, allow_partial, wrap_validation_errors)
         else:
-            return await self._call_tool_traced(call, allow_partial)
+            return await self._call_tool_traced(call, allow_partial, wrap_validation_errors)
 
-    async def _call_tool(self, call: ToolCallPart, allow_partial: bool = False) -> Any:
+    async def _call_tool(
+        self, call: ToolCallPart, allow_partial: bool = False, wrap_validation_errors: bool = True
+    ) -> Any:
         name = call.tool_name
         tool = self.tools.get(name)
         try:
@@ -100,30 +105,35 @@ async def _call_tool(self, call: ToolCallPart, allow_partial: bool = False) -> A
             if current_retry == max_retries:
                 raise UnexpectedModelBehavior(f'Tool {name!r} exceeded max retries count of {max_retries}') from e
             else:
-                if isinstance(e, ValidationError):
-                    m = _messages.RetryPromptPart(
-                        tool_name=name,
-                        content=e.errors(include_url=False, include_context=False),
-                        tool_call_id=call.tool_call_id,
-                    )
-                    e = ToolRetryError(m)
-                elif isinstance(e, ModelRetry):
-                    m = _messages.RetryPromptPart(
-                        tool_name=name,
-                        content=e.message,
-                        tool_call_id=call.tool_call_id,
-                    )
-                    e = ToolRetryError(m)
-                else:
-                    assert_never(e)
+                if wrap_validation_errors:
+                    if isinstance(e, ValidationError):
+                        m = _messages.RetryPromptPart(
+                            tool_name=name,
+                            content=e.errors(include_url=False, include_context=False),
+                            tool_call_id=call.tool_call_id,
+                        )
+                        e = ToolRetryError(m)
+                    elif isinstance(e, ModelRetry):
+                        m = _messages.RetryPromptPart(
+                            tool_name=name,
+                            content=e.message,
+                            tool_call_id=call.tool_call_id,
+                        )
+                        e = ToolRetryError(m)
+                    else:
+                        assert_never(e)
+
+                if not allow_partial:
+                    self.ctx.retries[name] = current_retry + 1
 
-                self.ctx.retries[name] = current_retry + 1
                 raise e
         else:
             self.ctx.retries.pop(name, None)
             return output
 
-    async def _call_tool_traced(self, call: ToolCallPart, allow_partial: bool = False) -> Any:
+    async def _call_tool_traced(
+        self, call: ToolCallPart, allow_partial: bool = False, wrap_validation_errors: bool = True
+    ) -> Any:
         """See <https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span>."""
         span_attributes = {
             'gen_ai.tool.name': call.tool_name,
@@ -152,7 +162,7 @@ async def _call_tool_traced(self, call: ToolCallPart, allow_partial: bool = Fals
         }
         with self.ctx.tracer.start_as_current_span('running tool', attributes=span_attributes) as span:
             try:
-                tool_result = await self._call_tool(call, allow_partial)
+                tool_result = await self._call_tool(call, allow_partial, wrap_validation_errors)
             except ToolRetryError as e:
                 part = e.tool_retry
                 if self.ctx.trace_include_content and span.is_recording():
diff --git a/pydantic_ai_slim/pydantic_ai/result.py b/pydantic_ai_slim/pydantic_ai/result.py
@@ -67,7 +67,7 @@ async def stream_output(self, *, debounce_by: float | None = 0.1) -> AsyncIterat
                 except ValidationError:
                     pass
         if self._final_result_event is not None:  # pragma: no branch
-            yield await self._validate_response(self._raw_stream_response.get(), allow_partial=False)
+            yield await self._validate_response(self._raw_stream_response.get())
 
     async def stream_responses(self, *, debounce_by: float | None = 0.1) -> AsyncIterator[_messages.ModelResponse]:
         """Asynchronously stream the (unvalidated) model responses for the agent."""
@@ -128,7 +128,7 @@ async def get_output(self) -> OutputDataT:
         async for _ in self:
             pass
 
-        return await self._validate_response(self._raw_stream_response.get(), allow_partial=False)
+        return await self._validate_response(self._raw_stream_response.get())
 
     async def _validate_response(self, message: _messages.ModelResponse, *, allow_partial: bool = False) -> OutputDataT:
         """Validate a structured result message."""
@@ -150,7 +150,9 @@ async def _validate_response(self, message: _messages.ModelResponse, *, allow_pa
                 raise exceptions.UnexpectedModelBehavior(  # pragma: no cover
                     f'Invalid response, unable to find tool call for {output_tool_name!r}'
                 )
-            return await self._tool_manager.handle_call(tool_call, allow_partial=allow_partial)
+            return await self._tool_manager.handle_call(
+                tool_call, allow_partial=allow_partial, wrap_validation_errors=False
+            )
         elif deferred_tool_calls := self._tool_manager.get_deferred_tool_calls(message.parts):
             if not self._output_schema.allows_deferred_tool_calls:
                 raise exceptions.UserError(
diff --git a/tests/test_streaming.py b/tests/test_streaming.py
@@ -1108,6 +1108,42 @@ class CityLocation(BaseModel):
                     )
 
 
+async def test_iter_stream_output_tool_dont_hit_retry_limit():
+    class CityLocation(BaseModel):
+        city: str
+        country: str | None = None
+
+    async def text_stream(_messages: list[ModelMessage], agent_info: AgentInfo) -> AsyncIterator[DeltaToolCalls]:
+        """Stream partial JSON data that will initially fail validation."""
+        assert agent_info.output_tools is not None
+        assert len(agent_info.output_tools) == 1
+        name = agent_info.output_tools[0].name
+
+        yield {0: DeltaToolCall(name=name)}
+        yield {0: DeltaToolCall(json_args='{"c')}
+        yield {0: DeltaToolCall(json_args='ity":')}
+        yield {0: DeltaToolCall(json_args=' "Mex')}
+        yield {0: DeltaToolCall(json_args='ico City",')}
+        yield {0: DeltaToolCall(json_args=' "cou')}
+        yield {0: DeltaToolCall(json_args='ntry": "Mexico"}')}
+
+    agent = Agent(FunctionModel(stream_function=text_stream), output_type=CityLocation)
+
+    async with agent.iter('Generate city info') as run:
+        async for node in run:
+            if agent.is_model_request_node(node):
+                async with node.stream(run.ctx) as stream:
+                    assert [c async for c in stream.stream_output(debounce_by=None)] == snapshot(
+                        [
+                            CityLocation(city='Mex'),
+                            CityLocation(city='Mexico City'),
+                            CityLocation(city='Mexico City'),
+                            CityLocation(city='Mexico City', country='Mexico'),
+                            CityLocation(city='Mexico City', country='Mexico'),
+                        ]
+                    )
+
+
 def test_function_tool_event_tool_call_id_properties():
     """Ensure that the `tool_call_id` property on function tool events mirrors the underlying part's ID."""
     # Prepare a ToolCallPart with a fixed ID