Don't include ToolResultPart for external tool call when streaming (#3112)

DouweM · web-flow · commit f28c6fcd8a2a · 2025-10-08T09:39:32.000Z
diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py
@@ -795,16 +795,14 @@ async def process_tool_calls(  # noqa: C901
     # Then, we handle function tool calls
     calls_to_run: list[_messages.ToolCallPart] = []
     if final_result and ctx.deps.end_strategy == 'early':
-        output_parts.extend(
-            [
+        for call in tool_calls_by_kind['function']:
+            output_parts.append(
                 _messages.ToolReturnPart(
                     tool_name=call.tool_name,
                     content='Tool not executed - a final result was already processed.',
                     tool_call_id=call.tool_call_id,
                 )
-                for call in tool_calls_by_kind['function']
-            ]
-        )
+            )
     else:
         calls_to_run.extend(tool_calls_by_kind['function'])
 
@@ -850,14 +848,17 @@ async def process_tool_calls(  # noqa: C901
     if tool_call_results is None:
         calls = [*tool_calls_by_kind['external'], *tool_calls_by_kind['unapproved']]
         if final_result:
-            for call in calls:
-                output_parts.append(
-                    _messages.ToolReturnPart(
-                        tool_name=call.tool_name,
-                        content='Tool not executed - a final result was already processed.',
-                        tool_call_id=call.tool_call_id,
+            # If the run was already determined to end on deferred tool calls,
+            # we shouldn't insert return parts as the deferred tools will still get a real result.
+            if not isinstance(final_result.output, _output.DeferredToolRequests):
+                for call in calls:
+                    output_parts.append(
+                        _messages.ToolReturnPart(
+                            tool_name=call.tool_name,
+                            content='Tool not executed - a final result was already processed.',
+                            tool_call_id=call.tool_call_id,
+                        )
                     )
-                )
         elif calls:
             deferred_calls['external'].extend(tool_calls_by_kind['external'])
             deferred_calls['unapproved'].extend(tool_calls_by_kind['unapproved'])
diff --git a/pydantic_ai_slim/pydantic_ai/agent/abstract.py b/pydantic_ai_slim/pydantic_ai/agent/abstract.py
@@ -489,7 +489,7 @@ async def stream_to_final(
 
                         if final_result_event is not None:
                             final_result = FinalResult(
-                                stream, final_result_event.tool_name, final_result_event.tool_call_id
+                                None, final_result_event.tool_name, final_result_event.tool_call_id
                             )
                             if yielded:
                                 raise exceptions.AgentRunError('Agent run produced final results')  # pragma: no cover
@@ -503,16 +503,15 @@ async def on_complete() -> None:
                                 The model response will have been added to messages by now
                                 by `StreamedRunResult._marked_completed`.
                                 """
-                                last_message = messages[-1]
-                                assert isinstance(last_message, _messages.ModelResponse)
-                                tool_calls = [
-                                    part for part in last_message.parts if isinstance(part, _messages.ToolCallPart)
-                                ]
+                                nonlocal final_result
+                                final_result = FinalResult(
+                                    await stream.get_output(), final_result.tool_name, final_result.tool_call_id
+                                )
 
                                 parts: list[_messages.ModelRequestPart] = []
                                 async for _event in _agent_graph.process_tool_calls(
                                     tool_manager=graph_ctx.deps.tool_manager,
-                                    tool_calls=tool_calls,
+                                    tool_calls=stream.response.tool_calls,
                                     tool_call_results=None,
                                     final_result=final_result,
                                     ctx=graph_ctx,
diff --git a/pydantic_ai_slim/pydantic_ai/result.py b/pydantic_ai_slim/pydantic_ai/result.py
@@ -67,20 +67,20 @@ async def stream_output(self, *, debounce_by: float | None = 0.1) -> AsyncIterat
                 except ValidationError:
                     pass
         if self._raw_stream_response.final_result_event is not None:  # pragma: no branch
-            yield await self.validate_response_output(self._raw_stream_response.get())
+            yield await self.validate_response_output(self.response)
 
     async def stream_responses(self, *, debounce_by: float | None = 0.1) -> AsyncIterator[_messages.ModelResponse]:
         """Asynchronously stream the (unvalidated) model responses for the agent."""
         # if the message currently has any parts with content, yield before streaming
-        msg = self._raw_stream_response.get()
+        msg = self.response
         for part in msg.parts:
             if part.has_content():
                 yield msg
                 break
 
         async with _utils.group_by_temporal(self, debounce_by) as group_iter:
             async for _items in group_iter:
-                yield self._raw_stream_response.get()  # current state of the response
+                yield self.response  # current state of the response
 
     async def stream_text(self, *, delta: bool = False, debounce_by: float | None = 0.1) -> AsyncIterator[str]:
         """Stream the text result as an async iterable.
@@ -136,7 +136,7 @@ async def get_output(self) -> OutputDataT:
         async for _ in self:
             pass
 
-        return await self.validate_response_output(self._raw_stream_response.get())
+        return await self.validate_response_output(self.response)
 
     async def validate_response_output(
         self, message: _messages.ModelResponse, *, allow_partial: bool = False
@@ -201,7 +201,7 @@ async def _stream_text_deltas_ungrouped() -> AsyncIterator[tuple[str, int]]:
             # yields tuples of (text_content, part_index)
             # we don't currently make use of the part_index, but in principle this may be useful
             # so we retain it here for now to make possible future refactors simpler
-            msg = self._raw_stream_response.get()
+            msg = self.response
             for i, part in enumerate(msg.parts):
                 if isinstance(part, _messages.TextPart) and part.content:
                     yield part.content, i
diff --git a/tests/test_streaming.py b/tests/test_streaming.py
@@ -18,6 +18,7 @@
     AgentRunResult,
     AgentRunResultEvent,
     AgentStreamEvent,
+    ExternalToolset,
     FinalResultEvent,
     FunctionToolCallEvent,
     FunctionToolResultEvent,
@@ -819,6 +820,182 @@ def another_tool(y: int) -> int:  # pragma: no cover
     )
 
 
+async def test_early_strategy_with_external_tool_call():
+    tool_called: list[str] = []
+
+    async def sf(_: list[ModelMessage], info: AgentInfo) -> AsyncIterator[str | DeltaToolCalls]:
+        assert info.output_tools is not None
+        yield {1: DeltaToolCall('external_tool')}
+        yield {2: DeltaToolCall('final_result', '{"value": "final"}')}
+        yield {3: DeltaToolCall('regular_tool', '{"x": 1}')}
+
+    agent = Agent(
+        FunctionModel(stream_function=sf),
+        output_type=[OutputType, DeferredToolRequests],
+        toolsets=[
+            ExternalToolset(
+                tool_defs=[
+                    ToolDefinition(
+                        name='external_tool',
+                        kind='external',
+                    )
+                ]
+            )
+        ],
+        end_strategy='early',
+    )
+
+    @agent.tool_plain
+    def regular_tool(x: int) -> int:  # pragma: no cover
+        """A regular tool that should not be called."""
+        tool_called.append('regular_tool')
+        return x
+
+    async with agent.run_stream('test early strategy with external tool call') as result:
+        response = await result.get_output()
+        assert response == snapshot(
+            DeferredToolRequests(
+                calls=[
+                    ToolCallPart(
+                        tool_name='external_tool',
+                        tool_call_id=IsStr(),
+                    )
+                ]
+            )
+        )
+        messages = result.all_messages()
+
+    # Verify no tools were called
+    assert tool_called == []
+
+    # Verify we got appropriate tool returns
+    assert messages == snapshot(
+        [
+            ModelRequest(
+                parts=[
+                    UserPromptPart(
+                        content='test early strategy with external tool call',
+                        timestamp=IsNow(tz=datetime.timezone.utc),
+                        part_kind='user-prompt',
+                    )
+                ],
+                kind='request',
+            ),
+            ModelResponse(
+                parts=[
+                    ToolCallPart(tool_name='external_tool', tool_call_id=IsStr()),
+                    ToolCallPart(
+                        tool_name='final_result',
+                        args='{"value": "final"}',
+                        tool_call_id=IsStr(),
+                    ),
+                    ToolCallPart(
+                        tool_name='regular_tool',
+                        args='{"x": 1}',
+                        tool_call_id=IsStr(),
+                    ),
+                ],
+                usage=RequestUsage(input_tokens=50, output_tokens=7),
+                model_name='function::sf',
+                timestamp=IsNow(tz=datetime.timezone.utc),
+                kind='response',
+            ),
+            ModelRequest(
+                parts=[
+                    ToolReturnPart(
+                        tool_name='final_result',
+                        content='Output tool not used - a final result was already processed.',
+                        tool_call_id=IsStr(),
+                        timestamp=IsNow(tz=datetime.timezone.utc),
+                    ),
+                    ToolReturnPart(
+                        tool_name='regular_tool',
+                        content='Tool not executed - a final result was already processed.',
+                        tool_call_id=IsStr(),
+                        timestamp=IsNow(tz=datetime.timezone.utc),
+                    ),
+                ],
+                kind='request',
+            ),
+        ]
+    )
+
+
+async def test_early_strategy_with_deferred_tool_call():
+    tool_called: list[str] = []
+
+    async def sf(_: list[ModelMessage], info: AgentInfo) -> AsyncIterator[str | DeltaToolCalls]:
+        assert info.output_tools is not None
+        yield {1: DeltaToolCall('deferred_tool')}
+        yield {2: DeltaToolCall('regular_tool', '{"x": 1}')}
+
+    agent = Agent(
+        FunctionModel(stream_function=sf),
+        output_type=[str, DeferredToolRequests],
+        end_strategy='early',
+    )
+
+    @agent.tool_plain
+    def deferred_tool() -> int:
+        raise CallDeferred
+
+    @agent.tool_plain
+    def regular_tool(x: int) -> int:
+        tool_called.append('regular_tool')
+        return x
+
+    async with agent.run_stream('test early strategy with external tool call') as result:
+        response = await result.get_output()
+        assert response == snapshot(
+            DeferredToolRequests(calls=[ToolCallPart(tool_name='deferred_tool', tool_call_id=IsStr())])
+        )
+        messages = result.all_messages()
+
+    # Verify no tools were called
+    assert tool_called == ['regular_tool']
+
+    # Verify we got appropriate tool returns
+    assert messages == snapshot(
+        [
+            ModelRequest(
+                parts=[
+                    UserPromptPart(
+                        content='test early strategy with external tool call',
+                        timestamp=IsNow(tz=datetime.timezone.utc),
+                        part_kind='user-prompt',
+                    )
+                ],
+                kind='request',
+            ),
+            ModelResponse(
+                parts=[
+                    ToolCallPart(tool_name='deferred_tool', tool_call_id=IsStr()),
+                    ToolCallPart(
+                        tool_name='regular_tool',
+                        args='{"x": 1}',
+                        tool_call_id=IsStr(),
+                    ),
+                ],
+                usage=RequestUsage(input_tokens=50, output_tokens=3),
+                model_name='function::sf',
+                timestamp=IsNow(tz=datetime.timezone.utc),
+                kind='response',
+            ),
+            ModelRequest(
+                parts=[
+                    ToolReturnPart(
+                        tool_name='regular_tool',
+                        content=1,
+                        tool_call_id=IsStr(),
+                        timestamp=IsNow(tz=datetime.timezone.utc),
+                    )
+                ],
+                kind='request',
+            ),
+        ]
+    )
+
+
 async def test_early_strategy_does_not_apply_to_tool_calls_without_final_tool():
     """Test that 'early' strategy does not apply to tool calls without final tool."""
     tool_called: list[str] = []