Fix bug related to handling multiple result tools (#926)

dmontagu · web-flow · commit 15c5ef28ef8d · 2025-02-28T23:02:11.000Z
diff --git a/docs/agents.md b/docs/agents.md
@@ -141,7 +141,7 @@ async def main():
                 kind='response',
             )
         ),
-        End(data=FinalResult(data='Paris', tool_name=None)),
+        End(data=FinalResult(data='Paris', tool_name=None, tool_call_id=None)),
     ]
     """
     print(agent_run.result.data)
@@ -202,7 +202,7 @@ async def main():
                     kind='response',
                 )
             ),
-            End(data=FinalResult(data='Paris', tool_name=None)),
+            End(data=FinalResult(data='Paris', tool_name=None, tool_call_id=None)),
         ]
         """
 ```
diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py
@@ -454,8 +454,7 @@ async def _handle_tool_calls(
         final_result: result.FinalResult[NodeRunEndT] | None = None
         parts: list[_messages.ModelRequestPart] = []
         if result_schema is not None:
-            if match := result_schema.find_tool(tool_calls):
-                call, result_tool = match
+            for call, result_tool in result_schema.find_tool(tool_calls):
                 try:
                     result_data = result_tool.validate(call)
                     result_data = await _validate_result(result_data, ctx, call)
@@ -465,12 +464,17 @@ async def _handle_tool_calls(
                     ctx.state.increment_retries(ctx.deps.max_result_retries)
                     parts.append(e.tool_retry)
                 else:
-                    final_result = result.FinalResult(result_data, call.tool_name)
+                    final_result = result.FinalResult(result_data, call.tool_name, call.tool_call_id)
+                    break
 
         # Then build the other request parts based on end strategy
         tool_responses: list[_messages.ModelRequestPart] = self._tool_responses
         async for event in process_function_tools(
-            tool_calls, final_result and final_result.tool_name, ctx, tool_responses
+            tool_calls,
+            final_result and final_result.tool_name,
+            final_result and final_result.tool_call_id,
+            ctx,
+            tool_responses,
         ):
             yield event
 
@@ -518,7 +522,7 @@ async def _handle_text_response(
                 return ModelRequestNode[DepsT, NodeRunEndT](_messages.ModelRequest(parts=[e.tool_retry]))
             else:
                 # The following cast is safe because we know `str` is an allowed result type
-                return self._handle_final_result(ctx, result.FinalResult(result_data, tool_name=None), [])
+                return self._handle_final_result(ctx, result.FinalResult(result_data, None, None), [])
         else:
             ctx.state.increment_retries(ctx.deps.max_result_retries)
             return ModelRequestNode[DepsT, NodeRunEndT](
@@ -547,6 +551,7 @@ def build_run_context(ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT
 async def process_function_tools(
     tool_calls: list[_messages.ToolCallPart],
     result_tool_name: str | None,
+    result_tool_call_id: str | None,
     ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT, NodeRunEndT]],
     output_parts: list[_messages.ModelRequestPart],
 ) -> AsyncIterator[_messages.HandleResponseEvent]:
@@ -566,7 +571,11 @@ async def process_function_tools(
     calls_to_run: list[tuple[Tool[DepsT], _messages.ToolCallPart]] = []
     call_index_to_event_id: dict[int, str] = {}
     for call in tool_calls:
-        if call.tool_name == result_tool_name and not found_used_result_tool:
+        if (
+            call.tool_name == result_tool_name
+            and call.tool_call_id == result_tool_call_id
+            and not found_used_result_tool
+        ):
             found_used_result_tool = True
             output_parts.append(
                 _messages.ToolReturnPart(
@@ -593,9 +602,14 @@ async def process_function_tools(
             # if tool_name is in _result_schema, it means we found a result tool but an error occurred in
             # validation, we don't add another part here
             if result_tool_name is not None:
+                if found_used_result_tool:
+                    content = 'Result tool not used - a final result was already processed.'
+                else:
+                    # TODO: Include information about the validation failure, and/or merge this with the ModelRetry part
+                    content = 'Result tool not used - result failed validation.'
                 part = _messages.ToolReturnPart(
                     tool_name=call.tool_name,
-                    content='Result tool not used - a final result was already processed.',
+                    content=content,
                     tool_call_id=call.tool_call_id,
                 )
                 output_parts.append(part)
diff --git a/pydantic_ai_slim/pydantic_ai/_result.py b/pydantic_ai_slim/pydantic_ai/_result.py
@@ -3,7 +3,7 @@
 import inspect
 import sys
 import types
-from collections.abc import Awaitable, Iterable
+from collections.abc import Awaitable, Iterable, Iterator
 from dataclasses import dataclass, field
 from typing import Any, Callable, Generic, Literal, Union, cast, get_args, get_origin
 
@@ -127,12 +127,12 @@ def find_named_tool(
     def find_tool(
         self,
         parts: Iterable[_messages.ModelResponsePart],
-    ) -> tuple[_messages.ToolCallPart, ResultTool[ResultDataT]] | None:
+    ) -> Iterator[tuple[_messages.ToolCallPart, ResultTool[ResultDataT]]]:
         """Find a tool that matches one of the calls."""
         for part in parts:
             if isinstance(part, _messages.ToolCallPart):
                 if result := self.tools.get(part.tool_name):
-                    return part, result
+                    yield part, result
 
     def tool_names(self) -> list[str]:
         """Return the names of the tools."""
diff --git a/pydantic_ai_slim/pydantic_ai/agent.py b/pydantic_ai_slim/pydantic_ai/agent.py
@@ -370,7 +370,7 @@ async def main():
                         kind='response',
                     )
                 ),
-                End(data=FinalResult(data='Paris', tool_name=None)),
+                End(data=FinalResult(data='Paris', tool_name=None, tool_call_id=None)),
             ]
             '''
             print(agent_run.result.data)
@@ -661,11 +661,10 @@ async def stream_to_final(
                                     new_part = maybe_part_event.part
                                     if isinstance(new_part, _messages.TextPart):
                                         if _agent_graph.allow_text_result(result_schema):
-                                            return FinalResult(s, None)
-                                    elif isinstance(new_part, _messages.ToolCallPart):
-                                        if result_schema is not None and (match := result_schema.find_tool([new_part])):
-                                            call, _ = match
-                                            return FinalResult(s, call.tool_name)
+                                            return FinalResult(s, None, None)
+                                    elif isinstance(new_part, _messages.ToolCallPart) and result_schema:
+                                        for call, _ in result_schema.find_tool([new_part]):
+                                            return FinalResult(s, call.tool_name, call.tool_call_id)
                             return None
 
                         final_result_details = await stream_to_final(streamed_response)
@@ -692,6 +691,7 @@ async def on_complete() -> None:
                                 async for _event in _agent_graph.process_function_tools(
                                     tool_calls,
                                     final_result_details.tool_name,
+                                    final_result_details.tool_call_id,
                                     graph_ctx,
                                     parts,
                                 ):
@@ -1258,7 +1258,7 @@ async def main():
                     kind='response',
                 )
             ),
-            End(data=FinalResult(data='Paris', tool_name=None)),
+            End(data=FinalResult(data='Paris', tool_name=None, tool_call_id=None)),
         ]
         '''
         print(agent_run.result.data)
@@ -1382,7 +1382,7 @@ async def main():
                             kind='response',
                         )
                     ),
-                    End(data=FinalResult(data='Paris', tool_name=None)),
+                    End(data=FinalResult(data='Paris', tool_name=None, tool_call_id=None)),
                 ]
                 '''
                 print('Final result:', agent_run.result.data)
diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py
@@ -539,6 +539,8 @@ class FinalResultEvent:
 
     tool_name: str | None
     """The name of the result tool that was called. `None` if the result is from text content and not from a tool."""
+    tool_call_id: str | None
+    """The tool call ID, if any, that this result is associated with."""
     event_kind: Literal['final_result'] = 'final_result'
     """Event type identifier, used as a discriminator."""
 
diff --git a/pydantic_ai_slim/pydantic_ai/result.py b/pydantic_ai_slim/pydantic_ai/result.py
@@ -145,12 +145,14 @@ def _get_final_result_event(e: _messages.ModelResponseStreamEvent) -> _messages.
                 if isinstance(e, _messages.PartStartEvent):
                     new_part = e.part
                     if isinstance(new_part, _messages.ToolCallPart):
-                        if result_schema is not None and (match := result_schema.find_tool([new_part])):
-                            call, _ = match
-                            return _messages.FinalResultEvent(tool_name=call.tool_name)
+                        if result_schema:
+                            for call, _ in result_schema.find_tool([new_part]):
+                                return _messages.FinalResultEvent(
+                                    tool_name=call.tool_name, tool_call_id=call.tool_call_id
+                                )
                     elif allow_text_result:
                         assert_type(e, _messages.PartStartEvent)
-                        return _messages.FinalResultEvent(tool_name=None)
+                        return _messages.FinalResultEvent(tool_name=None, tool_call_id=None)
 
             usage_checking_stream = _get_usage_checking_stream_response(
                 self._raw_stream_response, self._usage_limits, self.usage
@@ -472,6 +474,8 @@ class FinalResult(Generic[ResultDataT]):
     """The final result data."""
     tool_name: str | None
     """Name of the final result tool; `None` if the result came from unstructured text content."""
+    tool_call_id: str | None
+    """ID of the tool call that produced the final result; `None` if the result came from unstructured text content."""
 
 
 def _get_usage_checking_stream_response(
diff --git a/tests/test_agent.py b/tests/test_agent.py
@@ -1183,6 +1183,42 @@ def regular_tool(x: int) -> int:
         tool_returns = [m for m in result.all_messages() if isinstance(m, ToolReturnPart)]
         assert tool_returns == snapshot([])
 
+    def test_multiple_final_result_are_validated_correctly(self):
+        """Tests that if multiple final results are returned, but one fails validation, the other is used."""
+
+        def return_model(_: list[ModelMessage], info: AgentInfo) -> ModelResponse:
+            assert info.result_tools is not None
+            return ModelResponse(
+                parts=[
+                    ToolCallPart('final_result', {'bad_value': 'first'}, tool_call_id='first'),
+                    ToolCallPart('final_result', {'value': 'second'}, tool_call_id='second'),
+                ]
+            )
+
+        agent = Agent(FunctionModel(return_model), result_type=self.ResultType, end_strategy='early')
+        result = agent.run_sync('test multiple final results')
+
+        # Verify the result came from the second final tool
+        assert result.data.value == 'second'
+
+        # Verify we got appropriate tool returns
+        assert result.new_messages()[-1].parts == snapshot(
+            [
+                ToolReturnPart(
+                    tool_name='final_result',
+                    tool_call_id='first',
+                    content='Result tool not used - result failed validation.',
+                    timestamp=IsNow(tz=timezone.utc),
+                ),
+                ToolReturnPart(
+                    tool_name='final_result',
+                    content='Final result processed.',
+                    timestamp=IsNow(tz=timezone.utc),
+                    tool_call_id='second',
+                ),
+            ]
+        )
+
 
 async def test_model_settings_override() -> None:
     def return_settings(_: list[ModelMessage], info: AgentInfo) -> ModelResponse:
diff --git a/tests/test_streaming.py b/tests/test_streaming.py
@@ -768,7 +768,7 @@ def result_validator_simple(data: str) -> str:
                     async for chunk in stream.stream_output(debounce_by=None):
                         messages.append(chunk)
                 stream_usage = deepcopy(stream.usage())
-    assert run.next_node == End(data=FinalResult(data='The bat sat on the mat.', tool_name=None))
+    assert run.next_node == End(data=FinalResult(data='The bat sat on the mat.', tool_name=None, tool_call_id=None))
     assert (
         run.usage()
         == stream_usage

Original file line number	Diff line number	Diff line change
`@@ -141,7 +141,7 @@ async def main():`
`141`	`141`	`kind='response',`
`142`	`142`	`)`
`143`	`143`	`),`
`144`		`- End(data=FinalResult(data='Paris', tool_name=None)),`
	`144`	`+ End(data=FinalResult(data='Paris', tool_name=None, tool_call_id=None)),`
`145`	`145`	`]`
`146`	`146`	`"""`
`147`	`147`	`print(agent_run.result.data)`
`@@ -202,7 +202,7 @@ async def main():`
`202`	`202`	`kind='response',`
`203`	`203`	`)`
`204`	`204`	`),`
`205`		`- End(data=FinalResult(data='Paris', tool_name=None)),`
	`205`	`+ End(data=FinalResult(data='Paris', tool_name=None, tool_call_id=None)),`
`206`	`206`	`]`
`207`	`207`	`"""`
`208`	`208`	```` ``` ````