Skip to content

Commit ab92e67

Browse files
DouweM and claude[bot]
authored
Refine retry logic for parallel tool calling (#2317)
Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com> Co-authored-by: Douwe Maan <[email protected]>
1 parent 2fca506 commit ab92e67

File tree

2 files changed

+144
-10
lines changed

2 files changed

+144
-10
lines changed

pydantic_ai_slim/pydantic_ai/_tool_manager.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,17 @@
22

33
import json
44
from collections.abc import Iterable
5-
from dataclasses import dataclass, replace
5+
from dataclasses import dataclass, field, replace
66
from typing import Any, Generic
77

88
from pydantic import ValidationError
99
from typing_extensions import assert_never
1010

11-
from pydantic_ai.output import DeferredToolCalls
12-
1311
from . import messages as _messages
1412
from ._run_context import AgentDepsT, RunContext
1513
from .exceptions import ModelRetry, ToolRetryError, UnexpectedModelBehavior
1614
from .messages import ToolCallPart
15+
from .output import DeferredToolCalls
1716
from .tools import ToolDefinition
1817
from .toolsets.abstract import AbstractToolset, ToolsetTool
1918

@@ -28,6 +27,8 @@ class ToolManager(Generic[AgentDepsT]):
2827
"""The toolset that provides the tools for this run step."""
2928
tools: dict[str, ToolsetTool[AgentDepsT]]
3029
"""The cached tools for this run step."""
30+
failed_tools: set[str] = field(default_factory=set)
31+
"""Names of tools that failed in this run step."""
3132

3233
@classmethod
3334
async def build(cls, toolset: AbstractToolset[AgentDepsT], ctx: RunContext[AgentDepsT]) -> ToolManager[AgentDepsT]:
@@ -40,7 +41,10 @@ async def build(cls, toolset: AbstractToolset[AgentDepsT], ctx: RunContext[Agent
4041

4142
async def for_run_step(self, ctx: RunContext[AgentDepsT]) -> ToolManager[AgentDepsT]:
4243
"""Build a new tool manager for the next run step, carrying over the retries from the current run step."""
43-
return await self.__class__.build(self.toolset, replace(ctx, retries=self.ctx.retries))
44+
retries = {
45+
failed_tool_name: self.ctx.retries.get(failed_tool_name, 0) + 1 for failed_tool_name in self.failed_tools
46+
}
47+
return await self.__class__.build(self.toolset, replace(ctx, retries=retries))
4448

4549
@property
4650
def tool_defs(self) -> list[ToolDefinition]:
@@ -97,7 +101,7 @@ async def _call_tool(
97101
else:
98102
args_dict = validator.validate_python(call.args or {}, allow_partial=pyd_allow_partial)
99103

100-
output = await self.toolset.call_tool(name, args_dict, ctx, tool)
104+
return await self.toolset.call_tool(name, args_dict, ctx, tool)
101105
except (ValidationError, ModelRetry) as e:
102106
max_retries = tool.max_retries if tool is not None else 1
103107
current_retry = self.ctx.retries.get(name, 0)
@@ -124,12 +128,10 @@ async def _call_tool(
124128
assert_never(e)
125129

126130
if not allow_partial:
127-
self.ctx.retries[name] = current_retry + 1
131+
# If we're validating partial arguments, we don't want to count this as a failed tool as it may still succeed once the full arguments are received.
132+
self.failed_tools.add(name)
128133

129134
raise e
130-
else:
131-
self.ctx.retries.pop(name, None)
132-
return output
133135

134136
async def _call_tool_traced(
135137
self, call: ToolCallPart, allow_partial: bool = False, wrap_validation_errors: bool = True

tests/test_toolsets.py

Lines changed: 133 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
import re
4+
from collections import defaultdict
45
from dataclasses import dataclass, replace
56
from typing import TypeVar
67
from unittest.mock import AsyncMock
@@ -10,7 +11,7 @@
1011

1112
from pydantic_ai._run_context import RunContext
1213
from pydantic_ai._tool_manager import ToolManager
13-
from pydantic_ai.exceptions import UserError
14+
from pydantic_ai.exceptions import ModelRetry, ToolRetryError, UnexpectedModelBehavior, UserError
1415
from pydantic_ai.messages import ToolCallPart
1516
from pydantic_ai.models.test import TestModel
1617
from pydantic_ai.tools import ToolDefinition
@@ -494,3 +495,134 @@ async def test_context_manager_failed_initialization():
494495
pass
495496

496497
assert server1.is_running is False
498+
499+
500+
async def test_tool_manager_retry_logic():
501+
"""Test the retry logic with failed_tools and for_run_step method."""
502+
503+
@dataclass
504+
class TestDeps:
505+
pass
506+
507+
# Create a toolset with tools that can fail
508+
toolset = FunctionToolset[TestDeps](max_retries=2)
509+
call_count: defaultdict[str, int] = defaultdict(int)
510+
511+
@toolset.tool
512+
def failing_tool(x: int) -> int:
513+
"""A tool that always fails"""
514+
call_count['failing_tool'] += 1
515+
raise ModelRetry('This tool always fails')
516+
517+
@toolset.tool
518+
def other_tool(x: int) -> int:
519+
"""A tool that works"""
520+
call_count['other_tool'] += 1
521+
return x * 2
522+
523+
# Create initial context and tool manager
524+
initial_context = build_run_context(TestDeps())
525+
tool_manager = await ToolManager[TestDeps].build(toolset, initial_context)
526+
527+
# Initially no failed tools
528+
assert tool_manager.failed_tools == set()
529+
assert initial_context.retries == {}
530+
531+
# Call the failing tool - should add to failed_tools
532+
with pytest.raises(ToolRetryError):
533+
await tool_manager.handle_call(ToolCallPart(tool_name='failing_tool', args={'x': 1}))
534+
535+
assert tool_manager.failed_tools == {'failing_tool'}
536+
assert call_count['failing_tool'] == 1
537+
538+
# Call the working tool - should not add to failed_tools
539+
result = await tool_manager.handle_call(ToolCallPart(tool_name='other_tool', args={'x': 3}))
540+
assert result == 6
541+
assert tool_manager.failed_tools == {'failing_tool'} # unchanged
542+
assert call_count['other_tool'] == 1
543+
544+
# Test for_run_step - should create new tool manager with updated retry counts
545+
new_context = build_run_context(TestDeps())
546+
new_tool_manager = await tool_manager.for_run_step(new_context)
547+
548+
# The new tool manager should have retry count for the failed tool
549+
assert new_tool_manager.ctx.retries == {'failing_tool': 1}
550+
assert new_tool_manager.failed_tools == set() # reset for new run step
551+
552+
# Call the failing tool again in the new manager - should have retry=1
553+
with pytest.raises(ToolRetryError):
554+
await new_tool_manager.handle_call(ToolCallPart(tool_name='failing_tool', args={'x': 1}))
555+
556+
# Call the failing tool another time in the new manager
557+
with pytest.raises(ToolRetryError):
558+
await new_tool_manager.handle_call(ToolCallPart(tool_name='failing_tool', args={'x': 1}))
559+
560+
# Call the failing tool a third time in the new manager
561+
with pytest.raises(ToolRetryError):
562+
await new_tool_manager.handle_call(ToolCallPart(tool_name='failing_tool', args={'x': 1}))
563+
564+
assert new_tool_manager.failed_tools == {'failing_tool'}
565+
assert call_count['failing_tool'] == 4
566+
567+
# Create another run step
568+
another_context = build_run_context(TestDeps())
569+
another_tool_manager = await new_tool_manager.for_run_step(another_context)
570+
571+
# Should now have retry count of 2 for failing_tool
572+
assert another_tool_manager.ctx.retries == {'failing_tool': 2}
573+
assert another_tool_manager.failed_tools == set()
574+
575+
# Call the failing tool _again_, now we should finally hit the limit
576+
with pytest.raises(UnexpectedModelBehavior, match="Tool 'failing_tool' exceeded max retries count of 2"):
577+
await another_tool_manager.handle_call(ToolCallPart(tool_name='failing_tool', args={'x': 1}))
578+
579+
580+
async def test_tool_manager_multiple_failed_tools():
581+
"""Test retry logic when multiple tools fail in the same run step."""
582+
583+
@dataclass
584+
class TestDeps:
585+
pass
586+
587+
toolset = FunctionToolset[TestDeps]()
588+
589+
@toolset.tool
590+
def tool_a(x: int) -> int:
591+
"""Tool A that fails"""
592+
raise ModelRetry('Tool A fails')
593+
594+
@toolset.tool
595+
def tool_b(x: int) -> int:
596+
"""Tool B that fails"""
597+
raise ModelRetry('Tool B fails')
598+
599+
@toolset.tool
600+
def tool_c(x: int) -> int:
601+
"""Tool C that works"""
602+
return x * 3
603+
604+
# Create tool manager
605+
context = build_run_context(TestDeps())
606+
tool_manager = await ToolManager[TestDeps].build(toolset, context)
607+
608+
# Call tool_a - should fail and be added to failed_tools
609+
with pytest.raises(ToolRetryError):
610+
await tool_manager.handle_call(ToolCallPart(tool_name='tool_a', args={'x': 1}))
611+
assert tool_manager.failed_tools == {'tool_a'}
612+
613+
# Call tool_b - should also fail and be added to failed_tools
614+
with pytest.raises(ToolRetryError):
615+
await tool_manager.handle_call(ToolCallPart(tool_name='tool_b', args={'x': 1}))
616+
assert tool_manager.failed_tools == {'tool_a', 'tool_b'}
617+
618+
# Call tool_c - should succeed and not be added to failed_tools
619+
result = await tool_manager.handle_call(ToolCallPart(tool_name='tool_c', args={'x': 2}))
620+
assert result == 6
621+
assert tool_manager.failed_tools == {'tool_a', 'tool_b'} # unchanged
622+
623+
# Create next run step - should have retry counts for both failed tools
624+
new_context = build_run_context(TestDeps())
625+
new_tool_manager = await tool_manager.for_run_step(new_context)
626+
627+
assert new_tool_manager.ctx.retries == {'tool_a': 1, 'tool_b': 1}
628+
assert new_tool_manager.failed_tools == set() # reset for new run step

0 commit comments

Comments (0)