fix: enforce tool call limit for parallel tool calls

tradeqvest · tradeqvest · commit a235ee74f5fe · 2025-09-22T10:37:40.000+02:00
diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py
@@ -860,6 +860,29 @@ async def process_tool_calls(  # noqa: C901
         output_final_result.append(final_result)
 
 
+def _enforce_tool_call_limits(
+    tool_manager: ToolManager[DepsT],
+    tool_calls: list[_messages.ToolCallPart],
+    usage_limits: _usage.UsageLimits | None,
+) -> tuple[list[_messages.ToolCallPart], int]:
+    """Return the leading `tool_calls` that fit within `tool_calls_limit` and the number left out.
+
+    With no limit configured or no calls requested, the input is returned unchanged with a count of 0.
+    If no calls remain in the budget, `usage_limits.check_before_tool_call` is given the chance to raise.
+    """
+    if not tool_calls or usage_limits is None or usage_limits.tool_calls_limit is None:
+        return tool_calls, 0
+
+    # Without a run context there is no recorded usage, so nothing counts against the limit yet.
+    ctx = tool_manager.ctx
+    remaining_allowed = usage_limits.tool_calls_limit - (ctx.usage.tool_calls if ctx is not None else 0)
+    if remaining_allowed <= 0:
+        usage_limits.check_before_tool_call(ctx.usage if ctx is not None else _usage.RunUsage())
+
+    limited_tool_calls = tool_calls[: max(0, remaining_allowed)]
+    return limited_tool_calls, len(tool_calls) - len(limited_tool_calls)
+
+
 async def _call_tools(
     tool_manager: ToolManager[DepsT],
     tool_calls: list[_messages.ToolCallPart],
@@ -906,6 +929,8 @@ async def handle_call_or_result(
 
                 return _messages.FunctionToolResultEvent(tool_part)
 
+        executed_calls: list[_messages.ToolCallPart] = tool_calls
+
         if tool_manager.should_call_sequentially(tool_calls):
             for index, call in enumerate(tool_calls):
                 if event := await handle_call_or_result(
@@ -915,12 +940,14 @@ async def handle_call_or_result(
                     yield event
 
         else:
+            executed_calls, extra_calls_count = _enforce_tool_call_limits(tool_manager, tool_calls, usage_limits)
+
             tasks = [
                 asyncio.create_task(
                     _call_tool(tool_manager, call, tool_call_results.get(call.tool_call_id), usage_limits),
                     name=call.tool_name,
                 )
-                for call in tool_calls
+                for call in executed_calls
             ]
 
             pending = tasks
@@ -931,13 +958,17 @@ async def handle_call_or_result(
                     if event := await handle_call_or_result(coro_or_task=task, index=index):
                         yield event
 
+            # If there were extra calls beyond the allowed limit, raise now
+            if extra_calls_count and usage_limits is not None:
+                usage_limits.check_before_tool_call(tool_manager.ctx.usage if tool_manager.ctx else _usage.RunUsage())
+
     # We append the results at the end, rather than as they are received, to retain a consistent ordering
     # This is mostly just to simplify testing
     output_parts.extend([tool_parts_by_index[k] for k in sorted(tool_parts_by_index)])
     output_parts.extend([user_parts_by_index[k] for k in sorted(user_parts_by_index)])
 
     for k in sorted(deferred_calls_by_index):
-        output_deferred_calls[deferred_calls_by_index[k]].append(tool_calls[k])
+        output_deferred_calls[deferred_calls_by_index[k]].append(executed_calls[k])
 
 
 async def _call_tool(
diff --git a/tests/test_usage_limits.py b/tests/test_usage_limits.py
@@ -1,3 +1,4 @@
+import asyncio
 import functools
 import operator
 import re
@@ -12,7 +13,15 @@
 
 from pydantic_ai import Agent, RunContext, UsageLimitExceeded
 from pydantic_ai.exceptions import ModelRetry
-from pydantic_ai.messages import ModelRequest, ModelResponse, ToolCallPart, ToolReturnPart, UserPromptPart
+from pydantic_ai.messages import (
+    ModelMessage,
+    ModelRequest,
+    ModelResponse,
+    ToolCallPart,
+    ToolReturnPart,
+    UserPromptPart,
+)
+from pydantic_ai.models.function import AgentInfo, FunctionModel
 from pydantic_ai.models.test import TestModel
 from pydantic_ai.output import ToolOutput
 from pydantic_ai.usage import RequestUsage, RunUsage, UsageLimits
@@ -308,3 +317,67 @@ def test_deprecated_usage_limits():
         snapshot(['DeprecationWarning: `response_tokens_limit` is deprecated, use `output_tokens_limit` instead'])
     ):
         assert UsageLimits(output_tokens_limit=100).response_tokens_limit == 100  # type: ignore
+
+
+async def test_parallel_tool_calls_limit_enforced():
+    """Check that a batch of parallel tool calls is cut off at `tool_calls_limit`.
+
+    The model first requests 5 tool calls and then 3 more. With a limit of 6, the test expects
+    exactly one call from the second batch to run and the run to end in `UsageLimitExceeded`.
+    """
+    executed_tools: list[str] = []
+    model_call_count = 0
+
+    def tool_calls_response(letters: str, first_id: int) -> ModelResponse:
+        # One `tool_<letter>` call per letter, with ids `call_<n>` numbered from `first_id`
+        return ModelResponse(
+            parts=[
+                ToolCallPart(f'tool_{letter}', {}, f'call_{offset + first_id}')
+                for offset, letter in enumerate(letters)
+            ]
+        )
+
+    def test_model_function(messages: list[ModelMessage], info: AgentInfo) -> ModelResponse:
+        nonlocal model_call_count
+        model_call_count += 1
+
+        if model_call_count == 1:
+            # First response: calls 1-5, which fit within the limit
+            return tool_calls_response('abcab', first_id=1)
+
+        # Second response: calls 6-8, of which only the first fits within the limit
+        assert model_call_count == 2
+        return tool_calls_response('cab', first_id=6)
+
+    agent = Agent(FunctionModel(test_model_function))
+
+    async def record_call(letter: str) -> str:
+        # Shared tool body: sleep briefly, log the call, build the return value
+        await asyncio.sleep(0.01)
+        executed_tools.append(letter)
+        return f'result {letter}'
+
+    @agent.tool_plain
+    async def tool_a() -> str:
+        return await record_call('a')
+
+    @agent.tool_plain
+    async def tool_b() -> str:
+        return await record_call('b')
+
+    @agent.tool_plain
+    async def tool_c() -> str:
+        return await record_call('c')
+
+    limits = UsageLimits(tool_calls_limit=6)
+    expected_error = r'The next tool call would exceed the tool_calls_limit of 6 \(tool_calls=(6)\)'
+
+    # The sixth call exhausts the budget, so the error must report `tool_calls=6`
+    with pytest.raises(
+        UsageLimitExceeded,
+        match=expected_error,
+    ):
+        await agent.run('Use tools', usage_limits=limits)
+
+    # The two calls beyond the limit must never have started
+    assert len(executed_tools) == 6