Skip to content

Commit 9d20769

Browse files
erhuve and DouweM authored
Raise IncompleteToolCall when token limit is reached during generation of tool call (#3137)
Co-authored-by: Douwe Maan <[email protected]>
1 parent 8bc730e commit 9d20769

File tree

4 files changed

+79
-6
lines changed

4 files changed

+79
-6
lines changed

pydantic_ai_slim/pydantic_ai/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
ApprovalRequired,
2323
CallDeferred,
2424
FallbackExceptionGroup,
25+
IncompleteToolCall,
2526
ModelHTTPError,
2627
ModelRetry,
2728
UnexpectedModelBehavior,
@@ -124,6 +125,7 @@
124125
'ModelRetry',
125126
'ModelHTTPError',
126127
'FallbackExceptionGroup',
128+
'IncompleteToolCall',
127129
'UnexpectedModelBehavior',
128130
'UsageLimitExceeded',
129131
'UserError',

pydantic_ai_slim/pydantic_ai/_agent_graph.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,28 @@ class GraphAgentState:
9292
retries: int = 0
9393
run_step: int = 0
9494

95-
def increment_retries(self, max_result_retries: int, error: BaseException | None = None) -> None:
95+
def increment_retries(
96+
self,
97+
max_result_retries: int,
98+
error: BaseException | None = None,
99+
model_settings: ModelSettings | None = None,
100+
) -> None:
96101
self.retries += 1
97102
if self.retries > max_result_retries:
103+
if (
104+
self.message_history
105+
and isinstance(model_response := self.message_history[-1], _messages.ModelResponse)
106+
and model_response.finish_reason == 'length'
107+
and model_response.parts
108+
and isinstance(tool_call := model_response.parts[-1], _messages.ToolCallPart)
109+
):
110+
try:
111+
tool_call.args_as_dict()
112+
except Exception:
113+
max_tokens = (model_settings or {}).get('max_tokens') if model_settings else None
114+
raise exceptions.IncompleteToolCall(
115+
f'Model token limit ({max_tokens if max_tokens is not None else "provider default"}) exceeded while emitting a tool call, resulting in incomplete arguments. Increase max tokens or simplify tool call arguments to fit within limit.'
116+
)
98117
message = f'Exceeded maximum retries ({max_result_retries}) for output validation'
99118
if error:
100119
if isinstance(error, exceptions.UnexpectedModelBehavior) and error.__cause__ is not None:
@@ -568,7 +587,7 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]: # noqa
568587
# resubmit the most recent request that resulted in an empty response,
569588
# as the empty response and request will not create any items in the API payload,
570589
# in the hope the model will return a non-empty response this time.
571-
ctx.state.increment_retries(ctx.deps.max_result_retries)
590+
ctx.state.increment_retries(ctx.deps.max_result_retries, model_settings=ctx.deps.model_settings)
572591
self._next_node = ModelRequestNode[DepsT, NodeRunEndT](_messages.ModelRequest(parts=[]))
573592
return
574593

@@ -630,7 +649,9 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]: # noqa
630649
)
631650
raise ToolRetryError(m)
632651
except ToolRetryError as e:
633-
ctx.state.increment_retries(ctx.deps.max_result_retries, e)
652+
ctx.state.increment_retries(
653+
ctx.deps.max_result_retries, error=e, model_settings=ctx.deps.model_settings
654+
)
634655
self._next_node = ModelRequestNode[DepsT, NodeRunEndT](_messages.ModelRequest(parts=[e.tool_retry]))
635656

636657
self._events_iterator = _run_stream()
@@ -788,10 +809,14 @@ async def process_tool_calls( # noqa: C901
788809
try:
789810
result_data = await tool_manager.handle_call(call)
790811
except exceptions.UnexpectedModelBehavior as e:
791-
ctx.state.increment_retries(ctx.deps.max_result_retries, e)
812+
ctx.state.increment_retries(
813+
ctx.deps.max_result_retries, error=e, model_settings=ctx.deps.model_settings
814+
)
792815
raise e # pragma: lax no cover
793816
except ToolRetryError as e:
794-
ctx.state.increment_retries(ctx.deps.max_result_retries, e)
817+
ctx.state.increment_retries(
818+
ctx.deps.max_result_retries, error=e, model_settings=ctx.deps.model_settings
819+
)
795820
yield _messages.FunctionToolCallEvent(call)
796821
output_parts.append(e.tool_retry)
797822
yield _messages.FunctionToolResultEvent(e.tool_retry)
@@ -820,7 +845,7 @@ async def process_tool_calls( # noqa: C901
820845

821846
# Then, we handle unknown tool calls
822847
if tool_calls_by_kind['unknown']:
823-
ctx.state.increment_retries(ctx.deps.max_result_retries)
848+
ctx.state.increment_retries(ctx.deps.max_result_retries, model_settings=ctx.deps.model_settings)
824849
calls_to_run.extend(tool_calls_by_kind['unknown'])
825850

826851
calls_to_run_results: dict[str, DeferredToolResult] = {}

pydantic_ai_slim/pydantic_ai/exceptions.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
'UnexpectedModelBehavior',
2424
'UsageLimitExceeded',
2525
'ModelHTTPError',
26+
'IncompleteToolCall',
2627
'FallbackExceptionGroup',
2728
)
2829

@@ -168,3 +169,7 @@ class ToolRetryError(Exception):
168169
def __init__(self, tool_retry: RetryPromptPart):
169170
self.tool_retry = tool_retry
170171
super().__init__()
172+
173+
174+
class IncompleteToolCall(UnexpectedModelBehavior):
175+
"""Error raised when a model stops due to token limit while emitting a tool call."""

tests/test_agent.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
DocumentUrl,
2828
FunctionToolset,
2929
ImageUrl,
30+
IncompleteToolCall,
3031
ModelMessage,
3132
ModelMessagesTypeAdapter,
3233
ModelProfile,
@@ -63,6 +64,7 @@
6364
from pydantic_ai.output import StructuredDict, ToolOutput
6465
from pydantic_ai.result import RunUsage
6566
from pydantic_ai.run import AgentRunResultEvent
67+
from pydantic_ai.settings import ModelSettings
6668
from pydantic_ai.tools import DeferredToolRequests, DeferredToolResults, ToolDefinition, ToolDenied
6769
from pydantic_ai.usage import RequestUsage
6870

@@ -2448,6 +2450,45 @@ def empty(m: list[ModelMessage], _info: AgentInfo) -> ModelResponse:
24482450
)
24492451

24502452

2453+
def test_tool_exceeds_token_limit_error():
2454+
def return_incomplete_tool(_: list[ModelMessage], info: AgentInfo) -> ModelResponse:
2455+
resp = ModelResponse(parts=[ToolCallPart('dummy_tool', args='{"foo": "bar",')])
2456+
resp.finish_reason = 'length'
2457+
return resp
2458+
2459+
agent = Agent(FunctionModel(return_incomplete_tool), output_type=str)
2460+
2461+
with pytest.raises(
2462+
IncompleteToolCall,
2463+
match=r'Model token limit \(10\) exceeded while emitting a tool call, resulting in incomplete arguments. Increase max tokens or simplify tool call arguments to fit within limit.',
2464+
):
2465+
agent.run_sync('Hello', model_settings=ModelSettings(max_tokens=10))
2466+
2467+
with pytest.raises(
2468+
IncompleteToolCall,
2469+
match=r'Model token limit \(provider default\) exceeded while emitting a tool call, resulting in incomplete arguments. Increase max tokens or simplify tool call arguments to fit within limit.',
2470+
):
2471+
agent.run_sync('Hello')
2472+
2473+
2474+
def test_tool_exceeds_token_limit_but_complete_args():
2475+
def return_complete_tool_but_hit_limit(messages: list[ModelMessage], info: AgentInfo) -> ModelResponse:
2476+
if len(messages) == 1:
2477+
resp = ModelResponse(parts=[ToolCallPart('dummy_tool', args='{"foo": "bar"}')])
2478+
resp.finish_reason = 'length'
2479+
return resp
2480+
return ModelResponse(parts=[TextPart('done')])
2481+
2482+
agent = Agent(FunctionModel(return_complete_tool_but_hit_limit), output_type=str)
2483+
2484+
@agent.tool_plain
2485+
def dummy_tool(foo: str) -> str:
2486+
return 'tool-ok'
2487+
2488+
result = agent.run_sync('Hello')
2489+
assert result.output == 'done'
2490+
2491+
24512492
def test_model_requests_blocked(env: TestEnv):
24522493
try:
24532494
env.set('GEMINI_API_KEY', 'foobar')

0 commit comments

Comments (0)