-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Raise IncompleteToolCall when token limit is reached during generation of tool call
#3137
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
DouweM
merged 6 commits into
pydantic:main
from
erhuve:feat/ray/tool-exceeds-token-limit-error
Oct 21, 2025
Merged
Changes from 3 commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
537d6ce
Put error in increment_retries to allow attempts, check lists for ele…
erhuve 7ea20fb
add set token limit to error message
erhuve 0919e01
Merge branch 'main' into feat/ray/tool-exceeds-token-limit-error
erhuve ad4ee6c
Merge branch 'main' of github.com:erhuve/pydantic-ai into feat/ray/to…
erhuve 6aa9c1d
rename to IncompleteToolCall, subclass UnexpectedModelBehavior for ba…
erhuve 7836e94
Merge branch 'main' into feat/ray/tool-exceeds-token-limit-error
DouweM File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -92,9 +92,24 @@ class GraphAgentState: | |
| retries: int = 0 | ||
| run_step: int = 0 | ||
|
|
||
| def increment_retries(self, max_result_retries: int, error: BaseException | None = None) -> None: | ||
| def increment_retries( | ||
| self, max_result_retries: int, error: BaseException | None = None, max_tokens: int | None = None | ||
| ) -> None: | ||
| self.retries += 1 | ||
| if self.retries > max_result_retries: | ||
| if ( | ||
| self.message_history | ||
| and isinstance(model_response := self.message_history[-1], _messages.ModelResponse) | ||
| and model_response.finish_reason == 'length' | ||
| and model_response.parts | ||
| and isinstance(tool_call := model_response.parts[-1], _messages.ToolCallPart) | ||
| ): | ||
| try: | ||
| tool_call.args_as_dict() | ||
| except Exception: | ||
| raise exceptions.ToolExceedsTokenLimitError( | ||
| f'Model token limit ({max_tokens if max_tokens is not None else "provider default"}) exceeded while emitting a tool call, resulting in incomplete arguments. Increase max tokens or simplify tool call arguments to fit within limit.' | ||
| ) | ||
| message = f'Exceeded maximum retries ({max_result_retries}) for output validation' | ||
| if error: | ||
| if isinstance(error, exceptions.UnexpectedModelBehavior) and error.__cause__ is not None: | ||
|
|
@@ -568,7 +583,8 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]: # noqa | |
| # resubmit the most recent request that resulted in an empty response, | ||
| # as the empty response and request will not create any items in the API payload, | ||
| # in the hope the model will return a non-empty response this time. | ||
| ctx.state.increment_retries(ctx.deps.max_result_retries) | ||
| max_tokens = (ctx.deps.model_settings or {}).get('max_tokens') if ctx.deps.model_settings else None | ||
|
||
| ctx.state.increment_retries(ctx.deps.max_result_retries, max_tokens=max_tokens) | ||
| self._next_node = ModelRequestNode[DepsT, NodeRunEndT](_messages.ModelRequest(parts=[])) | ||
| return | ||
|
|
||
|
|
@@ -630,7 +646,8 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]: # noqa | |
| ) | ||
| raise ToolRetryError(m) | ||
| except ToolRetryError as e: | ||
| ctx.state.increment_retries(ctx.deps.max_result_retries, e) | ||
| max_tokens = (ctx.deps.model_settings or {}).get('max_tokens') if ctx.deps.model_settings else None | ||
| ctx.state.increment_retries(ctx.deps.max_result_retries, error=e, max_tokens=max_tokens) | ||
| self._next_node = ModelRequestNode[DepsT, NodeRunEndT](_messages.ModelRequest(parts=[e.tool_retry])) | ||
|
|
||
| self._events_iterator = _run_stream() | ||
|
|
@@ -776,10 +793,12 @@ async def process_tool_calls( # noqa: C901 | |
| try: | ||
| result_data = await tool_manager.handle_call(call) | ||
| except exceptions.UnexpectedModelBehavior as e: | ||
| ctx.state.increment_retries(ctx.deps.max_result_retries, e) | ||
| max_tokens = (ctx.deps.model_settings or {}).get('max_tokens') if ctx.deps.model_settings else None | ||
| ctx.state.increment_retries(ctx.deps.max_result_retries, error=e, max_tokens=max_tokens) | ||
| raise e # pragma: lax no cover | ||
| except ToolRetryError as e: | ||
| ctx.state.increment_retries(ctx.deps.max_result_retries, e) | ||
| max_tokens = (ctx.deps.model_settings or {}).get('max_tokens') if ctx.deps.model_settings else None | ||
| ctx.state.increment_retries(ctx.deps.max_result_retries, error=e, max_tokens=max_tokens) | ||
| yield _messages.FunctionToolCallEvent(call) | ||
| output_parts.append(e.tool_retry) | ||
| yield _messages.FunctionToolResultEvent(e.tool_retry) | ||
|
|
@@ -808,7 +827,8 @@ async def process_tool_calls( # noqa: C901 | |
|
|
||
| # Then, we handle unknown tool calls | ||
| if tool_calls_by_kind['unknown']: | ||
| ctx.state.increment_retries(ctx.deps.max_result_retries) | ||
| max_tokens = (ctx.deps.model_settings or {}).get('max_tokens') if ctx.deps.model_settings else None | ||
| ctx.state.increment_retries(ctx.deps.max_result_retries, max_tokens=max_tokens) | ||
| calls_to_run.extend(tool_calls_by_kind['unknown']) | ||
|
|
||
| calls_to_run_results: dict[str, DeferredToolResult] = {} | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this should be a subclass of
UnexpectedModelBehavior for backward compatibility, and I suggest renaming it IncompleteToolCall