Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions pydantic_ai_slim/pydantic_ai/_agent_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,12 +580,20 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]: # noqa
self._next_node = await self._handle_text_response(ctx, text)
elif invisible_parts:
# handle responses with only thinking or built-in tool parts.
# this can happen with models that support thinking mode when they don't provide
# actionable output alongside their thinking content. so we tell the model to try again.
m = _messages.RetryPromptPart(
content='Responses without text or tool calls are not permitted.',
)
raise ToolRetryError(m)
# Check if this is an incomplete response that should continue
if self.model_response.finish_reason == 'incomplete':
# 'incomplete' indicates the model is pausing mid-execution (e.g., Anthropic's pause_turn
# during long-running builtin tools). This is expected behavior, not an error.
# Continue with an empty request to allow the model to resume without incrementing retries.
self._next_node = ModelRequestNode[DepsT, NodeRunEndT](_messages.ModelRequest(parts=[]))
else:
# Other cases with invisible parts are errors - this can happen with models that support
# thinking mode when they don't provide actionable output alongside their thinking content.
# So we tell the model to try again.
m = _messages.RetryPromptPart(
content='Responses without text or tool calls are not permitted.',
)
raise ToolRetryError(m)
else:
# we got an empty response with no tool calls, text, thinking, or built-in tool calls.
# this sometimes happens with anthropic (and perhaps other models)
Expand Down
11 changes: 10 additions & 1 deletion pydantic_ai_slim/pydantic_ai/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,17 @@
'content_filter',
'tool_call',
'error',
'incomplete',
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can't add this here, unfortunately, as these values need to match those supported by the OpenTelemetry gen_ai spec.

So if we want to implement this generically in the agent graph, we'd need to add a new incomplete=True boolean to ModelResponse. I'm hesitant to do that though, for something that so far only Anthropic requires -- we typically don't add fields unless they're supported by 2 major providers.

So if at all possible, can you find a way to implement this exclusively inside the AnthropicModel?

Copy link
Author

@kazmer97 kazmer97 Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can look into that, but as far as I understand the project design so far, isn't it the agent graph that drives the API requests to the model? So far, model classes have only been responsible for message translation. Wouldn't it pollute the class if we started introducing API requests at the model level? Or do you have another mechanism in mind that would allow manipulating the message chain before it reaches the agent graph?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To address your points:

  1. As far as I can read the OpenTelemetry gen_ai spec, the pydantic finish reasons already extend beyond the examples that are documented.
  2. Other models are not exhibiting this behaviour yet, but it is a very likely pattern if the trend of provider-hosted tools becoming more common continues. Moonshot AI has its own internal web search tool, and OpenAI has an internal web search tool, as does Google. It is a likely pattern to emerge for web search and code execution tools that run longer server-side, as pause_turn allows the client application to prevent timeouts on the API.

]
"""Reason the model finished generating the response, normalized to OpenTelemetry values."""
"""Reason the model finished generating the response, normalized to OpenTelemetry values.
- `'stop'`: The model completed its response naturally
- `'length'`: The model hit a token limit
- `'content_filter'`: The response was filtered due to content policy
- `'tool_call'`: The model is requesting a tool call
- `'error'`: An error occurred
- `'incomplete'`: The model paused mid-execution and will continue (e.g., Anthropic's `pause_turn`)
"""


@dataclass(repr=False)
Expand Down
4 changes: 2 additions & 2 deletions pydantic_ai_slim/pydantic_ai/models/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,12 @@
from ..tools import ToolDefinition
from . import Model, ModelRequestParameters, StreamedResponse, check_allow_model_requests, download_item, get_user_agent

_FINISH_REASON_MAP: dict[BetaStopReason, FinishReason] = {
_FINISH_REASON_MAP: dict[BetaStopReason, FinishReason | None] = {
'end_turn': 'stop',
'max_tokens': 'length',
'stop_sequence': 'stop',
'tool_use': 'tool_call',
'pause_turn': 'stop',
'pause_turn': 'incomplete',
'refusal': 'content_filter',
}

Expand Down
124 changes: 124 additions & 0 deletions tests/models/test_anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5112,3 +5112,127 @@ def memory(**command: Any) -> Any:

According to my memory, you live in **Mexico City**.\
""")


def test_pause_turn_finish_reason_mapping():
    """Verify the Anthropic stop-reason mapping routes pause_turn to 'incomplete'."""
    from pydantic_ai.models.anthropic import _FINISH_REASON_MAP  # pyright: ignore[reportPrivateUsage]

    # Each Anthropic stop_reason should normalize to the expected finish reason.
    expected = {
        'pause_turn': 'incomplete',
        'end_turn': 'stop',
        'tool_use': 'tool_call',
    }
    for stop_reason, finish_reason in expected.items():
        assert _FINISH_REASON_MAP[stop_reason] == finish_reason


async def test_pause_turn_continues_with_unaltered_history(env: TestEnv, allow_model_requests: None):
    """Test that pause_turn causes the agent to continue with unaltered message history.

    This simulates the scenario where a long-running builtin tool (like web_search)
    triggers pause_turn, and the agent should retry with the same message history
    to allow Anthropic to continue from where it left off.
    """
    # Response 1: the model starts a web_search but pauses mid-turn.
    paused_message = BetaMessage(
        id='msg_pause',
        content=[
            BetaServerToolUseBlock(
                id='toolu_pause_123',
                name='web_search',
                input={'query': 'latest AI developments'},
                type='server_tool_use',
            )
        ],
        model='claude-sonnet-4-0',
        role='assistant',
        stop_reason='pause_turn',  # pause_turn marks this response as incomplete
        type='message',
        usage=BetaUsage(input_tokens=50, output_tokens=10),
    )

    # Response 2: the continuation after pause_turn, where the search completes.
    completed_message = BetaMessage(
        id='msg_continue',
        content=[
            BetaWebSearchToolResultBlock(
                tool_use_id='toolu_pause_123',
                type='web_search_tool_result',
                content=[
                    BetaWebSearchResultBlock(
                        title='Latest AI News',
                        url='https://example.com/ai-news',
                        type='web_search_result',
                        encrypted_content='dummy_encrypted_content',
                    )
                ],
            ),
            BetaTextBlock(text='Based on the search results, here are the latest AI developments...', type='text'),
        ],
        model='claude-sonnet-4-0',
        role='assistant',
        stop_reason='end_turn',  # a normal, complete response
        type='message',
        usage=BetaUsage(input_tokens=60, output_tokens=50),
    )

    # Wire both canned responses into a mock client and run a web-search-enabled agent.
    mock_anthropic = MockAnthropic.create_mock([paused_message, completed_message])
    model = AnthropicModel('claude-sonnet-4-0', provider=AnthropicProvider(anthropic_client=mock_anthropic))
    agent = Agent(model, builtin_tools=[WebSearchTool()])

    result = await agent.run('What are the latest AI developments?')

    # The agent surfaces the text from the completed (second) response.
    assert result.output == 'Based on the search results, here are the latest AI developments...'

    # Two API calls must have been made; inspect what each one sent.
    mock_client = cast(MockAnthropic, mock_anthropic)
    assert len(mock_client.chat_completion_kwargs) == 2
    first_request, second_request = mock_client.chat_completion_kwargs

    # The first request carries only the original user prompt.
    assert len(first_request['messages']) == 1
    assert first_request['messages'][0]['role'] == 'user'
    assert first_request['messages'][0]['content'][0]['text'] == 'What are the latest AI developments?'

    # KEY ASSERTION: the second request replays the original user prompt plus the
    # incomplete server_tool_use from the paused response — and nothing else.
    # In particular, no retry prompt is injected: the continuation is clean.
    assert len(second_request['messages']) == 2
    user_message, assistant_message = second_request['messages']

    # The original user prompt is unchanged.
    assert user_message['role'] == 'user'
    assert user_message['content'][0]['text'] == 'What are the latest AI developments?'

    # The assistant turn still holds the in-flight server_tool_use.
    assert assistant_message['role'] == 'assistant'
    assert assistant_message['content'][0]['type'] == 'server_tool_use'
    assert assistant_message['content'][0]['name'] == 'web_search'
    assert assistant_message['content'][0]['id'] == 'toolu_pause_123'

    # Now check the recorded message history for the paused response.
    all_messages = result.all_messages()
    pause_turn_responses = [
        msg
        for msg in all_messages
        if (
            isinstance(msg, ModelResponse)
            and msg.provider_details
            and msg.provider_details.get('finish_reason') == 'pause_turn'
        )
    ]
    assert len(pause_turn_responses) == 1
    pause_turn_response = pause_turn_responses[0]

    # KEY ASSERTION: pause_turn is normalized to 'incomplete', which lets the agent continue.
    assert pause_turn_response.finish_reason == 'incomplete'
    assert pause_turn_response.provider_details == {'finish_reason': 'pause_turn'}

    # The paused response must still carry the incomplete builtin tool call.
    builtin_tool_parts = [part for part in pause_turn_response.parts if isinstance(part, BuiltinToolCallPart)]
    assert len(builtin_tool_parts) == 1
    assert builtin_tool_parts[0].tool_name == 'web_search'
    assert builtin_tool_parts[0].tool_call_id == 'toolu_pause_123'