Skip to content

Commit 0ffbf24

Browse files
committed
Fix Anthropic pause_turn handling to allow agent continuation
Introduces a new official 'incomplete' finish reason to the FinishReason enum to represent cases where a model pauses mid-execution and will continue (e.g., Anthropic's pause_turn during long-running builtin tools).

Changes:
- Add 'incomplete' to the FinishReason TypeAlias with documentation
- Map Anthropic's 'pause_turn' to 'incomplete' instead of 'stop'
- Update the agent graph to recognize 'incomplete' and continue with an empty request

Benefits:
- Provider-agnostic: other models can use 'incomplete' for similar behavior
- Proper separation of concerns: the agent graph doesn't check provider-specific details

The agent graph now checks for finish_reason == 'incomplete' instead of checking provider_details for 'pause_turn', maintaining clean architectural boundaries.

Fix: Don't increment retries for the 'incomplete' finish reason. The 'incomplete' finish reason indicates expected mid-execution pausing (e.g., Anthropic's pause_turn during long-running builtin tools), not an error condition. This is normal behavior where the model is saying 'I'm working on something, let me continue' rather than 'something went wrong'.
1 parent 51fec9f commit 0ffbf24

File tree

4 files changed

+150
-9
lines changed

4 files changed

+150
-9
lines changed

pydantic_ai_slim/pydantic_ai/_agent_graph.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -582,12 +582,20 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]: # noqa
582582
self._next_node = await self._handle_text_response(ctx, text)
583583
elif invisible_parts:
584584
# handle responses with only thinking or built-in tool parts.
585-
# this can happen with models that support thinking mode when they don't provide
586-
# actionable output alongside their thinking content. so we tell the model to try again.
587-
m = _messages.RetryPromptPart(
588-
content='Responses without text or tool calls are not permitted.',
589-
)
590-
raise ToolRetryError(m)
585+
# Check if this is an incomplete response that should continue
586+
if self.model_response.finish_reason == 'incomplete':
587+
# 'incomplete' indicates the model is pausing mid-execution (e.g., Anthropic's pause_turn
588+
# during long-running builtin tools). This is expected behavior, not an error.
589+
# Continue with an empty request to allow the model to resume without incrementing retries.
590+
self._next_node = ModelRequestNode[DepsT, NodeRunEndT](_messages.ModelRequest(parts=[]))
591+
else:
592+
# Other cases with invisible parts are errors - this can happen with models that support
593+
# thinking mode when they don't provide actionable output alongside their thinking content.
594+
# So we tell the model to try again.
595+
m = _messages.RetryPromptPart(
596+
content='Responses without text or tool calls are not permitted.',
597+
)
598+
raise ToolRetryError(m)
591599
else:
592600
# we got an empty response with no tool calls, text, thinking, or built-in tool calls.
593601
# this sometimes happens with anthropic (and perhaps other models)

pydantic_ai_slim/pydantic_ai/messages.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,17 @@
5858
'content_filter',
5959
'tool_call',
6060
'error',
61+
'incomplete',
6162
]
62-
"""Reason the model finished generating the response, normalized to OpenTelemetry values."""
63+
"""Reason the model finished generating the response, normalized to OpenTelemetry values.
64+
65+
- `'stop'`: The model completed its response naturally
66+
- `'length'`: The model hit a token limit
67+
- `'content_filter'`: The response was filtered due to content policy
68+
- `'tool_call'`: The model is requesting a tool call
69+
- `'error'`: An error occurred
70+
- `'incomplete'`: The model paused mid-execution and will continue (e.g., Anthropic's `pause_turn`)
71+
"""
6372

6473

6574
@dataclass(repr=False)

pydantic_ai_slim/pydantic_ai/models/anthropic.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,12 @@
4242
from ..tools import ToolDefinition
4343
from . import Model, ModelRequestParameters, StreamedResponse, check_allow_model_requests, download_item, get_user_agent
4444

45-
_FINISH_REASON_MAP: dict[BetaStopReason, FinishReason] = {
45+
_FINISH_REASON_MAP: dict[BetaStopReason, FinishReason | None] = {
4646
'end_turn': 'stop',
4747
'max_tokens': 'length',
4848
'stop_sequence': 'stop',
4949
'tool_use': 'tool_call',
50-
'pause_turn': 'stop',
50+
'pause_turn': 'incomplete',
5151
'refusal': 'content_filter',
5252
}
5353

tests/models/test_anthropic.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5112,3 +5112,127 @@ def memory(**command: Any) -> Any:
51125112
51135113
According to my memory, you live in **Mexico City**.\
51145114
""")
5115+
5116+
5117+
def test_pause_turn_finish_reason_mapping():
5118+
"""Test that pause_turn is mapped to 'incomplete' so the agent continues."""
5119+
from pydantic_ai.models.anthropic import _FINISH_REASON_MAP # pyright: ignore[reportPrivateUsage]
5120+
5121+
assert _FINISH_REASON_MAP['pause_turn'] == 'incomplete'
5122+
assert _FINISH_REASON_MAP['end_turn'] == 'stop'
5123+
assert _FINISH_REASON_MAP['tool_use'] == 'tool_call'
5124+
5125+
5126+
async def test_pause_turn_continues_with_unaltered_history(env: TestEnv, allow_model_requests: None):
5127+
"""Test that pause_turn causes the agent to continue with unaltered message history.
5128+
5129+
This simulates the scenario where a long-running builtin tool (like web_search)
5130+
triggers pause_turn, and the agent should retry with the same message history
5131+
to allow Anthropic to continue from where it left off.
5132+
"""
5133+
# First response: web_search starts but pauses (pause_turn)
5134+
first_response = BetaMessage(
5135+
id='msg_pause',
5136+
content=[
5137+
BetaServerToolUseBlock(
5138+
id='toolu_pause_123',
5139+
name='web_search',
5140+
input={'query': 'latest AI developments'},
5141+
type='server_tool_use',
5142+
)
5143+
],
5144+
model='claude-sonnet-4-0',
5145+
role='assistant',
5146+
stop_reason='pause_turn', # ← Key: pause_turn indicates incomplete response
5147+
type='message',
5148+
usage=BetaUsage(input_tokens=50, output_tokens=10),
5149+
)
5150+
5151+
# Second response: web_search completes (continuation after pause_turn)
5152+
second_response = BetaMessage(
5153+
id='msg_continue',
5154+
content=[
5155+
BetaWebSearchToolResultBlock(
5156+
tool_use_id='toolu_pause_123',
5157+
type='web_search_tool_result',
5158+
content=[
5159+
BetaWebSearchResultBlock(
5160+
title='Latest AI News',
5161+
url='https://example.com/ai-news',
5162+
type='web_search_result',
5163+
encrypted_content='dummy_encrypted_content',
5164+
)
5165+
],
5166+
),
5167+
BetaTextBlock(text='Based on the search results, here are the latest AI developments...', type='text'),
5168+
],
5169+
model='claude-sonnet-4-0',
5170+
role='assistant',
5171+
stop_reason='end_turn', # ← Complete response
5172+
type='message',
5173+
usage=BetaUsage(input_tokens=60, output_tokens=50),
5174+
)
5175+
5176+
# Create mock client that returns both responses in sequence
5177+
mock_anthropic = MockAnthropic.create_mock([first_response, second_response])
5178+
m = AnthropicModel('claude-sonnet-4-0', provider=AnthropicProvider(anthropic_client=mock_anthropic))
5179+
agent = Agent(m, builtin_tools=[WebSearchTool()])
5180+
5181+
# Run the agent
5182+
result = await agent.run('What are the latest AI developments?')
5183+
5184+
# Verify we got the final result
5185+
assert result.output == 'Based on the search results, here are the latest AI developments...'
5186+
5187+
# Get the captured request kwargs - type: ignore for mock access
5188+
mock_client = cast(MockAnthropic, mock_anthropic)
5189+
assert len(mock_client.chat_completion_kwargs) == 2
5190+
first_request = mock_client.chat_completion_kwargs[0]
5191+
second_request = mock_client.chat_completion_kwargs[1]
5192+
5193+
# Verify first request has the user prompt
5194+
assert len(first_request['messages']) == 1
5195+
assert first_request['messages'][0]['role'] == 'user'
5196+
assert first_request['messages'][0]['content'][0]['text'] == 'What are the latest AI developments?'
5197+
5198+
# KEY ASSERTION: Second request preserves ONLY the original user prompt
5199+
# and the incomplete server_tool_use from the pause_turn response
5200+
# NO retry message is added (clean continuation)
5201+
assert len(second_request['messages']) == 2
5202+
5203+
# First message is still the original user prompt
5204+
assert second_request['messages'][0]['role'] == 'user'
5205+
assert second_request['messages'][0]['content'][0]['text'] == 'What are the latest AI developments?'
5206+
5207+
# Second message is the first response (with incomplete server_tool_use)
5208+
assert second_request['messages'][1]['role'] == 'assistant'
5209+
assert second_request['messages'][1]['content'][0]['type'] == 'server_tool_use'
5210+
assert second_request['messages'][1]['content'][0]['name'] == 'web_search'
5211+
assert second_request['messages'][1]['content'][0]['id'] == 'toolu_pause_123'
5212+
5213+
# No third message! pause_turn is handled cleanly without adding retry prompts
5214+
5215+
# Verify the final message history
5216+
all_messages = result.all_messages()
5217+
5218+
# Find the pause_turn response
5219+
pause_turn_responses = [
5220+
msg
5221+
for msg in all_messages
5222+
if (
5223+
isinstance(msg, ModelResponse)
5224+
and msg.provider_details
5225+
and msg.provider_details.get('finish_reason') == 'pause_turn'
5226+
)
5227+
]
5228+
assert len(pause_turn_responses) == 1
5229+
pause_turn_response = pause_turn_responses[0]
5230+
# KEY ASSERTION: pause_turn is mapped to 'incomplete', allowing continuation
5231+
assert pause_turn_response.finish_reason == 'incomplete'
5232+
assert pause_turn_response.provider_details == {'finish_reason': 'pause_turn'}
5233+
5234+
# Verify the pause_turn response contains the incomplete builtin tool call
5235+
builtin_tool_parts = [p for p in pause_turn_response.parts if isinstance(p, BuiltinToolCallPart)]
5236+
assert len(builtin_tool_parts) == 1
5237+
assert builtin_tool_parts[0].tool_name == 'web_search'
5238+
assert builtin_tool_parts[0].tool_call_id == 'toolu_pause_123'

0 commit comments

Comments
 (0)