diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py
index ba14592aa8..a67f8c3a0b 100644
--- a/pydantic_ai_slim/pydantic_ai/_agent_graph.py
+++ b/pydantic_ai_slim/pydantic_ai/_agent_graph.py
@@ -580,12 +580,20 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]: # noqa
                     self._next_node = await self._handle_text_response(ctx, text)
                 elif invisible_parts:
                     # handle responses with only thinking or built-in tool parts.
-                    # this can happen with models that support thinking mode when they don't provide
-                    # actionable output alongside their thinking content. so we tell the model to try again.
-                    m = _messages.RetryPromptPart(
-                        content='Responses without text or tool calls are not permitted.',
-                    )
-                    raise ToolRetryError(m)
+                    # Check if this is an incomplete response that should continue
+                    if self.model_response.finish_reason == 'incomplete':
+                        # 'incomplete' indicates the model is pausing mid-execution (e.g., Anthropic's pause_turn
+                        # during long-running builtin tools). This is expected behavior, not an error.
+                        # Continue with an empty request to allow the model to resume without incrementing retries.
+                        self._next_node = ModelRequestNode[DepsT, NodeRunEndT](_messages.ModelRequest(parts=[]))
+                    else:
+                        # Other cases with invisible parts are errors - this can happen with models that support
+                        # thinking mode when they don't provide actionable output alongside their thinking content.
+                        # So we tell the model to try again.
+                        m = _messages.RetryPromptPart(
+                            content='Responses without text or tool calls are not permitted.',
+                        )
+                        raise ToolRetryError(m)
                 else:
                     # we got an empty response with no tool calls, text, thinking, or built-in tool calls.
                     # this sometimes happens with anthropic (and perhaps other models)
diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py
index ef91907942..108fdc0107 100644
--- a/pydantic_ai_slim/pydantic_ai/messages.py
+++ b/pydantic_ai_slim/pydantic_ai/messages.py
@@ -58,8 +58,17 @@
     'content_filter',
     'tool_call',
     'error',
+    'incomplete',
 ]
-"""Reason the model finished generating the response, normalized to OpenTelemetry values."""
+"""Reason the model finished generating the response, normalized to OpenTelemetry values.
+
+- `'stop'`: The model completed its response naturally
+- `'length'`: The model hit a token limit
+- `'content_filter'`: The response was filtered due to content policy
+- `'tool_call'`: The model is requesting a tool call
+- `'error'`: An error occurred
+- `'incomplete'`: The model paused mid-execution and will continue (e.g., Anthropic's `pause_turn`)
+"""
 
 
 @dataclass(repr=False)
diff --git a/pydantic_ai_slim/pydantic_ai/models/anthropic.py b/pydantic_ai_slim/pydantic_ai/models/anthropic.py
index 6c6c99210e..571c61bfdb 100644
--- a/pydantic_ai_slim/pydantic_ai/models/anthropic.py
+++ b/pydantic_ai_slim/pydantic_ai/models/anthropic.py
@@ -42,12 +42,12 @@
 from ..tools import ToolDefinition
 from . import Model, ModelRequestParameters, StreamedResponse, check_allow_model_requests, download_item, get_user_agent
 
-_FINISH_REASON_MAP: dict[BetaStopReason, FinishReason] = {
+_FINISH_REASON_MAP: dict[BetaStopReason, FinishReason | None] = {
     'end_turn': 'stop',
     'max_tokens': 'length',
     'stop_sequence': 'stop',
     'tool_use': 'tool_call',
-    'pause_turn': 'stop',
+    'pause_turn': 'incomplete',
     'refusal': 'content_filter',
 }
 
diff --git a/tests/models/test_anthropic.py b/tests/models/test_anthropic.py
index 6c5795a773..43e47b0ebd 100644
--- a/tests/models/test_anthropic.py
+++ b/tests/models/test_anthropic.py
@@ -5112,3 +5112,127 @@ def memory(**command: Any) -> Any:
 
 According to my memory, you live in **Mexico City**.\
 """)
+
+
+def test_pause_turn_finish_reason_mapping():
+    """Test that pause_turn is mapped to 'incomplete' so the agent continues."""
+    from pydantic_ai.models.anthropic import _FINISH_REASON_MAP  # pyright: ignore[reportPrivateUsage]
+
+    assert _FINISH_REASON_MAP['pause_turn'] == 'incomplete'
+    assert _FINISH_REASON_MAP['end_turn'] == 'stop'
+    assert _FINISH_REASON_MAP['tool_use'] == 'tool_call'
+
+
+async def test_pause_turn_continues_with_unaltered_history(env: TestEnv, allow_model_requests: None):
+    """Test that pause_turn causes the agent to continue with unaltered message history.
+
+    This simulates the scenario where a long-running builtin tool (like web_search)
+    triggers pause_turn, and the agent should retry with the same message history
+    to allow Anthropic to continue from where it left off.
+    """
+    # First response: web_search starts but pauses (pause_turn)
+    first_response = BetaMessage(
+        id='msg_pause',
+        content=[
+            BetaServerToolUseBlock(
+                id='toolu_pause_123',
+                name='web_search',
+                input={'query': 'latest AI developments'},
+                type='server_tool_use',
+            )
+        ],
+        model='claude-sonnet-4-0',
+        role='assistant',
+        stop_reason='pause_turn',  # ← Key: pause_turn indicates incomplete response
+        type='message',
+        usage=BetaUsage(input_tokens=50, output_tokens=10),
+    )
+
+    # Second response: web_search completes (continuation after pause_turn)
+    second_response = BetaMessage(
+        id='msg_continue',
+        content=[
+            BetaWebSearchToolResultBlock(
+                tool_use_id='toolu_pause_123',
+                type='web_search_tool_result',
+                content=[
+                    BetaWebSearchResultBlock(
+                        title='Latest AI News',
+                        url='https://example.com/ai-news',
+                        type='web_search_result',
+                        encrypted_content='dummy_encrypted_content',
+                    )
+                ],
+            ),
+            BetaTextBlock(text='Based on the search results, here are the latest AI developments...', type='text'),
+        ],
+        model='claude-sonnet-4-0',
+        role='assistant',
+        stop_reason='end_turn',  # ← Complete response
+        type='message',
+        usage=BetaUsage(input_tokens=60, output_tokens=50),
+    )
+
+    # Create mock client that returns both responses in sequence
+    mock_anthropic = MockAnthropic.create_mock([first_response, second_response])
+    m = AnthropicModel('claude-sonnet-4-0', provider=AnthropicProvider(anthropic_client=mock_anthropic))
+    agent = Agent(m, builtin_tools=[WebSearchTool()])
+
+    # Run the agent
+    result = await agent.run('What are the latest AI developments?')
+
+    # Verify we got the final result
+    assert result.output == 'Based on the search results, here are the latest AI developments...'
+
+    # Get the captured request kwargs from the mock client (cast keeps the type checker happy)
+    mock_client = cast(MockAnthropic, mock_anthropic)
+    assert len(mock_client.chat_completion_kwargs) == 2
+    first_request = mock_client.chat_completion_kwargs[0]
+    second_request = mock_client.chat_completion_kwargs[1]
+
+    # Verify first request has the user prompt
+    assert len(first_request['messages']) == 1
+    assert first_request['messages'][0]['role'] == 'user'
+    assert first_request['messages'][0]['content'][0]['text'] == 'What are the latest AI developments?'
+
+    # KEY ASSERTION: Second request preserves ONLY the original user prompt
+    # and the incomplete server_tool_use from the pause_turn response.
+    # NO retry message is added (clean continuation).
+    assert len(second_request['messages']) == 2
+
+    # First message is still the original user prompt
+    assert second_request['messages'][0]['role'] == 'user'
+    assert second_request['messages'][0]['content'][0]['text'] == 'What are the latest AI developments?'
+
+    # Second message is the first response (with the incomplete server_tool_use)
+    assert second_request['messages'][1]['role'] == 'assistant'
+    assert second_request['messages'][1]['content'][0]['type'] == 'server_tool_use'
+    assert second_request['messages'][1]['content'][0]['name'] == 'web_search'
+    assert second_request['messages'][1]['content'][0]['id'] == 'toolu_pause_123'
+
+    # No third message! pause_turn is handled cleanly without adding retry prompts
+
+    # Verify the final message history
+    all_messages = result.all_messages()
+
+    # Find the pause_turn response
+    pause_turn_responses = [
+        msg
+        for msg in all_messages
+        if (
+            isinstance(msg, ModelResponse)
+            and msg.provider_details
+            and msg.provider_details.get('finish_reason') == 'pause_turn'
+        )
+    ]
+    assert len(pause_turn_responses) == 1
+    pause_turn_response = pause_turn_responses[0]
+    # KEY ASSERTION: pause_turn is mapped to 'incomplete', allowing continuation
+    assert pause_turn_response.finish_reason == 'incomplete'
+    assert pause_turn_response.provider_details == {'finish_reason': 'pause_turn'}
+
+    # Verify the pause_turn response contains the incomplete builtin tool call
+    builtin_tool_parts = [p for p in pause_turn_response.parts if isinstance(p, BuiltinToolCallPart)]
+    assert len(builtin_tool_parts) == 1
+    assert builtin_tool_parts[0].tool_name == 'web_search'
+    assert builtin_tool_parts[0].tool_call_id == 'toolu_pause_123'