Skip to content

Commit f003b74

Browse files
committed
add handling for thinking-only requests (these previously caused UnexpectedModelBehavior)
1 parent f25a4e1 commit f003b74

File tree

2 files changed

+72
-14
lines changed

2 files changed

+72
-14
lines changed

pydantic_ai_slim/pydantic_ai/_agent_graph.py

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,18 @@ def is_agent_node(
143143
return isinstance(node, AgentNode)
144144

145145

146+
async def _create_thinking_retry(
147+
ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT, NodeRunEndT]],
148+
) -> ModelRequestNode[DepsT, NodeRunEndT]:
149+
# Create retry prompt
150+
retry_prompt = 'Responses without text or tool calls are not permitted.'
151+
retry_part = _messages.RetryPromptPart(retry_prompt)
152+
retry_request = _messages.ModelRequest(parts=[retry_part])
153+
154+
# Create new ModelRequestNode for retry (it will add the request to message history)
155+
return ModelRequestNode[DepsT, NodeRunEndT](request=retry_request)
156+
157+
146158
@dataclasses.dataclass
147159
class UserPromptNode(AgentNode[DepsT, NodeRunEndT]):
148160
"""The node that handles the user prompt and instructions."""
@@ -434,9 +446,10 @@ async def _run_stream( # noqa: C901
434446
if self._events_iterator is None:
435447
# Ensure that the stream is only run once
436448

437-
async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:
449+
async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]: # noqa: C901
438450
texts: list[str] = []
439451
tool_calls: list[_messages.ToolCallPart] = []
452+
440453
for part in self.model_response.parts:
441454
if isinstance(part, _messages.TextPart):
442455
# ignore empty content for text parts, see #437
@@ -468,19 +481,30 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:
468481
# No events are emitted during the handling of text responses, so we don't need to yield anything
469482
self._next_node = await self._handle_text_response(ctx, texts)
470483
else:
471-
# we've got an empty response, this sometimes happens with anthropic (and perhaps other models)
472-
# when the model has already returned text alongside tool calls
473-
# in this scenario, if text responses are allowed, we return text from the most recent model
474-
# response, if any
475-
if isinstance(ctx.deps.output_schema, _output.TextOutputSchema):
476-
for message in reversed(ctx.state.message_history):
477-
if isinstance(message, _messages.ModelResponse):
478-
last_texts = [p.content for p in message.parts if isinstance(p, _messages.TextPart)]
479-
if last_texts:
480-
self._next_node = await self._handle_text_response(ctx, last_texts)
481-
return
482-
483-
raise exceptions.UnexpectedModelBehavior('Received empty model response')
484+
# we've got an empty response
485+
486+
thinking_parts = [p for p in self.model_response.parts if isinstance(p, _messages.ThinkingPart)]
487+
488+
if thinking_parts:
489+
# handle thinking-only responses (responses that contain only ThinkingPart instances)
490+
# this can happen with models that support thinking mode when they don't provide
491+
# actionable output alongside their thinking content.
492+
self._next_node = await _create_thinking_retry(ctx)
493+
else:
494+
# handle empty response with no thinking
495+
# this sometimes happens with anthropic (and perhaps other models)
496+
# when the model has already returned text alongside tool calls
497+
# in this scenario, if text responses are allowed, we return text from the most recent model
498+
# response, if any
499+
if isinstance(ctx.deps.output_schema, _output.TextOutputSchema):
500+
for message in reversed(ctx.state.message_history):
501+
if isinstance(message, _messages.ModelResponse):
502+
last_texts = [p.content for p in message.parts if isinstance(p, _messages.TextPart)]
503+
if last_texts:
504+
self._next_node = await self._handle_text_response(ctx, last_texts)
505+
return
506+
507+
raise exceptions.UnexpectedModelBehavior('Received empty model response')
484508

485509
self._events_iterator = _run_stream()
486510

tests/test_agent.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4087,3 +4087,37 @@ def bar() -> str:
40874087
assert run.result.output == snapshot(Foo(a=0, b='a'))
40884088
assert test_model.last_model_request_parameters is not None
40894089
assert [t.name for t in test_model.last_model_request_parameters.function_tools] == snapshot(['bar'])
4090+
4091+
4092+
async def test_thinking_only_response_retry():
4093+
"""Test that thinking-only responses trigger a retry mechanism."""
4094+
from pydantic_ai.messages import ThinkingPart
4095+
from pydantic_ai.models.function import FunctionModel
4096+
4097+
call_count = 0
4098+
4099+
def model_function(messages: list[ModelMessage], info: AgentInfo) -> ModelResponse:
4100+
nonlocal call_count
4101+
call_count += 1
4102+
4103+
if call_count == 1:
4104+
# First call: return thinking-only response
4105+
return ModelResponse(
4106+
parts=[ThinkingPart(content='Let me think about this...')],
4107+
model_name='thinking-test-model',
4108+
)
4109+
else:
4110+
# Second call: return proper response
4111+
return ModelResponse(
4112+
parts=[TextPart(content='Final answer')],
4113+
model_name='thinking-test-model',
4114+
)
4115+
4116+
model = FunctionModel(model_function)
4117+
agent = Agent(model, system_prompt='You are a helpful assistant.')
4118+
4119+
result = await agent.run('Hello')
4120+
4121+
# Should have made exactly 2 calls and produced the expected result
4122+
assert call_count == 2
4123+
assert result.output == 'Final answer'

0 commit comments

Comments (0)