Skip to content

Commit eeeb32d

Browse files
ethanabrooks and DouweM authored
Ask model to try again if it produced a response without text or tool calls, only thinking (#2556)
Co-authored-by: Douwe Maan <[email protected]>
1 parent dc3b0da commit eeeb32d

File tree

2 files changed

+83
-7
lines changed

2 files changed

+83
-7
lines changed

pydantic_ai_slim/pydantic_ai/_agent_graph.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -434,9 +434,11 @@ async def _run_stream( # noqa: C901
434434
if self._events_iterator is None:
435435
# Ensure that the stream is only run once
436436

437-
async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:
437+
async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]: # noqa: C901
438438
texts: list[str] = []
439439
tool_calls: list[_messages.ToolCallPart] = []
440+
thinking_parts: list[_messages.ThinkingPart] = []
441+
440442
for part in self.model_response.parts:
441443
if isinstance(part, _messages.TextPart):
442444
# ignore empty content for text parts, see #437
@@ -449,11 +451,7 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:
449451
elif isinstance(part, _messages.BuiltinToolReturnPart):
450452
yield _messages.BuiltinToolResultEvent(part)
451453
elif isinstance(part, _messages.ThinkingPart):
452-
# We don't need to do anything with thinking parts in this tool-calling node.
453-
# We need to handle text parts in case there are no tool calls and/or the desired output comes
454-
# from the text, but thinking parts should not directly influence the execution of tools or
455-
# determination of the next node of graph execution here.
456-
pass
454+
thinking_parts.append(part)
457455
else:
458456
assert_never(part)
459457

@@ -467,8 +465,18 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:
467465
elif texts:
468466
# No events are emitted during the handling of text responses, so we don't need to yield anything
469467
self._next_node = await self._handle_text_response(ctx, texts)
468+
elif thinking_parts:
469+
# handle thinking-only responses (responses that contain only ThinkingPart instances)
470+
# this can happen with models that support thinking mode when they don't provide
471+
# actionable output alongside their thinking content.
472+
self._next_node = ModelRequestNode[DepsT, NodeRunEndT](
473+
_messages.ModelRequest(
474+
parts=[_messages.RetryPromptPart('Responses without text or tool calls are not permitted.')]
475+
)
476+
)
470477
else:
471-
# we've got an empty response, this sometimes happens with anthropic (and perhaps other models)
478+
# we got an empty response with no tool calls, text, or thinking
479+
# this sometimes happens with anthropic (and perhaps other models)
472480
# when the model has already returned text alongside tool calls
473481
# in this scenario, if text responses are allowed, we return text from the most recent model
474482
# response, if any

tests/test_agent.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4087,3 +4087,71 @@ def bar() -> str:
40874087
assert run.result.output == snapshot(Foo(a=0, b='a'))
40884088
assert test_model.last_model_request_parameters is not None
40894089
assert [t.name for t in test_model.last_model_request_parameters.function_tools] == snapshot(['bar'])
4090+
4091+
4092+
async def test_thinking_only_response_retry():
4093+
"""Test that thinking-only responses trigger a retry mechanism."""
4094+
from pydantic_ai.messages import ThinkingPart
4095+
from pydantic_ai.models.function import FunctionModel
4096+
4097+
call_count = 0
4098+
4099+
def model_function(messages: list[ModelMessage], info: AgentInfo) -> ModelResponse:
4100+
nonlocal call_count
4101+
call_count += 1
4102+
4103+
if call_count == 1:
4104+
# First call: return thinking-only response
4105+
return ModelResponse(
4106+
parts=[ThinkingPart(content='Let me think about this...')],
4107+
model_name='thinking-test-model',
4108+
)
4109+
else:
4110+
# Second call: return proper response
4111+
return ModelResponse(
4112+
parts=[TextPart(content='Final answer')],
4113+
model_name='thinking-test-model',
4114+
)
4115+
4116+
model = FunctionModel(model_function)
4117+
agent = Agent(model, system_prompt='You are a helpful assistant.')
4118+
4119+
result = await agent.run('Hello')
4120+
4121+
assert result.all_messages() == snapshot(
4122+
[
4123+
ModelRequest(
4124+
parts=[
4125+
SystemPromptPart(
4126+
content='You are a helpful assistant.',
4127+
timestamp=IsDatetime(),
4128+
),
4129+
UserPromptPart(
4130+
content='Hello',
4131+
timestamp=IsDatetime(),
4132+
),
4133+
]
4134+
),
4135+
ModelResponse(
4136+
parts=[ThinkingPart(content='Let me think about this...')],
4137+
usage=Usage(requests=1, request_tokens=57, response_tokens=6, total_tokens=63),
4138+
model_name='function:model_function:',
4139+
timestamp=IsDatetime(),
4140+
),
4141+
ModelRequest(
4142+
parts=[
4143+
RetryPromptPart(
4144+
content='Responses without text or tool calls are not permitted.',
4145+
tool_call_id=IsStr(),
4146+
timestamp=IsDatetime(),
4147+
)
4148+
]
4149+
),
4150+
ModelResponse(
4151+
parts=[TextPart(content='Final answer')],
4152+
usage=Usage(requests=1, request_tokens=75, response_tokens=8, total_tokens=83),
4153+
model_name='function:model_function:',
4154+
timestamp=IsDatetime(),
4155+
),
4156+
]
4157+
)

0 commit comments

Comments (0)