Commit 866a031: Prioritize tool calls over eager text responses (#505)
Co-authored-by: David Montague <[email protected]>
Parent: 8715834

3 files changed (+61, −19 lines)

pydantic_ai_examples/weather_agent.py
5 additions, 1 deletion

@@ -35,7 +35,11 @@ class Deps:
 
 weather_agent = Agent(
     'openai:gpt-4o',
-    system_prompt='Be concise, reply with one sentence.',
+    system_prompt=(
+        'Be concise, reply with one sentence.'
+        'Use the `get_lat_lng` tool to get the latitude and longitude of the locations, '
+        'then use the `get_weather` tool to get the weather.'
+    ),
     deps_type=Deps,
     retries=2,
 )
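
A side note on the new prompt: the parenthesized value relies on Python's implicit concatenation of adjacent string literals, and as written the first two literals join without a separating space. A minimal illustration (the variable name here is ours, not part of the commit):

prompt = (
    'Be concise, reply with one sentence.'
    'Use the `get_lat_lng` tool to get the latitude and longitude of the locations, '
    'then use the `get_weather` tool to get the weather.'
)
assert 'sentence.Use' in prompt  # the literals fuse directly, no space inserted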

pydantic_ai_slim/pydantic_ai/agent.py
7 additions, 3 deletions

@@ -864,11 +864,15 @@ async def _handle_model_response(
             else:
                 tool_calls.append(part)
 
-        if texts:
+        # At the moment, we prioritize at least executing tool calls if they are present.
+        # In the future, we'd consider making this configurable at the agent or run level.
+        # This accounts for cases like anthropic returns that might contain a text response
+        # and a tool call response, where the text response just indicates the tool call will happen.
+        if tool_calls:
+            return await self._handle_structured_response(tool_calls, run_context)
+        elif texts:
             text = '\n\n'.join(texts)
             return await self._handle_text_response(text, run_context)
-        elif tool_calls:
-            return await self._handle_structured_response(tool_calls, run_context)
         else:
             raise exceptions.UnexpectedModelBehavior('Received empty model response')
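
This is the behavioral core of the commit: when a single model response carries both plain text and tool calls, the run now executes the tool calls instead of stopping at the text. A minimal sketch of the new dispatch order (illustrative only; the function name and return values below are ours, not the library's API):

def dispatch(texts: list[str], tool_calls: list[str]) -> str:
    # Tool calls are checked first, mirroring the reordered branches above.
    if tool_calls:
        return 'run_tools'
    elif texts:
        # Text alone still ends the run as the final answer.
        return 'final_text'
    else:
        raise RuntimeError('Received empty model response')

assert dispatch(texts=["I'll look that up."], tool_calls=['get_weather']) == 'run_tools'
assert dispatch(texts=['It is sunny in London.'], tool_calls=[]) == 'final_text'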

tests/models/test_gemini.py
49 additions, 15 deletions

@@ -19,6 +19,7 @@
     ModelResponse,
     RetryPromptPart,
     SystemPromptPart,
+    TextPart,
     ToolCallPart,
     ToolReturnPart,
     UserPromptPart,

@@ -537,25 +538,58 @@ def handler(_: httpx.Request):
 
 
 async def test_heterogeneous_responses(get_gemini_client: GetGeminiClient):
-    response = gemini_response(
-        _GeminiContent(
-            role='model',
-            parts=[
-                _GeminiTextPart(text='foo'),
-                _function_call_part_from_call(
+    """Indicates that tool calls are prioritized over text in heterogeneous responses."""
+    responses = [
+        gemini_response(
+            _content_model_response(
+                ModelResponse(
+                    parts=[TextPart(content='foo'), ToolCallPart.from_raw_args('get_location', {'loc_name': 'London'})]
+                )
+            )
+        ),
+        gemini_response(_content_model_response(ModelResponse.from_text('final response'))),
+    ]
+
+    gemini_client = get_gemini_client(responses)
+    m = GeminiModel('gemini-1.5-flash', http_client=gemini_client)
+    agent = Agent(m)
+
+    @agent.tool_plain
+    async def get_location(loc_name: str) -> str:
+        if loc_name == 'London':
+            return json.dumps({'lat': 51, 'lng': 0})
+        else:
+            raise ModelRetry('Wrong location, please try again')
+
+    result = await agent.run('Hello')
+    assert result.data == 'final response'
+    assert result.all_messages() == snapshot(
+        [
+            ModelRequest(
+                parts=[
+                    UserPromptPart(content='Hello', timestamp=IsNow(tz=timezone.utc)),
+                ]
+            ),
+            ModelResponse(
+                parts=[
+                    TextPart(content='foo'),
                     ToolCallPart(
                         tool_name='get_location',
-                        args=ArgsDict(args_dict={'loc_name': 'San Fransisco'}),
+                        args=ArgsDict(args_dict={'loc_name': 'London'}),
+                    ),
+                ],
+                timestamp=IsNow(tz=timezone.utc),
+            ),
+            ModelRequest(
+                parts=[
+                    ToolReturnPart(
+                        tool_name='get_location', content='{"lat": 51, "lng": 0}', timestamp=IsNow(tz=timezone.utc)
                    )
-                ),
-            ],
-        )
+                ]
+            ),
+            ModelResponse.from_text(content='final response', timestamp=IsNow(tz=timezone.utc)),
+        ]
     )
-    gemini_client = get_gemini_client(response)
-    m = GeminiModel('gemini-1.5-flash', http_client=gemini_client)
-    agent = Agent(m)
-    result = await agent.run('Hello')
-    assert result.data == 'foo'
 
 
 async def test_stream_text(get_gemini_client: GetGeminiClient):
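
Reading the updated test end to end: the first mocked response mixes a text part ('foo') with a get_location tool call, so the agent runs the tool and sends the return value back; the second mocked response is plain text, which becomes result.data. A toy simulation of that two-turn flow (the names and structure here are ours for illustration, not the pydantic_ai API):

def run_toy_agent(model_turns, tools):
    # Each turn is (texts, tool_calls); tool calls take priority over text.
    history = [('user', 'Hello')]
    for texts, tool_calls in model_turns:
        history.append(('model', texts, tool_calls))
        if tool_calls:
            for name, args in tool_calls:
                history.append(('tool-return', tools[name](**args)))
        elif texts:
            return '\n\n'.join(texts), history
    raise RuntimeError('model never produced a final text response')

turns = [
    (['foo'], [('get_location', {'loc_name': 'London'})]),  # heterogeneous turn
    (['final response'], []),                                # plain-text turn
]
result, _ = run_toy_agent(turns, {'get_location': lambda loc_name: '{"lat": 51, "lng": 0}'})
assert result == 'final response'  # 'foo' was bypassed in favor of the tool call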
