Skip to content

Commit 5894c38

Browse files
Support parallel_tool_calls in ModelSettings (#750)
1 parent eba8a7d commit 5894c38

File tree

6 files changed

+97
-12
lines changed

6 files changed

+97
-12
lines changed

pydantic_ai_slim/pydantic_ai/models/anthropic.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -186,16 +186,22 @@ async def _messages_create(
186186
self, messages: list[ModelMessage], stream: bool, model_settings: ModelSettings | None
187187
) -> AnthropicMessage | AsyncStream[RawMessageStreamEvent]:
188188
# standalone function to make it easier to override
189+
model_settings = model_settings or {}
190+
191+
tool_choice: ToolChoiceParam | None
192+
189193
if not self.tools:
190-
tool_choice: ToolChoiceParam | None = None
191-
elif not self.allow_text_result:
192-
tool_choice = {'type': 'any'}
194+
tool_choice = None
193195
else:
194-
tool_choice = {'type': 'auto'}
196+
if not self.allow_text_result:
197+
tool_choice = {'type': 'any'}
198+
else:
199+
tool_choice = {'type': 'auto'}
195200

196-
system_prompt, anthropic_messages = self._map_message(messages)
201+
if (allow_parallel_tool_calls := model_settings.get('parallel_tool_calls')) is not None:
202+
tool_choice['disable_parallel_tool_use'] = not allow_parallel_tool_calls
197203

198-
model_settings = model_settings or {}
204+
system_prompt, anthropic_messages = self._map_message(messages)
199205

200206
return await self.client.messages.create(
201207
max_tokens=model_settings.get('max_tokens', 1024),

pydantic_ai_slim/pydantic_ai/models/groq.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ async def _completions_create(
197197
model=str(self.model_name),
198198
messages=groq_messages,
199199
n=1,
200-
parallel_tool_calls=True if self.tools else NOT_GIVEN,
200+
parallel_tool_calls=model_settings.get('parallel_tool_calls', True if self.tools else NOT_GIVEN),
201201
tools=self.tools or NOT_GIVEN,
202202
tool_choice=tool_choice or NOT_GIVEN,
203203
stream=stream,

pydantic_ai_slim/pydantic_ai/models/openai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ async def _completions_create(
195195
model=self.model_name,
196196
messages=openai_messages,
197197
n=1,
198-
parallel_tool_calls=True if self.tools else NOT_GIVEN,
198+
parallel_tool_calls=model_settings.get('parallel_tool_calls', True if self.tools else NOT_GIVEN),
199199
tools=self.tools or NOT_GIVEN,
200200
tool_choice=tool_choice or NOT_GIVEN,
201201
stream=stream,

pydantic_ai_slim/pydantic_ai/settings.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
class ModelSettings(TypedDict, total=False):
1313
"""Settings to configure an LLM.
1414
15-
Here we include only settings which apply to multiple models / model providers.
15+
Here we include only settings which apply to multiple models / model providers,
16+
though not all of these settings are supported by all models.
1617
"""
1718

1819
max_tokens: int
@@ -25,6 +26,7 @@ class ModelSettings(TypedDict, total=False):
2526
* OpenAI
2627
* Groq
2728
* Cohere
29+
* Mistral
2830
"""
2931

3032
temperature: float
@@ -42,6 +44,7 @@ class ModelSettings(TypedDict, total=False):
4244
* OpenAI
4345
* Groq
4446
* Cohere
47+
* Mistral
4548
"""
4649

4750
top_p: float
@@ -58,6 +61,7 @@ class ModelSettings(TypedDict, total=False):
5861
* OpenAI
5962
* Groq
6063
* Cohere
64+
* Mistral
6165
"""
6266

6367
timeout: float | Timeout
@@ -69,6 +73,16 @@ class ModelSettings(TypedDict, total=False):
6973
* Anthropic
7074
* OpenAI
7175
* Groq
76+
* Mistral
77+
"""
78+
79+
parallel_tool_calls: bool
80+
"""Whether to allow parallel tool calls.
81+
82+
Supported by:
83+
* OpenAI
84+
* Groq
85+
* Anthropic
7286
"""
7387

7488

tests/models/test_anthropic.py

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from __future__ import annotations as _annotations
22

33
import json
4-
from dataclasses import dataclass
4+
from dataclasses import dataclass, field
55
from datetime import timezone
66
from functools import cached_property
77
from typing import Any, cast
@@ -22,11 +22,12 @@
2222
UserPromptPart,
2323
)
2424
from pydantic_ai.result import Usage
25+
from pydantic_ai.settings import ModelSettings
2526

2627
from ..conftest import IsNow, try_import
2728

2829
with try_import() as imports_successful:
29-
from anthropic import AsyncAnthropic
30+
from anthropic import NOT_GIVEN, AsyncAnthropic
3031
from anthropic.types import (
3132
ContentBlock,
3233
Message as AnthropicMessage,
@@ -53,6 +54,7 @@ def test_init():
5354
class MockAnthropic:
5455
messages_: AnthropicMessage | list[AnthropicMessage] | None = None
5556
index = 0
57+
chat_completion_kwargs: list[dict[str, Any]] = field(default_factory=list)
5658

5759
@cached_property
5860
def messages(self) -> Any:
@@ -62,7 +64,9 @@ def messages(self) -> Any:
6264
def create_mock(cls, messages_: AnthropicMessage | list[AnthropicMessage]) -> AsyncAnthropic:
6365
return cast(AsyncAnthropic, cls(messages_=messages_))
6466

65-
async def messages_create(self, *_args: Any, **_kwargs: Any) -> AnthropicMessage:
67+
async def messages_create(self, *_args: Any, **kwargs: Any) -> AnthropicMessage:
68+
self.chat_completion_kwargs.append({k: v for k, v in kwargs.items() if v is not NOT_GIVEN})
69+
6670
assert self.messages_ is not None, '`messages` must be provided'
6771
if isinstance(self.messages_, list):
6872
response = self.messages_[self.index]
@@ -257,3 +261,40 @@ async def get_location(loc_name: str) -> str:
257261
),
258262
]
259263
)
264+
265+
266+
def get_mock_chat_completion_kwargs(async_anthropic: AsyncAnthropic) -> list[dict[str, Any]]:
267+
if isinstance(async_anthropic, MockAnthropic):
268+
return async_anthropic.chat_completion_kwargs
269+
else: # pragma: no cover
270+
raise RuntimeError('Not a MockOpenAI instance')
271+
272+
273+
@pytest.mark.parametrize('parallel_tool_calls', [True, False])
274+
async def test_parallel_tool_calls(allow_model_requests: None, parallel_tool_calls: bool) -> None:
275+
responses = [
276+
completion_message(
277+
[ToolUseBlock(id='1', input={'loc_name': 'San Francisco'}, name='get_location', type='tool_use')],
278+
usage=AnthropicUsage(input_tokens=2, output_tokens=1),
279+
),
280+
completion_message(
281+
[TextBlock(text='final response', type='text')],
282+
usage=AnthropicUsage(input_tokens=3, output_tokens=5),
283+
),
284+
]
285+
286+
mock_client = MockAnthropic.create_mock(responses)
287+
m = AnthropicModel('claude-3-5-haiku-latest', anthropic_client=mock_client)
288+
agent = Agent(m, model_settings=ModelSettings(parallel_tool_calls=parallel_tool_calls))
289+
290+
@agent.tool_plain
291+
async def get_location(loc_name: str) -> str:
292+
if loc_name == 'London':
293+
return json.dumps({'lat': 51, 'lng': 0})
294+
else:
295+
raise ModelRetry('Wrong location, please try again')
296+
297+
await agent.run('hello')
298+
assert get_mock_chat_completion_kwargs(mock_client)[0]['tool_choice']['disable_parallel_tool_use'] == (
299+
not parallel_tool_calls
300+
)

tests/models/test_openai.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
UserPromptPart,
2424
)
2525
from pydantic_ai.result import Usage
26+
from pydantic_ai.settings import ModelSettings
2627

2728
from ..conftest import IsNow, try_import
2829
from .mock_async_stream import MockAsyncStream
@@ -539,3 +540,26 @@ async def test_system_prompt_role(
539540
'n': 1,
540541
}
541542
]
543+
544+
545+
@pytest.mark.parametrize('parallel_tool_calls', [True, False])
546+
async def test_parallel_tool_calls(allow_model_requests: None, parallel_tool_calls: bool) -> None:
547+
c = completion_message(
548+
ChatCompletionMessage(
549+
content=None,
550+
role='assistant',
551+
tool_calls=[
552+
chat.ChatCompletionMessageToolCall(
553+
id='123',
554+
function=Function(arguments='{"response": [1, 2, 3]}', name='final_result'),
555+
type='function',
556+
)
557+
],
558+
)
559+
)
560+
mock_client = MockOpenAI.create_mock(c)
561+
m = OpenAIModel('gpt-4o', openai_client=mock_client)
562+
agent = Agent(m, result_type=list[int], model_settings=ModelSettings(parallel_tool_calls=parallel_tool_calls))
563+
564+
await agent.run('Hello')
565+
assert get_mock_chat_completion_kwargs(mock_client)[0]['parallel_tool_calls'] == parallel_tool_calls

0 commit comments

Comments (0)