Skip to content

Commit 10b3c91

Browse files
Fix handling of OpenAI system prompts in order to support o1 (#740)
Co-authored-by: David Montague <[email protected]>
1 parent 6f58f57 commit 10b3c91

File tree

4 files changed

+100
-41
lines changed

4 files changed

+100
-41
lines changed

pydantic_ai_slim/pydantic_ai/models/openai.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@
5151
allows this model to be used more easily with other model types (ie, Ollama)
5252
"""
5353

54+
OpenAISystemPromptRole = Literal['system', 'developer']
55+
5456

5557
@dataclass(init=False)
5658
class OpenAIModel(Model):
@@ -63,6 +65,7 @@ class OpenAIModel(Model):
6365

6466
model_name: OpenAIModelName
6567
client: AsyncOpenAI = field(repr=False)
68+
system_prompt_role: OpenAISystemPromptRole | None = field(default=None)
6669

6770
def __init__(
6871
self,
@@ -72,6 +75,7 @@ def __init__(
7275
api_key: str | None = None,
7376
openai_client: AsyncOpenAI | None = None,
7477
http_client: AsyncHTTPClient | None = None,
78+
system_prompt_role: OpenAISystemPromptRole | None = None,
7579
):
7680
"""Initialize an OpenAI model.
7781
@@ -87,6 +91,8 @@ def __init__(
8791
[`AsyncOpenAI`](https://github.com/openai/openai-python?tab=readme-ov-file#async-usage)
8892
client to use. If provided, `base_url`, `api_key`, and `http_client` must be `None`.
8993
http_client: An existing `httpx.AsyncClient` to use for making HTTP requests.
94+
system_prompt_role: The role to use for the system prompt message. If not provided, defaults to `'system'`.
95+
In the future, this may be inferred from the model name.
9096
"""
9197
self.model_name: OpenAIModelName = model_name
9298
if openai_client is not None:
@@ -98,6 +104,7 @@ def __init__(
98104
self.client = AsyncOpenAI(base_url=base_url, api_key=api_key, http_client=http_client)
99105
else:
100106
self.client = AsyncOpenAI(base_url=base_url, api_key=api_key, http_client=cached_async_http_client())
107+
self.system_prompt_role = system_prompt_role
101108

102109
async def agent_model(
103110
self,
@@ -115,6 +122,7 @@ async def agent_model(
115122
self.model_name,
116123
allow_text_result,
117124
tools,
125+
self.system_prompt_role,
118126
)
119127

120128
def name(self) -> str:
@@ -140,6 +148,7 @@ class OpenAIAgentModel(AgentModel):
140148
model_name: OpenAIModelName
141149
allow_text_result: bool
142150
tools: list[chat.ChatCompletionToolParam]
151+
system_prompt_role: OpenAISystemPromptRole | None
143152

144153
async def request(
145154
self, messages: list[ModelMessage], model_settings: ModelSettings | None
@@ -222,11 +231,10 @@ async def _process_streamed_response(self, response: AsyncStream[ChatCompletionC
222231
_timestamp=datetime.fromtimestamp(first_chunk.created, tz=timezone.utc),
223232
)
224233

225-
@classmethod
226-
def _map_message(cls, message: ModelMessage) -> Iterable[chat.ChatCompletionMessageParam]:
234+
def _map_message(self, message: ModelMessage) -> Iterable[chat.ChatCompletionMessageParam]:
227235
"""Just maps a `pydantic_ai.Message` to a `openai.types.ChatCompletionMessageParam`."""
228236
if isinstance(message, ModelRequest):
229-
yield from cls._map_user_message(message)
237+
yield from self._map_user_message(message)
230238
elif isinstance(message, ModelResponse):
231239
texts: list[str] = []
232240
tool_calls: list[chat.ChatCompletionMessageToolCallParam] = []
@@ -248,11 +256,13 @@ def _map_message(cls, message: ModelMessage) -> Iterable[chat.ChatCompletionMess
248256
else:
249257
assert_never(message)
250258

251-
@classmethod
252-
def _map_user_message(cls, message: ModelRequest) -> Iterable[chat.ChatCompletionMessageParam]:
259+
def _map_user_message(self, message: ModelRequest) -> Iterable[chat.ChatCompletionMessageParam]:
253260
for part in message.parts:
254261
if isinstance(part, SystemPromptPart):
255-
yield chat.ChatCompletionSystemMessageParam(role='system', content=part.content)
262+
if self.system_prompt_role == 'developer':
263+
yield chat.ChatCompletionDeveloperMessageParam(role='developer', content=part.content)
264+
else:
265+
yield chat.ChatCompletionSystemMessageParam(role='system', content=part.content)
256266
elif isinstance(part, UserPromptPart):
257267
yield chat.ChatCompletionUserMessageParam(role='user', content=part.content)
258268
elif isinstance(part, ToolReturnPart):

pydantic_ai_slim/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ dependencies = [
4545
# WARNING if you add optional groups, please update docs/install.md
4646
logfire = ["logfire>=2.3"]
4747
graph = ["pydantic-graph==0.0.19"]
48-
openai = ["openai>=1.54.3"]
48+
openai = ["openai>=1.59.0"]
4949
cohere = ["cohere>=5.13.11"]
5050
vertexai = ["google-auth>=2.36.0", "requests>=2.32.3"]
5151
anthropic = ["anthropic>=0.40.0"]

tests/models/test_openai.py

Lines changed: 76 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import json
44
from collections.abc import Sequence
5-
from dataclasses import dataclass
5+
from dataclasses import dataclass, field
66
from datetime import datetime, timezone
77
from functools import cached_property
88
from typing import Any, Literal, cast
@@ -28,7 +28,7 @@
2828
from .mock_async_stream import MockAsyncStream
2929

3030
with try_import() as imports_successful:
31-
from openai import AsyncOpenAI
31+
from openai import NOT_GIVEN, AsyncOpenAI
3232
from openai.types import chat
3333
from openai.types.chat.chat_completion import Choice
3434
from openai.types.chat.chat_completion_chunk import (
@@ -41,7 +41,7 @@
4141
from openai.types.chat.chat_completion_message_tool_call import Function
4242
from openai.types.completion_usage import CompletionUsage, PromptTokensDetails
4343

44-
from pydantic_ai.models.openai import OpenAIModel
44+
from pydantic_ai.models.openai import OpenAIModel, OpenAISystemPromptRole
4545

4646
pytestmark = [
4747
pytest.mark.skipif(not imports_successful(), reason='openai not installed'),
@@ -50,25 +50,26 @@
5050

5151

5252
def test_init():
53-
m = OpenAIModel('gpt-4', api_key='foobar')
53+
m = OpenAIModel('gpt-4o', api_key='foobar')
5454
assert str(m.client.base_url) == 'https://api.openai.com/v1/'
5555
assert m.client.api_key == 'foobar'
56-
assert m.name() == 'openai:gpt-4'
56+
assert m.name() == 'openai:gpt-4o'
5757

5858

5959
def test_init_with_base_url():
60-
m = OpenAIModel('gpt-4', base_url='https://example.com/v1', api_key='foobar')
60+
m = OpenAIModel('gpt-4o', base_url='https://example.com/v1', api_key='foobar')
6161
assert str(m.client.base_url) == 'https://example.com/v1/'
6262
assert m.client.api_key == 'foobar'
63-
assert m.name() == 'openai:gpt-4'
63+
assert m.name() == 'openai:gpt-4o'
6464
m.name()
6565

6666

6767
@dataclass
6868
class MockOpenAI:
6969
completions: chat.ChatCompletion | list[chat.ChatCompletion] | None = None
7070
stream: list[chat.ChatCompletionChunk] | list[list[chat.ChatCompletionChunk]] | None = None
71-
index = 0
71+
index: int = 0
72+
chat_completion_kwargs: list[dict[str, Any]] = field(default_factory=list)
7273

7374
@cached_property
7475
def chat(self) -> Any:
@@ -86,8 +87,10 @@ def create_mock_stream(
8687
return cast(AsyncOpenAI, cls(stream=list(stream))) # pyright: ignore[reportArgumentType]
8788

8889
async def chat_completions_create( # pragma: no cover
89-
self, *_args: Any, stream: bool = False, **_kwargs: Any
90+
self, *_args: Any, stream: bool = False, **kwargs: Any
9091
) -> chat.ChatCompletion | MockAsyncStream[chat.ChatCompletionChunk]:
92+
self.chat_completion_kwargs.append({k: v for k, v in kwargs.items() if v is not NOT_GIVEN})
93+
9194
if stream:
9295
assert self.stream is not None, 'you can only used `stream=True` if `stream` is provided'
9396
# noinspection PyUnresolvedReferences
@@ -106,12 +109,19 @@ async def chat_completions_create( # pragma: no cover
106109
return response
107110

108111

112+
def get_mock_chat_completion_kwargs(async_open_ai: AsyncOpenAI) -> list[dict[str, Any]]:
113+
if isinstance(async_open_ai, MockOpenAI):
114+
return async_open_ai.chat_completion_kwargs
115+
else: # pragma: no cover
116+
raise RuntimeError('Not a MockOpenAI instance')
117+
118+
109119
def completion_message(message: ChatCompletionMessage, *, usage: CompletionUsage | None = None) -> chat.ChatCompletion:
110120
return chat.ChatCompletion(
111121
id='123',
112122
choices=[Choice(finish_reason='stop', index=0, message=message)],
113123
created=1704067200, # 2024-01-01
114-
model='gpt-4',
124+
model='gpt-4o',
115125
object='chat.completion',
116126
usage=usage,
117127
)
@@ -120,7 +130,7 @@ def completion_message(message: ChatCompletionMessage, *, usage: CompletionUsage
120130
async def test_request_simple_success(allow_model_requests: None):
121131
c = completion_message(ChatCompletionMessage(content='world', role='assistant'))
122132
mock_client = MockOpenAI.create_mock(c)
123-
m = OpenAIModel('gpt-4', openai_client=mock_client)
133+
m = OpenAIModel('gpt-4o', openai_client=mock_client)
124134
agent = Agent(m)
125135

126136
result = await agent.run('hello')
@@ -138,17 +148,29 @@ async def test_request_simple_success(allow_model_requests: None):
138148
ModelRequest(parts=[UserPromptPart(content='hello', timestamp=IsNow(tz=timezone.utc))]),
139149
ModelResponse(
140150
parts=[TextPart(content='world')],
141-
model_name='gpt-4',
151+
model_name='gpt-4o',
142152
timestamp=datetime(2024, 1, 1, 0, 0, tzinfo=timezone.utc),
143153
),
144154
ModelRequest(parts=[UserPromptPart(content='hello', timestamp=IsNow(tz=timezone.utc))]),
145155
ModelResponse(
146156
parts=[TextPart(content='world')],
147-
model_name='gpt-4',
157+
model_name='gpt-4o',
148158
timestamp=datetime(2024, 1, 1, 0, 0, tzinfo=timezone.utc),
149159
),
150160
]
151161
)
162+
assert get_mock_chat_completion_kwargs(mock_client) == [
163+
{'messages': [{'content': 'hello', 'role': 'user'}], 'model': 'gpt-4o', 'n': 1},
164+
{
165+
'messages': [
166+
{'content': 'hello', 'role': 'user'},
167+
{'content': 'world', 'role': 'assistant'},
168+
{'content': 'hello', 'role': 'user'},
169+
],
170+
'model': 'gpt-4o',
171+
'n': 1,
172+
},
173+
]
152174

153175

154176
async def test_request_simple_usage(allow_model_requests: None):
@@ -157,7 +179,7 @@ async def test_request_simple_usage(allow_model_requests: None):
157179
usage=CompletionUsage(completion_tokens=1, prompt_tokens=2, total_tokens=3),
158180
)
159181
mock_client = MockOpenAI.create_mock(c)
160-
m = OpenAIModel('gpt-4', openai_client=mock_client)
182+
m = OpenAIModel('gpt-4o', openai_client=mock_client)
161183
agent = Agent(m)
162184

163185
result = await agent.run('Hello')
@@ -180,7 +202,7 @@ async def test_request_structured_response(allow_model_requests: None):
180202
)
181203
)
182204
mock_client = MockOpenAI.create_mock(c)
183-
m = OpenAIModel('gpt-4', openai_client=mock_client)
205+
m = OpenAIModel('gpt-4o', openai_client=mock_client)
184206
agent = Agent(m, result_type=list[int])
185207

186208
result = await agent.run('Hello')
@@ -196,7 +218,7 @@ async def test_request_structured_response(allow_model_requests: None):
196218
tool_call_id='123',
197219
)
198220
],
199-
model_name='gpt-4',
221+
model_name='gpt-4o',
200222
timestamp=datetime(2024, 1, 1, tzinfo=timezone.utc),
201223
),
202224
ModelRequest(
@@ -256,7 +278,7 @@ async def test_request_tool_call(allow_model_requests: None):
256278
completion_message(ChatCompletionMessage(content='final response', role='assistant')),
257279
]
258280
mock_client = MockOpenAI.create_mock(responses)
259-
m = OpenAIModel('gpt-4', openai_client=mock_client)
281+
m = OpenAIModel('gpt-4o', openai_client=mock_client)
260282
agent = Agent(m, system_prompt='this is the system prompt')
261283

262284
@agent.tool_plain
@@ -284,7 +306,7 @@ async def get_location(loc_name: str) -> str:
284306
tool_call_id='1',
285307
)
286308
],
287-
model_name='gpt-4',
309+
model_name='gpt-4o',
288310
timestamp=datetime(2024, 1, 1, tzinfo=timezone.utc),
289311
),
290312
ModelRequest(
@@ -305,7 +327,7 @@ async def get_location(loc_name: str) -> str:
305327
tool_call_id='2',
306328
)
307329
],
308-
model_name='gpt-4',
330+
model_name='gpt-4o',
309331
timestamp=datetime(2024, 1, 1, tzinfo=timezone.utc),
310332
),
311333
ModelRequest(
@@ -320,7 +342,7 @@ async def get_location(loc_name: str) -> str:
320342
),
321343
ModelResponse(
322344
parts=[TextPart(content='final response')],
323-
model_name='gpt-4',
345+
model_name='gpt-4o',
324346
timestamp=datetime(2024, 1, 1, tzinfo=timezone.utc),
325347
),
326348
]
@@ -346,7 +368,7 @@ def chunk(delta: list[ChoiceDelta], finish_reason: FinishReason | None = None) -
346368
ChunkChoice(index=index, delta=delta, finish_reason=finish_reason) for index, delta in enumerate(delta)
347369
],
348370
created=1704067200, # 2024-01-01
349-
model='gpt-4',
371+
model='gpt-4o',
350372
object='chat.completion.chunk',
351373
usage=CompletionUsage(completion_tokens=1, prompt_tokens=2, total_tokens=3),
352374
)
@@ -359,7 +381,7 @@ def text_chunk(text: str, finish_reason: FinishReason | None = None) -> chat.Cha
359381
async def test_stream_text(allow_model_requests: None):
360382
stream = text_chunk('hello '), text_chunk('world'), chunk([])
361383
mock_client = MockOpenAI.create_mock_stream(stream)
362-
m = OpenAIModel('gpt-4', openai_client=mock_client)
384+
m = OpenAIModel('gpt-4o', openai_client=mock_client)
363385
agent = Agent(m)
364386

365387
async with agent.run_stream('') as result:
@@ -372,7 +394,7 @@ async def test_stream_text(allow_model_requests: None):
372394
async def test_stream_text_finish_reason(allow_model_requests: None):
373395
stream = text_chunk('hello '), text_chunk('world'), text_chunk('.', finish_reason='stop')
374396
mock_client = MockOpenAI.create_mock_stream(stream)
375-
m = OpenAIModel('gpt-4', openai_client=mock_client)
397+
m = OpenAIModel('gpt-4o', openai_client=mock_client)
376398
agent = Agent(m)
377399

378400
async with agent.run_stream('') as result:
@@ -419,7 +441,7 @@ async def test_stream_structured(allow_model_requests: None):
419441
chunk([]),
420442
)
421443
mock_client = MockOpenAI.create_mock_stream(stream)
422-
m = OpenAIModel('gpt-4', openai_client=mock_client)
444+
m = OpenAIModel('gpt-4o', openai_client=mock_client)
423445
agent = Agent(m, result_type=MyTypedDict)
424446

425447
async with agent.run_stream('') as result:
@@ -447,7 +469,7 @@ async def test_stream_structured_finish_reason(allow_model_requests: None):
447469
struc_chunk(None, None, finish_reason='stop'),
448470
)
449471
mock_client = MockOpenAI.create_mock_stream(stream)
450-
m = OpenAIModel('gpt-4', openai_client=mock_client)
472+
m = OpenAIModel('gpt-4o', openai_client=mock_client)
451473
agent = Agent(m, result_type=MyTypedDict)
452474

453475
async with agent.run_stream('') as result:
@@ -467,7 +489,7 @@ async def test_stream_structured_finish_reason(allow_model_requests: None):
467489
async def test_no_content(allow_model_requests: None):
468490
stream = chunk([ChoiceDelta()]), chunk([ChoiceDelta()])
469491
mock_client = MockOpenAI.create_mock_stream(stream)
470-
m = OpenAIModel('gpt-4', openai_client=mock_client)
492+
m = OpenAIModel('gpt-4o', openai_client=mock_client)
471493
agent = Agent(m, result_type=MyTypedDict)
472494

473495
with pytest.raises(UnexpectedModelBehavior, match='Received empty model response'):
@@ -482,11 +504,38 @@ async def test_no_delta(allow_model_requests: None):
482504
text_chunk('world'),
483505
)
484506
mock_client = MockOpenAI.create_mock_stream(stream)
485-
m = OpenAIModel('gpt-4', openai_client=mock_client)
507+
m = OpenAIModel('gpt-4o', openai_client=mock_client)
486508
agent = Agent(m)
487509

488510
async with agent.run_stream('') as result:
489511
assert not result.is_complete
490512
assert [c async for c in result.stream_text(debounce_by=None)] == snapshot(['hello ', 'hello world'])
491513
assert result.is_complete
492514
assert result.usage() == snapshot(Usage(requests=1, request_tokens=6, response_tokens=3, total_tokens=9))
515+
516+
517+
@pytest.mark.parametrize('system_prompt_role', ['system', 'developer', None])
518+
async def test_system_prompt_role(
519+
allow_model_requests: None, system_prompt_role: OpenAISystemPromptRole | None
520+
) -> None:
521+
"""Testing the system prompt role for OpenAI models is properly set / inferred."""
522+
523+
c = completion_message(ChatCompletionMessage(content='world', role='assistant'))
524+
mock_client = MockOpenAI.create_mock(c)
525+
m = OpenAIModel('gpt-4o', system_prompt_role=system_prompt_role, openai_client=mock_client)
526+
assert m.system_prompt_role == system_prompt_role
527+
528+
agent = Agent(m, system_prompt='some instructions')
529+
result = await agent.run('hello')
530+
assert result.data == 'world'
531+
532+
assert get_mock_chat_completion_kwargs(mock_client) == [
533+
{
534+
'messages': [
535+
{'content': 'some instructions', 'role': system_prompt_role or 'system'},
536+
{'content': 'hello', 'role': 'user'},
537+
],
538+
'model': 'gpt-4o',
539+
'n': 1,
540+
}
541+
]

0 commit comments

Comments (0)