From dd1b39b174df702a47e227b8b3c30f62911310f9 Mon Sep 17 00:00:00 2001 From: Michael Hahn Date: Wed, 12 Nov 2025 23:01:42 -0800 Subject: [PATCH 1/5] Add support for aspect ratio in gemini image generation --- docs/builtin-tools.md | 22 ++++++- pydantic_ai_slim/pydantic_ai/builtin_tools.py | 11 ++++ pydantic_ai_slim/pydantic_ai/models/google.py | 62 ++++++++++++------- tests/models/test_google.py | 25 ++++++++ 4 files changed, 95 insertions(+), 25 deletions(-) diff --git a/docs/builtin-tools.md b/docs/builtin-tools.md index 1e4bdfd279..164accd94b 100644 --- a/docs/builtin-tools.md +++ b/docs/builtin-tools.md @@ -202,7 +202,7 @@ The [`ImageGenerationTool`][pydantic_ai.builtin_tools.ImageGenerationTool] enabl | Provider | Supported | Notes | |----------|-----------|-------| | OpenAI Responses | ✅ | Full feature support. Only supported by models newer than `gpt-5`. Metadata about the generated image, like the [`revised_prompt`](https://platform.openai.com/docs/guides/tools-image-generation#revised-prompt) sent to the underlying image model, is available on the [`BuiltinToolReturnPart`][pydantic_ai.messages.BuiltinToolReturnPart] that's available via [`ModelResponse.builtin_tool_calls`][pydantic_ai.messages.ModelResponse.builtin_tool_calls]. | -| Google | ✅ | No parameter support. Only supported by [image generation models](https://ai.google.dev/gemini-api/docs/image-generation) like `gemini-2.5-flash-image`. These models do not support [structured output](output.md) or [function tools](tools.md). These models will always generate images, even if this built-in tool is not explicitly specified. | +| Google | ✅ | Supports the `aspect_ratio` parameter when explicitly provided. Only supported by [image generation models](https://ai.google.dev/gemini-api/docs/image-generation) like `gemini-2.5-flash-image`. These models do not support [structured output](output.md) or [function tools](tools.md) and will always generate images, even if this built-in tool is not explicitly specified. | | Anthropic | ❌ | | | Groq | ❌ | | | Bedrock | ❌ | | @@ -248,6 +248,23 @@ assert isinstance(result.response.images[0], BinaryImage) _(This example is complete, it can be run "as is")_ +To control the aspect ratio when using Gemini image models, include the `ImageGenerationTool` explicitly: + +```py {title="image_generation_google_aspect_ratio.py"} +from pydantic_ai import Agent, BinaryImage, ImageGenerationTool + +agent = Agent( + 'google-gla:gemini-2.5-flash-image', + builtin_tools=[ImageGenerationTool(aspect_ratio='16:9')], + output_type=BinaryImage, +) + +result = agent.run_sync('Generate a wide illustration of an axolotl city skyline.') +assert isinstance(result.output, BinaryImage) +``` + +_(This example is complete, it can be run "as is")_ + The `ImageGenerationTool` can be used together with `output_type=BinaryImage` to get [image output](output.md#image-output). If the `ImageGenerationTool` built-in tool is not explicitly specified, it will be enabled automatically: ```py {title="image_generation_output.py"} @@ -291,6 +308,8 @@ assert isinstance(result.output, BinaryImage) _(This example is complete, it can be run "as is")_ +Gemini image models support the separate `aspect_ratio` parameter; set `aspect_ratio='16:9'`, for example, when calling `ImageGenerationTool` to generate wide or tall compositions. + For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageGenerationTool]. #### Provider Support @@ -305,6 +324,7 @@ For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageG | `partial_images` | ✅ | ❌ | | `quality` | ✅ | ❌ | | `size` | ✅ | ❌ | +| `aspect_ratio` | ❌ | ✅ | ## URL Context Tool diff --git a/pydantic_ai_slim/pydantic_ai/builtin_tools.py b/pydantic_ai_slim/pydantic_ai/builtin_tools.py index 5559b3124a..1fa592f6d7 100644 --- a/pydantic_ai_slim/pydantic_ai/builtin_tools.py +++ b/pydantic_ai_slim/pydantic_ai/builtin_tools.py @@ -21,6 +21,9 @@ _BUILTIN_TOOL_TYPES: dict[str, type[AbstractBuiltinTool]] = {} +ImageAspectRatio = Literal['21:9', '16:9', '4:3', '3:2', '1:1', '9:16', '3:4', '2:3', '5:4', '4:5'] +"""Supported aspect ratios for image generation tools.""" + @dataclass(kw_only=True) class AbstractBuiltinTool(ABC): @@ -255,6 +258,14 @@ class ImageGenerationTool(AbstractBuiltinTool): * OpenAI Responses """ + aspect_ratio: ImageAspectRatio | None = None + """The aspect ratio to use for generated images. + + Supported by: + + * Google image-generation models (Gemini) when the tool is explicitly enabled. + """ + kind: str = 'image_generation' """The kind of tool.""" diff --git a/pydantic_ai_slim/pydantic_ai/models/google.py b/pydantic_ai_slim/pydantic_ai/models/google.py index 071f65fa66..4d8b40ffab 100644 --- a/pydantic_ai_slim/pydantic_ai/models/google.py +++ b/pydantic_ai_slim/pydantic_ai/models/google.py @@ -72,6 +72,7 @@ GoogleSearchDict, GroundingMetadata, HttpOptionsDict, + ImageConfigDict, MediaResolution, Modality, Part, @@ -325,12 +326,16 @@ async def request_stream( response = await self._generate_content(messages, True, model_settings, model_request_parameters) yield await self._process_streamed_response(response, model_request_parameters) # type: ignore - def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[ToolDict] | None: + def _get_tools( + self, model_request_parameters: ModelRequestParameters + ) -> tuple[list[ToolDict] | None, ImageConfigDict | None]: tools: list[ToolDict] = [ ToolDict(function_declarations=[_function_declaration_from_tool(t)]) for t in model_request_parameters.tool_defs.values() ] + image_config: ImageConfigDict | None = None + if model_request_parameters.builtin_tools: if model_request_parameters.function_tools: raise UserError('Google does not support function tools and built-in tools at the same time.') @@ -347,11 +352,17 @@ def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[T raise UserError( "`ImageGenerationTool` is not supported by this model. Use a model with 'image' in the name instead." ) + if tool.aspect_ratio: + if image_config and image_config.get('aspect_ratio') != tool.aspect_ratio: + raise UserError( + 'Multiple `ImageGenerationTool` instances with different `aspect_ratio` values are not supported.' + ) + image_config = ImageConfigDict(aspect_ratio=tool.aspect_ratio) else: # pragma: no cover raise UserError( f'`{tool.__class__.__name__}` is not supported by `GoogleModel`. If it should be, please file an issue.' ) - return tools or None + return tools or None, image_config def _get_tool_config( self, model_request_parameters: ModelRequestParameters, tools: list[ToolDict] | None @@ -401,7 +412,7 @@ async def _build_content_and_config( model_settings: GoogleModelSettings, model_request_parameters: ModelRequestParameters, ) -> tuple[list[ContentUnionDict], GenerateContentConfigDict]: - tools = self._get_tools(model_request_parameters) + tools, image_config = self._get_tools(model_request_parameters) if tools and not self.profile.supports_tools: raise UserError('Tools are not supported by this model.') @@ -437,27 +448,30 @@ async def _build_content_and_config( else: raise UserError('Google does not support setting ModelSettings.timeout to a httpx.Timeout') - config = GenerateContentConfigDict( - http_options=http_options, - system_instruction=system_instruction, - temperature=model_settings.get('temperature'), - top_p=model_settings.get('top_p'), - max_output_tokens=model_settings.get('max_tokens'), - stop_sequences=model_settings.get('stop_sequences'), - presence_penalty=model_settings.get('presence_penalty'), - frequency_penalty=model_settings.get('frequency_penalty'), - seed=model_settings.get('seed'), - safety_settings=model_settings.get('google_safety_settings'), - thinking_config=model_settings.get('google_thinking_config'), - labels=model_settings.get('google_labels'), - media_resolution=model_settings.get('google_video_resolution'), - cached_content=model_settings.get('google_cached_content'), - tools=cast(ToolListUnionDict, tools), - tool_config=tool_config, - response_mime_type=response_mime_type, - response_schema=response_schema, - response_modalities=modalities, - ) + config: GenerateContentConfigDict = { + 'http_options': http_options, + 'system_instruction': system_instruction, + 'temperature': model_settings.get('temperature'), + 'top_p': model_settings.get('top_p'), + 'max_output_tokens': model_settings.get('max_tokens'), + 'stop_sequences': model_settings.get('stop_sequences'), + 'presence_penalty': model_settings.get('presence_penalty'), + 'frequency_penalty': model_settings.get('frequency_penalty'), + 'seed': model_settings.get('seed'), + 'safety_settings': model_settings.get('google_safety_settings'), + 'thinking_config': model_settings.get('google_thinking_config'), + 'labels': model_settings.get('google_labels'), + 'media_resolution': model_settings.get('google_video_resolution'), + 'cached_content': model_settings.get('google_cached_content'), + 'tools': cast(ToolListUnionDict, tools), + 'tool_config': tool_config, + 'response_mime_type': response_mime_type, + 'response_schema': response_schema, + 'response_modalities': modalities, + } + if image_config: + config['image_config'] = image_config + return contents, config def _process_response(self, response: GenerateContentResponse) -> ModelResponse: diff --git a/tests/models/test_google.py b/tests/models/test_google.py index 82332f38ef..e5b54f5b06 100644 --- a/tests/models/test_google.py +++ b/tests/models/test_google.py @@ -3134,6 +3134,31 @@ async def test_google_image_generation_tool(allow_model_requests: None, google_p await agent.run('Generate an image of an axolotl.') +async def test_google_image_generation_tool_aspect_ratio(google_provider: GoogleProvider) -> None: + model = GoogleModel('gemini-2.5-flash-image', provider=google_provider) + params = ModelRequestParameters(builtin_tools=[ImageGenerationTool(aspect_ratio='16:9')]) + + tools, image_config = model._get_tools(params) # pyright: ignore[reportPrivateUsage] + assert tools is None + assert image_config == {'aspect_ratio': '16:9'} + + +async def test_google_image_generation_tool_aspect_ratio_conflict(google_provider: GoogleProvider) -> None: + model = GoogleModel('gemini-2.5-flash-image', provider=google_provider) + params = ModelRequestParameters( + builtin_tools=[ + ImageGenerationTool(aspect_ratio='16:9'), + ImageGenerationTool(aspect_ratio='1:1'), + ] + ) + + with pytest.raises( + UserError, + match='Multiple `ImageGenerationTool` instances with different `aspect_ratio` values are not supported.', + ): + model._get_tools(params) # pyright: ignore[reportPrivateUsage] + + async def test_google_vertexai_image_generation(allow_model_requests: None, vertex_provider: GoogleProvider): model = GoogleModel('gemini-2.5-flash-image', provider=vertex_provider) From 9a3d8b02971526a38251944c6081404e9e493cb4 Mon Sep 17 00:00:00 2001 From: Michael Hahn Date: Thu, 13 Nov 2025 08:51:38 -0800 Subject: [PATCH 2/5] Update snapshot --- tests/models/test_model_request_parameters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/models/test_model_request_parameters.py b/tests/models/test_model_request_parameters.py index 1c8d0780e5..33bda3a1bb 100644 --- a/tests/models/test_model_request_parameters.py +++ b/tests/models/test_model_request_parameters.py @@ -91,6 +91,7 @@ def test_model_request_parameters_are_serializable(): 'partial_images': 0, 'quality': 'auto', 'size': '1024x1024', + 'aspect_ratio': None, }, {'kind': 'memory'}, { From 3ecda0ea185ed050604c90428fa7daab2dcee70e Mon Sep 17 00:00:00 2001 From: Michael Hahn Date: Thu, 13 Nov 2025 12:20:48 -0800 Subject: [PATCH 3/5] Add test to examples --- tests/test_examples.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_examples.py b/tests/test_examples.py index cc0ae7b593..10c5406827 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -678,6 +678,12 @@ async def model_logic( # noqa: C901 FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='160d47')), ] ) + elif m.content == 'Generate a wide illustration of an axolotl city skyline.': + return ModelResponse( + parts=[ + FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='wide-axolotl-city')), + ] + ) elif m.content == 'Generate a chart of y=x^2 for x=-5 to 5.': return ModelResponse( parts=[ From 1d8d19b0c73821442db2201cd12aa7e38c241d83 Mon Sep 17 00:00:00 2001 From: Michael Hahn Date: Thu, 13 Nov 2025 18:42:53 -0800 Subject: [PATCH 4/5] Review feedback --- docs/builtin-tools.md | 36 +++++++------- pydantic_ai_slim/pydantic_ai/builtin_tools.py | 2 +- pydantic_ai_slim/pydantic_ai/models/google.py | 49 +++++++++---------- tests/models/test_google.py | 16 ------ 4 files changed, 40 insertions(+), 63 deletions(-) diff --git a/docs/builtin-tools.md b/docs/builtin-tools.md index 164accd94b..c7161ffa74 100644 --- a/docs/builtin-tools.md +++ b/docs/builtin-tools.md @@ -202,7 +202,7 @@ The [`ImageGenerationTool`][pydantic_ai.builtin_tools.ImageGenerationTool] enabl | Provider | Supported | Notes | |----------|-----------|-------| | OpenAI Responses | ✅ | Full feature support. Only supported by models newer than `gpt-5`. Metadata about the generated image, like the [`revised_prompt`](https://platform.openai.com/docs/guides/tools-image-generation#revised-prompt) sent to the underlying image model, is available on the [`BuiltinToolReturnPart`][pydantic_ai.messages.BuiltinToolReturnPart] that's available via [`ModelResponse.builtin_tool_calls`][pydantic_ai.messages.ModelResponse.builtin_tool_calls]. | -| Google | ✅ | Supports the `aspect_ratio` parameter when explicitly provided. Only supported by [image generation models](https://ai.google.dev/gemini-api/docs/image-generation) like `gemini-2.5-flash-image`. These models do not support [structured output](output.md) or [function tools](tools.md) and will always generate images, even if this built-in tool is not explicitly specified. | +| Google | ✅ | Limited parameter support. Only supported by [image generation models](https://ai.google.dev/gemini-api/docs/image-generation) like `gemini-2.5-flash-image`. These models do not support [structured output](output.md) or [function tools](tools.md) and will always generate images, even if this built-in tool is not explicitly specified. | | Anthropic | ❌ | | | Groq | ❌ | | | Bedrock | ❌ | | @@ -248,23 +248,6 @@ assert isinstance(result.response.images[0], BinaryImage) _(This example is complete, it can be run "as is")_ -To control the aspect ratio when using Gemini image models, include the `ImageGenerationTool` explicitly: - -```py {title="image_generation_google_aspect_ratio.py"} -from pydantic_ai import Agent, BinaryImage, ImageGenerationTool - -agent = Agent( - 'google-gla:gemini-2.5-flash-image', - builtin_tools=[ImageGenerationTool(aspect_ratio='16:9')], - output_type=BinaryImage, -) - -result = agent.run_sync('Generate a wide illustration of an axolotl city skyline.') -assert isinstance(result.output, BinaryImage) -``` - -_(This example is complete, it can be run "as is")_ - The `ImageGenerationTool` can be used together with `output_type=BinaryImage` to get [image output](output.md#image-output). If the `ImageGenerationTool` built-in tool is not explicitly specified, it will be enabled automatically: ```py {title="image_generation_output.py"} @@ -308,7 +291,22 @@ assert isinstance(result.output, BinaryImage) _(This example is complete, it can be run "as is")_ -Gemini image models support the separate `aspect_ratio` parameter; set `aspect_ratio='16:9'`, for example, when calling `ImageGenerationTool` to generate wide or tall compositions. +To control the aspect ratio when using Gemini image models, include the `ImageGenerationTool` explicitly: + +```py {title="image_generation_google_aspect_ratio.py"} +from pydantic_ai import Agent, BinaryImage, ImageGenerationTool + +agent = Agent( + 'google-gla:gemini-2.5-flash-image', + builtin_tools=[ImageGenerationTool(aspect_ratio='16:9')], + output_type=BinaryImage, +) + +result = agent.run_sync('Generate a wide illustration of an axolotl city skyline.') +assert isinstance(result.output, BinaryImage) +``` + +_(This example is complete, it can be run "as is")_ For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageGenerationTool]. diff --git a/pydantic_ai_slim/pydantic_ai/builtin_tools.py b/pydantic_ai_slim/pydantic_ai/builtin_tools.py index 1fa592f6d7..b0b5ce8ac0 100644 --- a/pydantic_ai_slim/pydantic_ai/builtin_tools.py +++ b/pydantic_ai_slim/pydantic_ai/builtin_tools.py @@ -263,7 +263,7 @@ class ImageGenerationTool(AbstractBuiltinTool): Supported by: - * Google image-generation models (Gemini) when the tool is explicitly enabled. + * Google image-generation models (Gemini) """ kind: str = 'image_generation' diff --git a/pydantic_ai_slim/pydantic_ai/models/google.py b/pydantic_ai_slim/pydantic_ai/models/google.py index 4d8b40ffab..61583080e5 100644 --- a/pydantic_ai_slim/pydantic_ai/models/google.py +++ b/pydantic_ai_slim/pydantic_ai/models/google.py @@ -353,10 +353,6 @@ def _get_tools( "`ImageGenerationTool` is not supported by this model. Use a model with 'image' in the name instead." ) if tool.aspect_ratio: - if image_config and image_config.get('aspect_ratio') != tool.aspect_ratio: - raise UserError( - 'Multiple `ImageGenerationTool` instances with different `aspect_ratio` values are not supported.' - ) image_config = ImageConfigDict(aspect_ratio=tool.aspect_ratio) else: # pragma: no cover raise UserError( @@ -448,29 +444,28 @@ async def _build_content_and_config( else: raise UserError('Google does not support setting ModelSettings.timeout to a httpx.Timeout') - config: GenerateContentConfigDict = { - 'http_options': http_options, - 'system_instruction': system_instruction, - 'temperature': model_settings.get('temperature'), - 'top_p': model_settings.get('top_p'), - 'max_output_tokens': model_settings.get('max_tokens'), - 'stop_sequences': model_settings.get('stop_sequences'), - 'presence_penalty': model_settings.get('presence_penalty'), - 'frequency_penalty': model_settings.get('frequency_penalty'), - 'seed': model_settings.get('seed'), - 'safety_settings': model_settings.get('google_safety_settings'), - 'thinking_config': model_settings.get('google_thinking_config'), - 'labels': model_settings.get('google_labels'), - 'media_resolution': model_settings.get('google_video_resolution'), - 'cached_content': model_settings.get('google_cached_content'), - 'tools': cast(ToolListUnionDict, tools), - 'tool_config': tool_config, - 'response_mime_type': response_mime_type, - 'response_schema': response_schema, - 'response_modalities': modalities, - } - if image_config: - config['image_config'] = image_config + config = GenerateContentConfigDict( + http_options=http_options, + system_instruction=system_instruction, + temperature=model_settings.get('temperature'), + top_p=model_settings.get('top_p'), + max_output_tokens=model_settings.get('max_tokens'), + stop_sequences=model_settings.get('stop_sequences'), + presence_penalty=model_settings.get('presence_penalty'), + frequency_penalty=model_settings.get('frequency_penalty'), + seed=model_settings.get('seed'), + safety_settings=model_settings.get('google_safety_settings'), + thinking_config=model_settings.get('google_thinking_config'), + labels=model_settings.get('google_labels'), + media_resolution=model_settings.get('google_video_resolution'), + cached_content=model_settings.get('google_cached_content'), + tools=cast(ToolListUnionDict, tools), + tool_config=tool_config, + response_mime_type=response_mime_type, + response_schema=response_schema, + response_modalities=modalities, + image_config=image_config, + ) return contents, config diff --git a/tests/models/test_google.py b/tests/models/test_google.py index e5b54f5b06..d9c9606e96 100644 --- a/tests/models/test_google.py +++ b/tests/models/test_google.py @@ -3143,22 +3143,6 @@ async def test_google_image_generation_tool_aspect_ratio(google_provider: Google assert image_config == {'aspect_ratio': '16:9'} -async def test_google_image_generation_tool_aspect_ratio_conflict(google_provider: GoogleProvider) -> None: - model = GoogleModel('gemini-2.5-flash-image', provider=google_provider) - params = ModelRequestParameters( - builtin_tools=[ - ImageGenerationTool(aspect_ratio='16:9'), - ImageGenerationTool(aspect_ratio='1:1'), - ] - ) - - with pytest.raises( - UserError, - match='Multiple `ImageGenerationTool` instances with different `aspect_ratio` values are not supported.', - ): - model._get_tools(params) # pyright: ignore[reportPrivateUsage] - - async def test_google_vertexai_image_generation(allow_model_requests: None, vertex_provider: GoogleProvider): model = GoogleModel('gemini-2.5-flash-image', provider=vertex_provider) From ab0730cb4b30413798059093bed7e7d02552b107 Mon Sep 17 00:00:00 2001 From: Michael Hahn Date: Thu, 13 Nov 2025 21:23:21 -0800 Subject: [PATCH 5/5] Review feedback re: openai --- docs/builtin-tools.md | 6 ++- pydantic_ai_slim/pydantic_ai/builtin_tools.py | 1 + pydantic_ai_slim/pydantic_ai/models/openai.py | 35 +++++++++++++++++- tests/models/test_openai_responses.py | 37 ++++++++++++++++++- 4 files changed, 74 insertions(+), 5 deletions(-) diff --git a/docs/builtin-tools.md b/docs/builtin-tools.md index c7161ffa74..7d073378da 100644 --- a/docs/builtin-tools.md +++ b/docs/builtin-tools.md @@ -291,6 +291,10 @@ assert isinstance(result.output, BinaryImage) _(This example is complete, it can be run "as is")_ +OpenAI Responses models also respect the `aspect_ratio` parameter. Because the OpenAI API only exposes discrete image sizes, +PydanticAI maps `'1:1'` -> `1024x1024`, `'2:3'` -> `1024x1536`, and `'3:2'` -> `1536x1024`. Providing any other aspect ratio +results in an error, and if you also set `size` it must match the computed value. + To control the aspect ratio when using Gemini image models, include the `ImageGenerationTool` explicitly: ```py {title="image_generation_google_aspect_ratio.py"} @@ -322,7 +326,7 @@ For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageG | `partial_images` | ✅ | ❌ | | `quality` | ✅ | ❌ | | `size` | ✅ | ❌ | -| `aspect_ratio` | ❌ | ✅ | +| `aspect_ratio` | ✅ (1:1, 2:3, 3:2) | ✅ | ## URL Context Tool diff --git a/pydantic_ai_slim/pydantic_ai/builtin_tools.py b/pydantic_ai_slim/pydantic_ai/builtin_tools.py index b0b5ce8ac0..f8fc766f2a 100644 --- a/pydantic_ai_slim/pydantic_ai/builtin_tools.py +++ b/pydantic_ai_slim/pydantic_ai/builtin_tools.py @@ -264,6 +264,7 @@ class ImageGenerationTool(AbstractBuiltinTool): Supported by: * Google image-generation models (Gemini) + * OpenAI Responses (maps '1:1', '2:3', and '3:2' to supported sizes) """ kind: str = 'image_generation' diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py index ed1e711823..e878ef0856 100644 --- a/pydantic_ai_slim/pydantic_ai/models/openai.py +++ b/pydantic_ai_slim/pydantic_ai/models/openai.py @@ -18,7 +18,7 @@ from .._run_context import RunContext from .._thinking_part import split_content_into_text_and_thinking from .._utils import guard_tool_call_id as _guard_tool_call_id, now_utc as _now_utc, number_to_datetime -from ..builtin_tools import CodeExecutionTool, ImageGenerationTool, MCPServerTool, WebSearchTool +from ..builtin_tools import CodeExecutionTool, ImageAspectRatio, ImageGenerationTool, MCPServerTool, WebSearchTool from ..exceptions import UserError from ..messages import ( AudioUrl, @@ -134,6 +134,36 @@ 'failed': 'error', } +_OPENAI_ASPECT_RATIO_TO_SIZE: dict[ImageAspectRatio, Literal['1024x1024', '1024x1536', '1536x1024']] = { + '1:1': '1024x1024', + '2:3': '1024x1536', + '3:2': '1536x1024', +} + + +def _resolve_openai_image_generation_size( + tool: ImageGenerationTool, +) -> Literal['auto', '1024x1024', '1024x1536', '1536x1024']: + """Map `ImageGenerationTool.aspect_ratio` to an OpenAI size string when provided.""" + aspect_ratio = tool.aspect_ratio + if aspect_ratio is None: + return tool.size + + mapped_size = _OPENAI_ASPECT_RATIO_TO_SIZE.get(aspect_ratio) + if mapped_size is None: + supported = ', '.join(_OPENAI_ASPECT_RATIO_TO_SIZE) + raise UserError( + f'OpenAI image generation only supports `aspect_ratio` values: {supported}. ' + 'Specify one of those values or omit `aspect_ratio`.' + ) + + if tool.size not in ('auto', mapped_size): + raise UserError( + '`ImageGenerationTool` cannot combine `aspect_ratio` with a conflicting `size` when using OpenAI.' + ) + + return mapped_size + class OpenAIChatModelSettings(ModelSettings, total=False): """Settings used for an OpenAI model request.""" @@ -1298,6 +1328,7 @@ def _get_builtin_tools(self, model_request_parameters: ModelRequestParameters) - tools.append(mcp_tool) elif isinstance(tool, ImageGenerationTool): # pragma: no branch has_image_generating_tool = True + size = _resolve_openai_image_generation_size(tool) tools.append( responses.tool_param.ImageGeneration( type='image_generation', @@ -1308,7 +1339,7 @@ def _get_builtin_tools(self, model_request_parameters: ModelRequestParameters) - output_format=tool.output_format or 'png', partial_images=tool.partial_images, quality=tool.quality, - size=tool.size, + size=size, ) ) else: diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py index 7433841cde..d1490b1fd7 100644 --- a/tests/models/test_openai_responses.py +++ b/tests/models/test_openai_responses.py @@ -1,7 +1,7 @@ import json import re from dataclasses import replace -from typing import Any, cast +from typing import Any, Literal, cast import pytest from inline_snapshot import snapshot @@ -32,17 +32,19 @@ ToolCallPartDelta, ToolReturnPart, UnexpectedModelBehavior, + UserError, UserPromptPart, capture_run_messages, ) from pydantic_ai.agent import Agent -from pydantic_ai.builtin_tools import CodeExecutionTool, MCPServerTool, WebSearchTool +from pydantic_ai.builtin_tools import CodeExecutionTool, ImageAspectRatio, MCPServerTool, WebSearchTool from pydantic_ai.exceptions import ModelHTTPError, ModelRetry from pydantic_ai.messages import ( BuiltinToolCallEvent, # pyright: ignore[reportDeprecated] BuiltinToolResultEvent, # pyright: ignore[reportDeprecated] ) from pydantic_ai.models import ModelRequestParameters +from pydantic_ai.models.openai import _resolve_openai_image_generation_size # pyright: ignore[reportPrivateUsage] from pydantic_ai.output import NativeOutput, PromptedOutput, TextOutput, ToolOutput from pydantic_ai.profiles.openai import openai_model_profile from pydantic_ai.tools import ToolDefinition @@ -124,6 +126,37 @@ async def test_openai_responses_image_detail_vendor_metadata(allow_model_request assert all(part['detail'] == 'high' for part in image_parts) +@pytest.mark.parametrize( + ('aspect_ratio', 'explicit_size', 'expected_size'), + [ + ('1:1', 'auto', '1024x1024'), + ('2:3', '1024x1536', '1024x1536'), + ('3:2', 'auto', '1536x1024'), + ], +) +def test_openai_responses_image_generation_tool_aspect_ratio_mapping( + aspect_ratio: ImageAspectRatio, + explicit_size: Literal['1024x1024', '1024x1536', '1536x1024', 'auto'], + expected_size: Literal['1024x1024', '1024x1536', '1536x1024'], +) -> None: + tool = ImageGenerationTool(aspect_ratio=aspect_ratio, size=explicit_size) + assert _resolve_openai_image_generation_size(tool) == expected_size + + +def test_openai_responses_image_generation_tool_aspect_ratio_invalid() -> None: + tool = ImageGenerationTool(aspect_ratio='16:9') + + with pytest.raises(UserError, match='OpenAI image generation only supports `aspect_ratio` values'): + _resolve_openai_image_generation_size(tool) + + +def test_openai_responses_image_generation_tool_aspect_ratio_conflicts_with_size() -> None: + tool = ImageGenerationTool(aspect_ratio='1:1', size='1536x1024') + + with pytest.raises(UserError, match='cannot combine `aspect_ratio` with a conflicting `size`'): + _resolve_openai_image_generation_size(tool) + + async def test_openai_responses_model_simple_response_with_tool_call(allow_model_requests: None, openai_api_key: str): model = OpenAIResponsesModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key))