diff --git a/docs/builtin-tools.md b/docs/builtin-tools.md index 1e4bdfd279..7d073378da 100644 --- a/docs/builtin-tools.md +++ b/docs/builtin-tools.md @@ -202,7 +202,7 @@ The [`ImageGenerationTool`][pydantic_ai.builtin_tools.ImageGenerationTool] enabl | Provider | Supported | Notes | |----------|-----------|-------| | OpenAI Responses | ✅ | Full feature support. Only supported by models newer than `gpt-5`. Metadata about the generated image, like the [`revised_prompt`](https://platform.openai.com/docs/guides/tools-image-generation#revised-prompt) sent to the underlying image model, is available on the [`BuiltinToolReturnPart`][pydantic_ai.messages.BuiltinToolReturnPart] that's available via [`ModelResponse.builtin_tool_calls`][pydantic_ai.messages.ModelResponse.builtin_tool_calls]. | -| Google | ✅ | No parameter support. Only supported by [image generation models](https://ai.google.dev/gemini-api/docs/image-generation) like `gemini-2.5-flash-image`. These models do not support [structured output](output.md) or [function tools](tools.md). These models will always generate images, even if this built-in tool is not explicitly specified. | +| Google | ✅ | Limited parameter support (only `aspect_ratio`). Only supported by [image generation models](https://ai.google.dev/gemini-api/docs/image-generation) like `gemini-2.5-flash-image`. These models do not support [structured output](output.md) or [function tools](tools.md) and will always generate images, even if this built-in tool is not explicitly specified. | | Anthropic | ❌ | | | Groq | ❌ | | | Bedrock | ❌ | | @@ -291,6 +291,27 @@ assert isinstance(result.output, BinaryImage) _(This example is complete, it can be run "as is")_ +OpenAI Responses models also respect the `aspect_ratio` parameter. Because the OpenAI API only exposes discrete image sizes, +Pydantic AI maps `'1:1'` -> `1024x1024`, `'2:3'` -> `1024x1536`, and `'3:2'` -> `1536x1024`. 
Providing any other aspect ratio +results in an error, and if you also set `size` it must be `'auto'` or match the computed value. + +To control the aspect ratio when using Gemini image models, include the `ImageGenerationTool` explicitly: + +```py {title="image_generation_google_aspect_ratio.py"} +from pydantic_ai import Agent, BinaryImage, ImageGenerationTool + +agent = Agent( + 'google-gla:gemini-2.5-flash-image', + builtin_tools=[ImageGenerationTool(aspect_ratio='16:9')], + output_type=BinaryImage, +) + +result = agent.run_sync('Generate a wide illustration of an axolotl city skyline.') +assert isinstance(result.output, BinaryImage) +``` + +_(This example is complete, it can be run "as is")_ + For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageGenerationTool]. #### Provider Support @@ -305,6 +326,7 @@ For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageG | `partial_images` | ✅ | ❌ | | `quality` | ✅ | ❌ | | `size` | ✅ | ❌ | +| `aspect_ratio` | ✅ (1:1, 2:3, 3:2) | ✅ | ## URL Context Tool diff --git a/pydantic_ai_slim/pydantic_ai/builtin_tools.py b/pydantic_ai_slim/pydantic_ai/builtin_tools.py index 5559b3124a..f8fc766f2a 100644 --- a/pydantic_ai_slim/pydantic_ai/builtin_tools.py +++ b/pydantic_ai_slim/pydantic_ai/builtin_tools.py @@ -21,6 +21,9 @@ _BUILTIN_TOOL_TYPES: dict[str, type[AbstractBuiltinTool]] = {} +ImageAspectRatio = Literal['21:9', '16:9', '4:3', '3:2', '1:1', '9:16', '3:4', '2:3', '5:4', '4:5'] +"""Supported aspect ratios for image generation tools.""" + @dataclass(kw_only=True) class AbstractBuiltinTool(ABC): @@ -255,6 +258,15 @@ class ImageGenerationTool(AbstractBuiltinTool): * OpenAI Responses """ + aspect_ratio: ImageAspectRatio | None = None + """The aspect ratio to use for generated images. 
+ + Supported by: + + * Google image-generation models (Gemini) + * OpenAI Responses (only '1:1', '2:3', and '3:2', which are mapped to supported sizes; other values raise `UserError`) + """ + kind: str = 'image_generation' """The kind of tool.""" diff --git a/pydantic_ai_slim/pydantic_ai/models/google.py b/pydantic_ai_slim/pydantic_ai/models/google.py index 071f65fa66..61583080e5 100644 --- a/pydantic_ai_slim/pydantic_ai/models/google.py +++ b/pydantic_ai_slim/pydantic_ai/models/google.py @@ -72,6 +72,7 @@ GoogleSearchDict, GroundingMetadata, HttpOptionsDict, + ImageConfigDict, MediaResolution, Modality, Part, @@ -325,12 +326,16 @@ async def request_stream( response = await self._generate_content(messages, True, model_settings, model_request_parameters) yield await self._process_streamed_response(response, model_request_parameters) # type: ignore - def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[ToolDict] | None: + def _get_tools( + self, model_request_parameters: ModelRequestParameters + ) -> tuple[list[ToolDict] | None, ImageConfigDict | None]: tools: list[ToolDict] = [ ToolDict(function_declarations=[_function_declaration_from_tool(t)]) for t in model_request_parameters.tool_defs.values() ] + image_config: ImageConfigDict | None = None + if model_request_parameters.builtin_tools: if model_request_parameters.function_tools: raise UserError('Google does not support function tools and built-in tools at the same time.') @@ -347,11 +352,13 @@ def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[T raise UserError( "`ImageGenerationTool` is not supported by this model. Use a model with 'image' in the name instead." ) + if tool.aspect_ratio: + image_config = ImageConfigDict(aspect_ratio=tool.aspect_ratio) else: # pragma: no cover raise UserError( f'`{tool.__class__.__name__}` is not supported by `GoogleModel`. If it should be, please file an issue.' 
) - return tools or None + return tools or None, image_config def _get_tool_config( self, model_request_parameters: ModelRequestParameters, tools: list[ToolDict] | None @@ -401,7 +408,7 @@ async def _build_content_and_config( model_settings: GoogleModelSettings, model_request_parameters: ModelRequestParameters, ) -> tuple[list[ContentUnionDict], GenerateContentConfigDict]: - tools = self._get_tools(model_request_parameters) + tools, image_config = self._get_tools(model_request_parameters) if tools and not self.profile.supports_tools: raise UserError('Tools are not supported by this model.') @@ -457,7 +464,9 @@ async def _build_content_and_config( response_mime_type=response_mime_type, response_schema=response_schema, response_modalities=modalities, + image_config=image_config, ) + return contents, config def _process_response(self, response: GenerateContentResponse) -> ModelResponse: diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py index ed1e711823..e878ef0856 100644 --- a/pydantic_ai_slim/pydantic_ai/models/openai.py +++ b/pydantic_ai_slim/pydantic_ai/models/openai.py @@ -18,7 +18,7 @@ from .._run_context import RunContext from .._thinking_part import split_content_into_text_and_thinking from .._utils import guard_tool_call_id as _guard_tool_call_id, now_utc as _now_utc, number_to_datetime -from ..builtin_tools import CodeExecutionTool, ImageGenerationTool, MCPServerTool, WebSearchTool +from ..builtin_tools import CodeExecutionTool, ImageAspectRatio, ImageGenerationTool, MCPServerTool, WebSearchTool from ..exceptions import UserError from ..messages import ( AudioUrl, @@ -134,6 +134,36 @@ 'failed': 'error', } +_OPENAI_ASPECT_RATIO_TO_SIZE: dict[ImageAspectRatio, Literal['1024x1024', '1024x1536', '1536x1024']] = { + '1:1': '1024x1024', + '2:3': '1024x1536', + '3:2': '1536x1024', +} + + +def _resolve_openai_image_generation_size( + tool: ImageGenerationTool, +) -> Literal['auto', '1024x1024', '1024x1536', 
'1536x1024']: + """Map `ImageGenerationTool.aspect_ratio` to an OpenAI size string when provided.""" + aspect_ratio = tool.aspect_ratio + if aspect_ratio is None: + return tool.size + + mapped_size = _OPENAI_ASPECT_RATIO_TO_SIZE.get(aspect_ratio) + if mapped_size is None: + supported = ', '.join(_OPENAI_ASPECT_RATIO_TO_SIZE) + raise UserError( + f'OpenAI image generation only supports `aspect_ratio` values: {supported}. ' + 'Specify one of those values or omit `aspect_ratio`.' + ) + + if tool.size not in ('auto', mapped_size): + raise UserError( + '`ImageGenerationTool` cannot combine `aspect_ratio` with a conflicting `size` when using OpenAI.' + ) + + return mapped_size + class OpenAIChatModelSettings(ModelSettings, total=False): """Settings used for an OpenAI model request.""" @@ -1298,6 +1328,7 @@ def _get_builtin_tools(self, model_request_parameters: ModelRequestParameters) - tools.append(mcp_tool) elif isinstance(tool, ImageGenerationTool): # pragma: no branch has_image_generating_tool = True + size = _resolve_openai_image_generation_size(tool) tools.append( responses.tool_param.ImageGeneration( type='image_generation', @@ -1308,7 +1339,7 @@ def _get_builtin_tools(self, model_request_parameters: ModelRequestParameters) - output_format=tool.output_format or 'png', partial_images=tool.partial_images, quality=tool.quality, - size=tool.size, + size=size, ) ) else: diff --git a/tests/models/test_google.py b/tests/models/test_google.py index 82332f38ef..d9c9606e96 100644 --- a/tests/models/test_google.py +++ b/tests/models/test_google.py @@ -3134,6 +3134,15 @@ async def test_google_image_generation_tool(allow_model_requests: None, google_p await agent.run('Generate an image of an axolotl.') +async def test_google_image_generation_tool_aspect_ratio(google_provider: GoogleProvider) -> None: + model = GoogleModel('gemini-2.5-flash-image', provider=google_provider) + params = ModelRequestParameters(builtin_tools=[ImageGenerationTool(aspect_ratio='16:9')]) + + tools, 
image_config = model._get_tools(params) # pyright: ignore[reportPrivateUsage] + assert tools is None + assert image_config == {'aspect_ratio': '16:9'} + + async def test_google_vertexai_image_generation(allow_model_requests: None, vertex_provider: GoogleProvider): model = GoogleModel('gemini-2.5-flash-image', provider=vertex_provider) diff --git a/tests/models/test_model_request_parameters.py b/tests/models/test_model_request_parameters.py index 1c8d0780e5..33bda3a1bb 100644 --- a/tests/models/test_model_request_parameters.py +++ b/tests/models/test_model_request_parameters.py @@ -91,6 +91,7 @@ def test_model_request_parameters_are_serializable(): 'partial_images': 0, 'quality': 'auto', 'size': '1024x1024', + 'aspect_ratio': None, }, {'kind': 'memory'}, { diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py index 7433841cde..d1490b1fd7 100644 --- a/tests/models/test_openai_responses.py +++ b/tests/models/test_openai_responses.py @@ -1,7 +1,7 @@ import json import re from dataclasses import replace -from typing import Any, cast +from typing import Any, Literal, cast import pytest from inline_snapshot import snapshot @@ -32,17 +32,19 @@ ToolCallPartDelta, ToolReturnPart, UnexpectedModelBehavior, + UserError, UserPromptPart, capture_run_messages, ) from pydantic_ai.agent import Agent -from pydantic_ai.builtin_tools import CodeExecutionTool, MCPServerTool, WebSearchTool +from pydantic_ai.builtin_tools import CodeExecutionTool, ImageAspectRatio, MCPServerTool, WebSearchTool from pydantic_ai.exceptions import ModelHTTPError, ModelRetry from pydantic_ai.messages import ( BuiltinToolCallEvent, # pyright: ignore[reportDeprecated] BuiltinToolResultEvent, # pyright: ignore[reportDeprecated] ) from pydantic_ai.models import ModelRequestParameters +from pydantic_ai.models.openai import _resolve_openai_image_generation_size # pyright: ignore[reportPrivateUsage] from pydantic_ai.output import NativeOutput, PromptedOutput, TextOutput, 
ToolOutput from pydantic_ai.profiles.openai import openai_model_profile from pydantic_ai.tools import ToolDefinition @@ -124,6 +126,37 @@ async def test_openai_responses_image_detail_vendor_metadata(allow_model_request assert all(part['detail'] == 'high' for part in image_parts) +@pytest.mark.parametrize( + ('aspect_ratio', 'explicit_size', 'expected_size'), + [ + ('1:1', 'auto', '1024x1024'), + ('2:3', '1024x1536', '1024x1536'), + ('3:2', 'auto', '1536x1024'), + ], +) +def test_openai_responses_image_generation_tool_aspect_ratio_mapping( + aspect_ratio: ImageAspectRatio, + explicit_size: Literal['1024x1024', '1024x1536', '1536x1024', 'auto'], + expected_size: Literal['1024x1024', '1024x1536', '1536x1024'], +) -> None: + tool = ImageGenerationTool(aspect_ratio=aspect_ratio, size=explicit_size) + assert _resolve_openai_image_generation_size(tool) == expected_size + + +def test_openai_responses_image_generation_tool_aspect_ratio_invalid() -> None: + tool = ImageGenerationTool(aspect_ratio='16:9') + + with pytest.raises(UserError, match='OpenAI image generation only supports `aspect_ratio` values'): + _resolve_openai_image_generation_size(tool) + + +def test_openai_responses_image_generation_tool_aspect_ratio_conflicts_with_size() -> None: + tool = ImageGenerationTool(aspect_ratio='1:1', size='1536x1024') + + with pytest.raises(UserError, match='cannot combine `aspect_ratio` with a conflicting `size`'): + _resolve_openai_image_generation_size(tool) + + async def test_openai_responses_model_simple_response_with_tool_call(allow_model_requests: None, openai_api_key: str): model = OpenAIResponsesModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key)) diff --git a/tests/test_examples.py b/tests/test_examples.py index 85bae688d0..ee48f20a1f 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -679,6 +679,12 @@ async def model_logic( # noqa: C901 FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='160d47')), ] ) + elif 
m.content == 'Generate a wide illustration of an axolotl city skyline.': + return ModelResponse( + parts=[ + FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='wide-axolotl-city')), + ] + ) elif m.content == 'Generate a chart of y=x^2 for x=-5 to 5.': return ModelResponse( parts=[