Add support for aspect ratio in Gemini image generation

mwildehahn · mwildehahn · commit e7b6dece357e · 2025-11-12T23:01:42.000-08:00
diff --git a/docs/builtin-tools.md b/docs/builtin-tools.md
@@ -202,7 +202,7 @@ The [`ImageGenerationTool`][pydantic_ai.builtin_tools.ImageGenerationTool] enabl
 | Provider | Supported | Notes |
 |----------|-----------|-------|
 | OpenAI Responses | ✅ | Full feature support. Only supported by models newer than `gpt-5`. Metadata about the generated image, like the [`revised_prompt`](https://platform.openai.com/docs/guides/tools-image-generation#revised-prompt) sent to the underlying image model, is available on the [`BuiltinToolReturnPart`][pydantic_ai.messages.BuiltinToolReturnPart] that's available via [`ModelResponse.builtin_tool_calls`][pydantic_ai.messages.ModelResponse.builtin_tool_calls]. |
-| Google | ✅ | No parameter support. Only supported by [image generation models](https://ai.google.dev/gemini-api/docs/image-generation) like `gemini-2.5-flash-image`. These models do not support [structured output](output.md) or [function tools](tools.md). These models will always generate images, even if this built-in tool is not explicitly specified. |
+| Google | ✅ | Supports only the `aspect_ratio` parameter, which requires specifying the `ImageGenerationTool` explicitly. Only supported by [image generation models](https://ai.google.dev/gemini-api/docs/image-generation) like `gemini-2.5-flash-image`. These models do not support [structured output](output.md) or [function tools](tools.md) and will always generate images, even if this built-in tool is not explicitly specified. |
 | Anthropic | ❌ | |
 | Groq | ❌ | |
 | Bedrock | ❌ | |
@@ -248,6 +248,23 @@ assert isinstance(result.response.images[0], BinaryImage)
 
 _(This example is complete, it can be run "as is")_
 
+To control the aspect ratio when using Gemini image models, include the `ImageGenerationTool` explicitly and set its `aspect_ratio`:
+
+```py {title="image_generation_google_aspect_ratio.py"}
+from pydantic_ai import Agent, BinaryImage, ImageGenerationTool
+
+agent = Agent(
+    'google-gla:gemini-2.5-flash-image',
+    builtin_tools=[ImageGenerationTool(aspect_ratio='16:9')],
+    output_type=BinaryImage,
+)
+
+result = agent.run_sync('Generate a wide illustration of an axolotl city skyline.')
+assert isinstance(result.output, BinaryImage)
+```
+
+_(This example is complete, it can be run "as is")_
+
 The `ImageGenerationTool` can be used together with `output_type=BinaryImage` to get [image output](output.md#image-output). If the `ImageGenerationTool` built-in tool is not explicitly specified, it will be enabled automatically:
 
 ```py {title="image_generation_output.py"}
@@ -291,6 +308,8 @@ assert isinstance(result.output, BinaryImage)
 
 _(This example is complete, it can be run "as is")_
 
+Gemini image models support a separate `aspect_ratio` parameter on `ImageGenerationTool`: for example, pass `aspect_ratio='16:9'` to generate a wide composition or `aspect_ratio='9:16'` to generate a tall one.
+
 For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageGenerationTool].
 
 #### Provider Support
@@ -305,6 +324,7 @@ For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageG
 | `partial_images` | ✅ | ❌ |
 | `quality` | ✅ | ❌ |
 | `size` | ✅ | ❌ |
+| `aspect_ratio` | ❌ | ✅ |
 
 ## URL Context Tool
 
diff --git a/pydantic_ai_slim/pydantic_ai/builtin_tools.py b/pydantic_ai_slim/pydantic_ai/builtin_tools.py
@@ -21,6 +21,9 @@
 
 _BUILTIN_TOOL_TYPES: dict[str, type[AbstractBuiltinTool]] = {}
 
+ImageAspectRatio = Literal['21:9', '16:9', '4:3', '3:2', '1:1', '9:16', '3:4', '2:3', '5:4', '4:5']
+"""Supported aspect ratios for image generation tools."""
+
 
 @dataclass(kw_only=True)
 class AbstractBuiltinTool(ABC):
@@ -255,6 +258,14 @@ class ImageGenerationTool(AbstractBuiltinTool):
     * OpenAI Responses
     """
 
+    aspect_ratio: ImageAspectRatio | None = None
+    """The aspect ratio to use for generated images.
+
+    Supported by:
+
+    * Google image-generation models (Gemini) when the tool is explicitly enabled.
+    """
+
     kind: str = 'image_generation'
     """The kind of tool."""
 
diff --git a/pydantic_ai_slim/pydantic_ai/models/google.py b/pydantic_ai_slim/pydantic_ai/models/google.py
@@ -72,6 +72,7 @@
         GoogleSearchDict,
         GroundingMetadata,
         HttpOptionsDict,
+        ImageConfigDict,
         MediaResolution,
         Modality,
         Part,
@@ -325,12 +326,16 @@ async def request_stream(
         response = await self._generate_content(messages, True, model_settings, model_request_parameters)
         yield await self._process_streamed_response(response, model_request_parameters)  # type: ignore
 
-    def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[ToolDict] | None:
+    def _get_tools(
+        self, model_request_parameters: ModelRequestParameters
+    ) -> tuple[list[ToolDict] | None, ImageConfigDict | None]:
         tools: list[ToolDict] = [
             ToolDict(function_declarations=[_function_declaration_from_tool(t)])
             for t in model_request_parameters.tool_defs.values()
         ]
 
+        image_config: ImageConfigDict | None = None
+
         if model_request_parameters.builtin_tools:
             if model_request_parameters.function_tools:
                 raise UserError('Google does not support function tools and built-in tools at the same time.')
@@ -347,11 +352,17 @@ def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[T
                         raise UserError(
                             "`ImageGenerationTool` is not supported by this model. Use a model with 'image' in the name instead."
                         )
+                    if tool.aspect_ratio:
+                        if image_config and image_config.get('aspect_ratio') != tool.aspect_ratio:
+                            raise UserError(
+                                'Multiple `ImageGenerationTool` instances with different `aspect_ratio` values are not supported.'
+                            )
+                        image_config = ImageConfigDict(aspect_ratio=tool.aspect_ratio)
                 else:  # pragma: no cover
                     raise UserError(
                         f'`{tool.__class__.__name__}` is not supported by `GoogleModel`. If it should be, please file an issue.'
                     )
-        return tools or None
+        return tools or None, image_config
 
     def _get_tool_config(
         self, model_request_parameters: ModelRequestParameters, tools: list[ToolDict] | None
@@ -401,7 +412,7 @@ async def _build_content_and_config(
         model_settings: GoogleModelSettings,
         model_request_parameters: ModelRequestParameters,
     ) -> tuple[list[ContentUnionDict], GenerateContentConfigDict]:
-        tools = self._get_tools(model_request_parameters)
+        tools, image_config = self._get_tools(model_request_parameters)
         if tools and not self.profile.supports_tools:
             raise UserError('Tools are not supported by this model.')
 
@@ -437,27 +448,30 @@ async def _build_content_and_config(
             else:
                 raise UserError('Google does not support setting ModelSettings.timeout to a httpx.Timeout')
 
-        config = GenerateContentConfigDict(
-            http_options=http_options,
-            system_instruction=system_instruction,
-            temperature=model_settings.get('temperature'),
-            top_p=model_settings.get('top_p'),
-            max_output_tokens=model_settings.get('max_tokens'),
-            stop_sequences=model_settings.get('stop_sequences'),
-            presence_penalty=model_settings.get('presence_penalty'),
-            frequency_penalty=model_settings.get('frequency_penalty'),
-            seed=model_settings.get('seed'),
-            safety_settings=model_settings.get('google_safety_settings'),
-            thinking_config=model_settings.get('google_thinking_config'),
-            labels=model_settings.get('google_labels'),
-            media_resolution=model_settings.get('google_video_resolution'),
-            cached_content=model_settings.get('google_cached_content'),
-            tools=cast(ToolListUnionDict, tools),
-            tool_config=tool_config,
-            response_mime_type=response_mime_type,
-            response_schema=response_schema,
-            response_modalities=modalities,
-        )
+        config: GenerateContentConfigDict = {
+            'http_options': http_options,
+            'system_instruction': system_instruction,
+            'temperature': model_settings.get('temperature'),
+            'top_p': model_settings.get('top_p'),
+            'max_output_tokens': model_settings.get('max_tokens'),
+            'stop_sequences': model_settings.get('stop_sequences'),
+            'presence_penalty': model_settings.get('presence_penalty'),
+            'frequency_penalty': model_settings.get('frequency_penalty'),
+            'seed': model_settings.get('seed'),
+            'safety_settings': model_settings.get('google_safety_settings'),
+            'thinking_config': model_settings.get('google_thinking_config'),
+            'labels': model_settings.get('google_labels'),
+            'media_resolution': model_settings.get('google_video_resolution'),
+            'cached_content': model_settings.get('google_cached_content'),
+            'tools': cast(ToolListUnionDict, tools),
+            'tool_config': tool_config,
+            'response_mime_type': response_mime_type,
+            'response_schema': response_schema,
+            'response_modalities': modalities,
+        }
+        if image_config:
+            config['image_config'] = image_config
+
         return contents, config
 
     def _process_response(self, response: GenerateContentResponse) -> ModelResponse:
diff --git a/tests/models/test_google.py b/tests/models/test_google.py
@@ -3134,6 +3134,31 @@ async def test_google_image_generation_tool(allow_model_requests: None, google_p
         await agent.run('Generate an image of an axolotl.')
 
 
+async def test_google_image_generation_tool_aspect_ratio(google_provider: GoogleProvider) -> None:
+    model = GoogleModel('gemini-2.5-flash-image', provider=google_provider)
+    params = ModelRequestParameters(builtin_tools=[ImageGenerationTool(aspect_ratio='16:9')])
+
+    tools, image_config = model._get_tools(params)  # pyright: ignore[reportPrivateUsage]
+    assert tools is None
+    assert image_config == {'aspect_ratio': '16:9'}
+
+
+async def test_google_image_generation_tool_aspect_ratio_conflict(google_provider: GoogleProvider) -> None:
+    model = GoogleModel('gemini-2.5-flash-image', provider=google_provider)
+    params = ModelRequestParameters(
+        builtin_tools=[
+            ImageGenerationTool(aspect_ratio='16:9'),
+            ImageGenerationTool(aspect_ratio='1:1'),
+        ]
+    )
+
+    with pytest.raises(
+        UserError,
+        match='Multiple `ImageGenerationTool` instances with different `aspect_ratio` values are not supported.',
+    ):
+        model._get_tools(params)  # pyright: ignore[reportPrivateUsage]
+
+
 async def test_google_vertexai_image_generation(allow_model_requests: None, vertex_provider: GoogleProvider):
     model = GoogleModel('gemini-2.5-flash-image', provider=vertex_provider)