Skip to content

Commit e7b6dec

Browse files
committed
Add support for aspect ratio in gemini image generation
1 parent 365b67b commit e7b6dec

File tree

4 files changed

+95
-25
lines changed

4 files changed

+95
-25
lines changed

docs/builtin-tools.md

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ The [`ImageGenerationTool`][pydantic_ai.builtin_tools.ImageGenerationTool] enabl
202202
| Provider | Supported | Notes |
203203
|----------|-----------|-------|
204204
| OpenAI Responses | ✅ | Full feature support. Only supported by models newer than `gpt-5`. Metadata about the generated image, like the [`revised_prompt`](https://platform.openai.com/docs/guides/tools-image-generation#revised-prompt) sent to the underlying image model, is available on the [`BuiltinToolReturnPart`][pydantic_ai.messages.BuiltinToolReturnPart] that's available via [`ModelResponse.builtin_tool_calls`][pydantic_ai.messages.ModelResponse.builtin_tool_calls]. |
205-
| Google | ✅ | No parameter support. Only supported by [image generation models](https://ai.google.dev/gemini-api/docs/image-generation) like `gemini-2.5-flash-image`. These models do not support [structured output](output.md) or [function tools](tools.md). These models will always generate images, even if this built-in tool is not explicitly specified. |
205+
| Google | ✅ | Supports the `aspect_ratio` parameter, which only takes effect when the tool is explicitly specified. Only supported by [image generation models](https://ai.google.dev/gemini-api/docs/image-generation) like `gemini-2.5-flash-image`. These models do not support [structured output](output.md) or [function tools](tools.md) and will always generate images, even if this built-in tool is not explicitly specified. |
206206
| Anthropic | ❌ | |
207207
| Groq | ❌ | |
208208
| Bedrock | ❌ | |
@@ -248,6 +248,23 @@ assert isinstance(result.response.images[0], BinaryImage)
248248

249249
_(This example is complete, it can be run "as is")_
250250

251+
To control the aspect ratio when using Gemini image models, include the `ImageGenerationTool` explicitly and set its `aspect_ratio`:
252+
253+
```py {title="image_generation_google_aspect_ratio.py"}
254+
from pydantic_ai import Agent, BinaryImage, ImageGenerationTool
255+
256+
agent = Agent(
257+
'google-gla:gemini-2.5-flash-image',
258+
builtin_tools=[ImageGenerationTool(aspect_ratio='16:9')],
259+
output_type=BinaryImage,
260+
)
261+
262+
result = agent.run_sync('Generate a wide illustration of an axolotl city skyline.')
263+
assert isinstance(result.output, BinaryImage)
264+
```
265+
266+
_(This example is complete, it can be run "as is")_
267+
251268
The `ImageGenerationTool` can be used together with `output_type=BinaryImage` to get [image output](output.md#image-output). If the `ImageGenerationTool` built-in tool is not explicitly specified, it will be enabled automatically:
252269

253270
```py {title="image_generation_output.py"}
@@ -291,6 +308,8 @@ assert isinstance(result.output, BinaryImage)
291308

292309
_(This example is complete, it can be run "as is")_
293310

311+
Gemini image models support a separate `aspect_ratio` parameter. For example, pass `aspect_ratio='16:9'` when constructing the `ImageGenerationTool` to generate a wide composition, or `aspect_ratio='9:16'` for a tall one.
312+
294313
For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageGenerationTool].
295314

296315
#### Provider Support
@@ -305,6 +324,7 @@ For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageG
305324
| `partial_images` |||
306325
| `quality` |||
307326
| `size` |||
327+
| `aspect_ratio` |||
308328

309329
## URL Context Tool
310330

pydantic_ai_slim/pydantic_ai/builtin_tools.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@
2121

2222
_BUILTIN_TOOL_TYPES: dict[str, type[AbstractBuiltinTool]] = {}
2323

24+
ImageAspectRatio = Literal['21:9', '16:9', '4:3', '3:2', '1:1', '9:16', '3:4', '2:3', '5:4', '4:5']
25+
"""Supported aspect ratios for image generation tools."""
26+
2427

2528
@dataclass(kw_only=True)
2629
class AbstractBuiltinTool(ABC):
@@ -255,6 +258,14 @@ class ImageGenerationTool(AbstractBuiltinTool):
255258
* OpenAI Responses
256259
"""
257260

261+
aspect_ratio: ImageAspectRatio | None = None
262+
"""The aspect ratio to use for generated images.
263+
264+
Supported by:
265+
266+
* Google image-generation models (Gemini) when the tool is explicitly enabled.
267+
"""
268+
258269
kind: str = 'image_generation'
259270
"""The kind of tool."""
260271

pydantic_ai_slim/pydantic_ai/models/google.py

Lines changed: 38 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
GoogleSearchDict,
7373
GroundingMetadata,
7474
HttpOptionsDict,
75+
ImageConfigDict,
7576
MediaResolution,
7677
Modality,
7778
Part,
@@ -325,12 +326,16 @@ async def request_stream(
325326
response = await self._generate_content(messages, True, model_settings, model_request_parameters)
326327
yield await self._process_streamed_response(response, model_request_parameters) # type: ignore
327328

328-
def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[ToolDict] | None:
329+
def _get_tools(
330+
self, model_request_parameters: ModelRequestParameters
331+
) -> tuple[list[ToolDict] | None, ImageConfigDict | None]:
329332
tools: list[ToolDict] = [
330333
ToolDict(function_declarations=[_function_declaration_from_tool(t)])
331334
for t in model_request_parameters.tool_defs.values()
332335
]
333336

337+
image_config: ImageConfigDict | None = None
338+
334339
if model_request_parameters.builtin_tools:
335340
if model_request_parameters.function_tools:
336341
raise UserError('Google does not support function tools and built-in tools at the same time.')
@@ -347,11 +352,17 @@ def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[T
347352
raise UserError(
348353
"`ImageGenerationTool` is not supported by this model. Use a model with 'image' in the name instead."
349354
)
355+
if tool.aspect_ratio:
356+
if image_config and image_config.get('aspect_ratio') != tool.aspect_ratio:
357+
raise UserError(
358+
'Multiple `ImageGenerationTool` instances with different `aspect_ratio` values are not supported.'
359+
)
360+
image_config = ImageConfigDict(aspect_ratio=tool.aspect_ratio)
350361
else: # pragma: no cover
351362
raise UserError(
352363
f'`{tool.__class__.__name__}` is not supported by `GoogleModel`. If it should be, please file an issue.'
353364
)
354-
return tools or None
365+
return tools or None, image_config
355366

356367
def _get_tool_config(
357368
self, model_request_parameters: ModelRequestParameters, tools: list[ToolDict] | None
@@ -401,7 +412,7 @@ async def _build_content_and_config(
401412
model_settings: GoogleModelSettings,
402413
model_request_parameters: ModelRequestParameters,
403414
) -> tuple[list[ContentUnionDict], GenerateContentConfigDict]:
404-
tools = self._get_tools(model_request_parameters)
415+
tools, image_config = self._get_tools(model_request_parameters)
405416
if tools and not self.profile.supports_tools:
406417
raise UserError('Tools are not supported by this model.')
407418

@@ -437,27 +448,30 @@ async def _build_content_and_config(
437448
else:
438449
raise UserError('Google does not support setting ModelSettings.timeout to a httpx.Timeout')
439450

440-
config = GenerateContentConfigDict(
441-
http_options=http_options,
442-
system_instruction=system_instruction,
443-
temperature=model_settings.get('temperature'),
444-
top_p=model_settings.get('top_p'),
445-
max_output_tokens=model_settings.get('max_tokens'),
446-
stop_sequences=model_settings.get('stop_sequences'),
447-
presence_penalty=model_settings.get('presence_penalty'),
448-
frequency_penalty=model_settings.get('frequency_penalty'),
449-
seed=model_settings.get('seed'),
450-
safety_settings=model_settings.get('google_safety_settings'),
451-
thinking_config=model_settings.get('google_thinking_config'),
452-
labels=model_settings.get('google_labels'),
453-
media_resolution=model_settings.get('google_video_resolution'),
454-
cached_content=model_settings.get('google_cached_content'),
455-
tools=cast(ToolListUnionDict, tools),
456-
tool_config=tool_config,
457-
response_mime_type=response_mime_type,
458-
response_schema=response_schema,
459-
response_modalities=modalities,
460-
)
451+
config: GenerateContentConfigDict = {
452+
'http_options': http_options,
453+
'system_instruction': system_instruction,
454+
'temperature': model_settings.get('temperature'),
455+
'top_p': model_settings.get('top_p'),
456+
'max_output_tokens': model_settings.get('max_tokens'),
457+
'stop_sequences': model_settings.get('stop_sequences'),
458+
'presence_penalty': model_settings.get('presence_penalty'),
459+
'frequency_penalty': model_settings.get('frequency_penalty'),
460+
'seed': model_settings.get('seed'),
461+
'safety_settings': model_settings.get('google_safety_settings'),
462+
'thinking_config': model_settings.get('google_thinking_config'),
463+
'labels': model_settings.get('google_labels'),
464+
'media_resolution': model_settings.get('google_video_resolution'),
465+
'cached_content': model_settings.get('google_cached_content'),
466+
'tools': cast(ToolListUnionDict, tools),
467+
'tool_config': tool_config,
468+
'response_mime_type': response_mime_type,
469+
'response_schema': response_schema,
470+
'response_modalities': modalities,
471+
}
472+
if image_config:
473+
config['image_config'] = image_config
474+
461475
return contents, config
462476

463477
def _process_response(self, response: GenerateContentResponse) -> ModelResponse:

tests/models/test_google.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3134,6 +3134,31 @@ async def test_google_image_generation_tool(allow_model_requests: None, google_p
31343134
await agent.run('Generate an image of an axolotl.')
31353135

31363136

3137+
async def test_google_image_generation_tool_aspect_ratio(google_provider: GoogleProvider) -> None:
3138+
model = GoogleModel('gemini-2.5-flash-image', provider=google_provider)
3139+
params = ModelRequestParameters(builtin_tools=[ImageGenerationTool(aspect_ratio='16:9')])
3140+
3141+
tools, image_config = model._get_tools(params) # pyright: ignore[reportPrivateUsage]
3142+
assert tools is None
3143+
assert image_config == {'aspect_ratio': '16:9'}
3144+
3145+
3146+
async def test_google_image_generation_tool_aspect_ratio_conflict(google_provider: GoogleProvider) -> None:
3147+
model = GoogleModel('gemini-2.5-flash-image', provider=google_provider)
3148+
params = ModelRequestParameters(
3149+
builtin_tools=[
3150+
ImageGenerationTool(aspect_ratio='16:9'),
3151+
ImageGenerationTool(aspect_ratio='1:1'),
3152+
]
3153+
)
3154+
3155+
with pytest.raises(
3156+
UserError,
3157+
match='Multiple `ImageGenerationTool` instances with different `aspect_ratio` values are not supported.',
3158+
):
3159+
model._get_tools(params) # pyright: ignore[reportPrivateUsage]
3160+
3161+
31373162
async def test_google_vertexai_image_generation(allow_model_requests: None, vertex_provider: GoogleProvider):
31383163
model = GoogleModel('gemini-2.5-flash-image', provider=vertex_provider)
31393164

0 commit comments

Comments
 (0)