Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion docs/builtin-tools.md
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ The [`ImageGenerationTool`][pydantic_ai.builtin_tools.ImageGenerationTool] enabl
| Provider | Supported | Notes |
|----------|-----------|-------|
| OpenAI Responses | ✅ | Full feature support. Only supported by models newer than `gpt-5`. Metadata about the generated image, like the [`revised_prompt`](https://platform.openai.com/docs/guides/tools-image-generation#revised-prompt) sent to the underlying image model, is available on the [`BuiltinToolReturnPart`][pydantic_ai.messages.BuiltinToolReturnPart] that's available via [`ModelResponse.builtin_tool_calls`][pydantic_ai.messages.ModelResponse.builtin_tool_calls]. |
| Google | ✅ | No parameter support. Only supported by [image generation models](https://ai.google.dev/gemini-api/docs/image-generation) like `gemini-2.5-flash-image`. These models do not support [structured output](output.md) or [function tools](tools.md). These models will always generate images, even if this built-in tool is not explicitly specified. |
| Google | ✅ | Limited parameter support. Only supported by [image generation models](https://ai.google.dev/gemini-api/docs/image-generation) like `gemini-2.5-flash-image`. These models do not support [structured output](output.md) or [function tools](tools.md) and will always generate images, even if this built-in tool is not explicitly specified. |
| Anthropic | ❌ | |
| Groq | ❌ | |
| Bedrock | ❌ | |
Expand Down Expand Up @@ -291,6 +291,27 @@ assert isinstance(result.output, BinaryImage)

_(This example is complete, it can be run "as is")_

OpenAI Responses models also respect the `aspect_ratio` parameter. Because the OpenAI API only exposes discrete image sizes,
Pydantic AI maps `'1:1'` -> `1024x1024`, `'2:3'` -> `1024x1536`, and `'3:2'` -> `1536x1024`. Providing any other aspect ratio
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
PydanticAI maps `'1:1'` -> `1024x1024`, `'2:3'` -> `1024x1536`, and `'3:2'` -> `1536x1024`. Providing any other aspect ratio
Pydantic AI maps `'1:1'` -> `1024x1024`, `'2:3'` -> `1024x1536`, and `'3:2'` -> `1536x1024`. Providing any other aspect ratio

results in an error, and if you also set `size` it must either be `'auto'` or match the computed value.

To control the aspect ratio when using Gemini image models, include the `ImageGenerationTool` explicitly:

```py {title="image_generation_google_aspect_ratio.py"}
from pydantic_ai import Agent, BinaryImage, ImageGenerationTool

agent = Agent(
'google-gla:gemini-2.5-flash-image',
builtin_tools=[ImageGenerationTool(aspect_ratio='16:9')],
output_type=BinaryImage,
)

result = agent.run_sync('Generate a wide illustration of an axolotl city skyline.')
assert isinstance(result.output, BinaryImage)
```

_(This example is complete, it can be run "as is")_

For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageGenerationTool].

#### Provider Support
Expand All @@ -305,6 +326,7 @@ For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageG
| `partial_images` | ✅ | ❌ |
| `quality` | ✅ | ❌ |
| `size` | ✅ | ❌ |
| `aspect_ratio` | ✅ (`1:1`, `2:3`, `3:2`) | ✅ |
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
| `aspect_ratio` | ✅ (1:1, 2:3, 3:2) ||
| `aspect_ratio` | ✅ (`1:1`, `2:3`, `3:2`) ||

Ideally we'd use a different emoji for partial support; any ideas? 😄


## URL Context Tool

Expand Down
12 changes: 12 additions & 0 deletions pydantic_ai_slim/pydantic_ai/builtin_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@

_BUILTIN_TOOL_TYPES: dict[str, type[AbstractBuiltinTool]] = {}

# The full set of ratios accepted by `ImageGenerationTool.aspect_ratio`. Support varies by provider:
# the OpenAI Responses size mapping only has entries for '1:1', '2:3' and '3:2'.
ImageAspectRatio = Literal['21:9', '16:9', '4:3', '3:2', '1:1', '9:16', '3:4', '2:3', '5:4', '4:5']
"""Supported aspect ratios for image generation tools."""


@dataclass(kw_only=True)
class AbstractBuiltinTool(ABC):
Expand Down Expand Up @@ -255,6 +258,15 @@ class ImageGenerationTool(AbstractBuiltinTool):
* OpenAI Responses
"""

aspect_ratio: ImageAspectRatio | None = None
"""The aspect ratio to use for generated images.

Supported by:

* Google image-generation models (Gemini)
* OpenAI Responses (maps '1:1', '2:3', and '3:2' to supported sizes)
"""

kind: str = 'image_generation'
"""The kind of tool."""

Expand Down
15 changes: 12 additions & 3 deletions pydantic_ai_slim/pydantic_ai/models/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
GoogleSearchDict,
GroundingMetadata,
HttpOptionsDict,
ImageConfigDict,
MediaResolution,
Modality,
Part,
Expand Down Expand Up @@ -325,12 +326,16 @@ async def request_stream(
response = await self._generate_content(messages, True, model_settings, model_request_parameters)
yield await self._process_streamed_response(response, model_request_parameters) # type: ignore

def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[ToolDict] | None:
def _get_tools(
self, model_request_parameters: ModelRequestParameters
) -> tuple[list[ToolDict] | None, ImageConfigDict | None]:
tools: list[ToolDict] = [
ToolDict(function_declarations=[_function_declaration_from_tool(t)])
for t in model_request_parameters.tool_defs.values()
]

image_config: ImageConfigDict | None = None

if model_request_parameters.builtin_tools:
if model_request_parameters.function_tools:
raise UserError('Google does not support function tools and built-in tools at the same time.')
Expand All @@ -347,11 +352,13 @@ def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[T
raise UserError(
"`ImageGenerationTool` is not supported by this model. Use a model with 'image' in the name instead."
)
if tool.aspect_ratio:
image_config = ImageConfigDict(aspect_ratio=tool.aspect_ratio)
else: # pragma: no cover
raise UserError(
f'`{tool.__class__.__name__}` is not supported by `GoogleModel`. If it should be, please file an issue.'
)
return tools or None
return tools or None, image_config

def _get_tool_config(
self, model_request_parameters: ModelRequestParameters, tools: list[ToolDict] | None
Expand Down Expand Up @@ -401,7 +408,7 @@ async def _build_content_and_config(
model_settings: GoogleModelSettings,
model_request_parameters: ModelRequestParameters,
) -> tuple[list[ContentUnionDict], GenerateContentConfigDict]:
tools = self._get_tools(model_request_parameters)
tools, image_config = self._get_tools(model_request_parameters)
if tools and not self.profile.supports_tools:
raise UserError('Tools are not supported by this model.')

Expand Down Expand Up @@ -457,7 +464,9 @@ async def _build_content_and_config(
response_mime_type=response_mime_type,
response_schema=response_schema,
response_modalities=modalities,
image_config=image_config,
)

return contents, config

def _process_response(self, response: GenerateContentResponse) -> ModelResponse:
Expand Down
35 changes: 33 additions & 2 deletions pydantic_ai_slim/pydantic_ai/models/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from .._run_context import RunContext
from .._thinking_part import split_content_into_text_and_thinking
from .._utils import guard_tool_call_id as _guard_tool_call_id, now_utc as _now_utc, number_to_datetime
from ..builtin_tools import CodeExecutionTool, ImageGenerationTool, MCPServerTool, WebSearchTool
from ..builtin_tools import CodeExecutionTool, ImageAspectRatio, ImageGenerationTool, MCPServerTool, WebSearchTool
from ..exceptions import UserError
from ..messages import (
AudioUrl,
Expand Down Expand Up @@ -134,6 +134,36 @@
'failed': 'error',
}

# Lookup from an `ImageGenerationTool.aspect_ratio` value to the concrete OpenAI `size` string.
# `_resolve_openai_image_generation_size` raises a `UserError` for any ratio that is not a key here.
_OPENAI_ASPECT_RATIO_TO_SIZE: dict[ImageAspectRatio, Literal['1024x1024', '1024x1536', '1536x1024']] = {
'1:1': '1024x1024',
'2:3': '1024x1536',
'3:2': '1536x1024',
}


def _resolve_openai_image_generation_size(
    tool: ImageGenerationTool,
) -> Literal['auto', '1024x1024', '1024x1536', '1536x1024']:
    """Work out the `size` value to send to OpenAI for an `ImageGenerationTool`.

    When the tool has no `aspect_ratio`, its own `size` is returned unchanged. Otherwise the ratio
    is looked up in `_OPENAI_ASPECT_RATIO_TO_SIZE` and the mapped size is returned.

    Raises:
        UserError: If `aspect_ratio` has no entry in the mapping, or if `size` is set to anything
            other than `'auto'` or the size that the ratio maps to.
    """
    ratio = tool.aspect_ratio
    if ratio is None:
        # No ratio requested: the tool's own `size` goes through untouched.
        return tool.size

    if ratio not in _OPENAI_ASPECT_RATIO_TO_SIZE:
        allowed_ratios = ', '.join(_OPENAI_ASPECT_RATIO_TO_SIZE)
        raise UserError(
            f'OpenAI image generation only supports `aspect_ratio` values: {allowed_ratios}. '
            'Specify one of those values or omit `aspect_ratio`.'
        )

    size_for_ratio = _OPENAI_ASPECT_RATIO_TO_SIZE[ratio]

    # An explicit `size` is tolerated only when it cannot contradict the ratio.
    size_is_compatible = tool.size == 'auto' or tool.size == size_for_ratio
    if not size_is_compatible:
        raise UserError(
            '`ImageGenerationTool` cannot combine `aspect_ratio` with a conflicting `size` when using OpenAI.'
        )

    return size_for_ratio


class OpenAIChatModelSettings(ModelSettings, total=False):
"""Settings used for an OpenAI model request."""
Expand Down Expand Up @@ -1298,6 +1328,7 @@ def _get_builtin_tools(self, model_request_parameters: ModelRequestParameters) -
tools.append(mcp_tool)
elif isinstance(tool, ImageGenerationTool): # pragma: no branch
has_image_generating_tool = True
size = _resolve_openai_image_generation_size(tool)
tools.append(
responses.tool_param.ImageGeneration(
type='image_generation',
Expand All @@ -1308,7 +1339,7 @@ def _get_builtin_tools(self, model_request_parameters: ModelRequestParameters) -
output_format=tool.output_format or 'png',
partial_images=tool.partial_images,
quality=tool.quality,
size=tool.size,
size=size,
)
)
else:
Expand Down
9 changes: 9 additions & 0 deletions tests/models/test_google.py
Original file line number Diff line number Diff line change
Expand Up @@ -3134,6 +3134,15 @@ async def test_google_image_generation_tool(allow_model_requests: None, google_p
await agent.run('Generate an image of an axolotl.')


async def test_google_image_generation_tool_aspect_ratio(google_provider: GoogleProvider) -> None:
    """`_get_tools` returns no tool entries and carries the tool's `aspect_ratio` as image config."""
    request_parameters = ModelRequestParameters(
        builtin_tools=[ImageGenerationTool(aspect_ratio='16:9')],
    )
    google_model = GoogleModel('gemini-2.5-flash-image', provider=google_provider)

    tools, image_config = google_model._get_tools(request_parameters)  # pyright: ignore[reportPrivateUsage]

    assert tools is None
    assert image_config == {'aspect_ratio': '16:9'}


async def test_google_vertexai_image_generation(allow_model_requests: None, vertex_provider: GoogleProvider):
model = GoogleModel('gemini-2.5-flash-image', provider=vertex_provider)

Expand Down
1 change: 1 addition & 0 deletions tests/models/test_model_request_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def test_model_request_parameters_are_serializable():
'partial_images': 0,
'quality': 'auto',
'size': '1024x1024',
'aspect_ratio': None,
},
{'kind': 'memory'},
{
Expand Down
37 changes: 35 additions & 2 deletions tests/models/test_openai_responses.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
import re
from dataclasses import replace
from typing import Any, cast
from typing import Any, Literal, cast

import pytest
from inline_snapshot import snapshot
Expand Down Expand Up @@ -32,17 +32,19 @@
ToolCallPartDelta,
ToolReturnPart,
UnexpectedModelBehavior,
UserError,
UserPromptPart,
capture_run_messages,
)
from pydantic_ai.agent import Agent
from pydantic_ai.builtin_tools import CodeExecutionTool, MCPServerTool, WebSearchTool
from pydantic_ai.builtin_tools import CodeExecutionTool, ImageAspectRatio, MCPServerTool, WebSearchTool
from pydantic_ai.exceptions import ModelHTTPError, ModelRetry
from pydantic_ai.messages import (
BuiltinToolCallEvent, # pyright: ignore[reportDeprecated]
BuiltinToolResultEvent, # pyright: ignore[reportDeprecated]
)
from pydantic_ai.models import ModelRequestParameters
from pydantic_ai.models.openai import _resolve_openai_image_generation_size # pyright: ignore[reportPrivateUsage]
from pydantic_ai.output import NativeOutput, PromptedOutput, TextOutput, ToolOutput
from pydantic_ai.profiles.openai import openai_model_profile
from pydantic_ai.tools import ToolDefinition
Expand Down Expand Up @@ -124,6 +126,37 @@ async def test_openai_responses_image_detail_vendor_metadata(allow_model_request
assert all(part['detail'] == 'high' for part in image_parts)


@pytest.mark.parametrize(
    ('aspect_ratio', 'explicit_size', 'expected_size'),
    [
        ('1:1', 'auto', '1024x1024'),
        ('2:3', '1024x1536', '1024x1536'),
        ('3:2', 'auto', '1536x1024'),
    ],
)
def test_openai_responses_image_generation_tool_aspect_ratio_mapping(
    aspect_ratio: ImageAspectRatio,
    explicit_size: Literal['1024x1024', '1024x1536', '1536x1024', 'auto'],
    expected_size: Literal['1024x1024', '1024x1536', '1536x1024'],
) -> None:
    """Each supported ratio resolves to its OpenAI size when `size` is `'auto'` or already matches."""
    image_tool = ImageGenerationTool(aspect_ratio=aspect_ratio, size=explicit_size)

    resolved_size = _resolve_openai_image_generation_size(image_tool)

    assert resolved_size == expected_size


def test_openai_responses_image_generation_tool_aspect_ratio_invalid() -> None:
    """A ratio with no entry in the OpenAI size mapping (`'16:9'`) is rejected with a `UserError`."""
    unsupported_ratio_tool = ImageGenerationTool(aspect_ratio='16:9')
    expected_message = 'OpenAI image generation only supports `aspect_ratio` values'

    with pytest.raises(UserError, match=expected_message):
        _resolve_openai_image_generation_size(unsupported_ratio_tool)


def test_openai_responses_image_generation_tool_aspect_ratio_conflicts_with_size() -> None:
    """An explicit `size` that disagrees with the size mapped from `aspect_ratio` raises `UserError`."""
    # '1:1' maps to '1024x1024', so a landscape size contradicts it.
    conflicting_tool = ImageGenerationTool(aspect_ratio='1:1', size='1536x1024')
    expected_message = 'cannot combine `aspect_ratio` with a conflicting `size`'

    with pytest.raises(UserError, match=expected_message):
        _resolve_openai_image_generation_size(conflicting_tool)


async def test_openai_responses_model_simple_response_with_tool_call(allow_model_requests: None, openai_api_key: str):
model = OpenAIResponsesModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key))

Expand Down
6 changes: 6 additions & 0 deletions tests/test_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,12 @@ async def model_logic( # noqa: C901
FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='160d47')),
]
)
elif m.content == 'Generate a wide illustration of an axolotl city skyline.':
return ModelResponse(
parts=[
FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='wide-axolotl-city')),
]
)
elif m.content == 'Generate a chart of y=x^2 for x=-5 to 5.':
return ModelResponse(
parts=[
Expand Down