Image, file output types for functions

rm-openai · rm-openai · commit fc40d58afefe · 2025-10-14T19:50:15.000-04:00
diff --git a/examples/basic/dynamic_system_prompt.py b/examples/basic/dynamic_system_prompt.py
@@ -28,6 +28,7 @@ def custom_instructions(
     instructions=custom_instructions,
 )
 
+
 async def main():
     context = CustomContext(style=random.choice(["haiku", "pirate", "robot"]))
     print(f"Using style: {context.style}\n")
diff --git a/examples/basic/tools.py b/examples/basic/tools.py
@@ -18,6 +18,7 @@ def get_weather(city: Annotated[str, "The city to get the weather for"]) -> Weat
     print("[debug] get_weather called")
     return Weather(city=city, temperature_range="14-20C", conditions="Sunny with wind.")
 
+
 agent = Agent(
     name="Hello world",
     instructions="You are a helpful agent.",
diff --git a/src/agents/__init__.py b/src/agents/__init__.py
@@ -81,6 +81,12 @@
     MCPToolApprovalFunctionResult,
     MCPToolApprovalRequest,
     Tool,
+    ToolOutputFileContent,
+    ToolOutputFileContentDict,
+    ToolOutputImage,
+    ToolOutputImageDict,
+    ToolOutputText,
+    ToolOutputTextDict,
     WebSearchTool,
     default_tool_error_function,
     function_tool,
@@ -273,6 +279,12 @@ def enable_verbose_stdout_logging():
     "MCPToolApprovalFunction",
     "MCPToolApprovalRequest",
     "MCPToolApprovalFunctionResult",
+    "ToolOutputText",
+    "ToolOutputTextDict",
+    "ToolOutputImage",
+    "ToolOutputImageDict",
+    "ToolOutputFileContent",
+    "ToolOutputFileContentDict",
     "function_tool",
     "Usage",
     "add_trace_processor",
diff --git a/src/agents/_run_impl.py b/src/agents/_run_impl.py
@@ -823,7 +823,7 @@ async def run_single_tool(
                 output=result,
                 run_item=ToolCallOutputItem(
                     output=result,
-                    raw_item=ItemHelpers.tool_call_output_item(tool_run.tool_call, str(result)),
+                    raw_item=ItemHelpers.tool_call_output_item(tool_run.tool_call, result),
                     agent=agent,
                 ),
             )
diff --git a/src/agents/extensions/memory/__init__.py b/src/agents/extensions/memory/__init__.py
@@ -58,8 +58,6 @@ def __getattr__(name: str) -> Any:
 
             return AdvancedSQLiteSession
         except ModuleNotFoundError as e:
-            raise ImportError(
-                f"Failed to import AdvancedSQLiteSession: {e}"
-            ) from e
+            raise ImportError(f"Failed to import AdvancedSQLiteSession: {e}") from e
 
     raise AttributeError(f"module {__name__} has no attribute {name}")
diff --git a/src/agents/items.py b/src/agents/items.py
@@ -21,6 +21,10 @@
 from openai.types.responses.response_code_interpreter_tool_call import (
     ResponseCodeInterpreterToolCall,
 )
+from openai.types.responses.response_function_call_output_item_list_param import (
+    ResponseFunctionCallOutputItemListParam,
+    ResponseFunctionCallOutputItemParam,
+)
 from openai.types.responses.response_input_item_param import (
     ComputerCallOutput,
     FunctionCallOutput,
@@ -36,9 +40,17 @@
 )
 from openai.types.responses.response_reasoning_item import ResponseReasoningItem
 from pydantic import BaseModel
-from typing_extensions import TypeAlias
+from typing_extensions import TypeAlias, assert_never
 
 from .exceptions import AgentsException, ModelBehaviorError
+from .logger import logger
+from .tool import (
+    ToolOutputFileContent,
+    ToolOutputImage,
+    ToolOutputText,
+    ValidToolOutputPydanticModels,
+    ValidToolOutputPydanticModelsTypeAdapter,
+)
 from .usage import Usage
 
 if TYPE_CHECKING:
@@ -298,11 +310,73 @@ def text_message_output(cls, message: MessageOutputItem) -> str:
 
     @classmethod
     def tool_call_output_item(
-        cls, tool_call: ResponseFunctionToolCall, output: str
+        cls, tool_call: ResponseFunctionToolCall, output: Any
     ) -> FunctionCallOutput:
-        """Creates a tool call output item from a tool call and its output."""
+        """Creates a tool call output item from a tool call and its output.
+
+        Accepts either plain values (stringified) or structured text/image/file outputs, which
+        are converted to input_text/input_image/input_file items. Structured outputs may be
+        provided as Pydantic models or dicts, or as a list or tuple of such items.
+        """
+
+        converted_output = cls._convert_tool_output(output)
+
         return {
             "call_id": tool_call.call_id,
-            "output": output,
+            "output": converted_output,
             "type": "function_call_output",
         }
+
+    @classmethod
+    def _convert_tool_output(cls, output: Any) -> str | ResponseFunctionCallOutputItemListParam:
+        """Converts a tool return value into an output acceptable by the Responses API."""
+
+        # If the output is one of the known structured output types, or a list/tuple of them, convert to
+        # ResponseFunctionCallOutputItemListParam. Else, just stringify.
+        if isinstance(output, (list, tuple)):
+            maybe_converted_output_list = [
+                cls._maybe_get_output_as_structured_function_output(item) for item in output
+            ]
+            if all(maybe_converted_output_list):
+                return [
+                    cls._convert_single_tool_output_pydantic_model(item)
+                    for item in maybe_converted_output_list
+                    if item is not None
+                ]
+            else:
+                return str(output)
+        else:
+            maybe_converted_output = cls._maybe_get_output_as_structured_function_output(output)
+            if maybe_converted_output:
+                return [cls._convert_single_tool_output_pydantic_model(maybe_converted_output)]
+            else:
+                return str(output)
+
+    @classmethod
+    def _maybe_get_output_as_structured_function_output(
+        cls, output: Any
+    ) -> ValidToolOutputPydanticModels | None:
+        if isinstance(output, (ToolOutputText, ToolOutputImage, ToolOutputFileContent)):
+            return output
+        elif isinstance(output, dict):
+            try:
+                return ValidToolOutputPydanticModelsTypeAdapter.validate_python(output)
+            except pydantic.ValidationError:
+                logger.debug("dict was not a valid tool output pydantic model")
+                return None
+
+        return None
+
+    @classmethod
+    def _convert_single_tool_output_pydantic_model(
+        cls, output: ValidToolOutputPydanticModels
+    ) -> ResponseFunctionCallOutputItemParam:
+        if isinstance(output, ToolOutputText):
+            return {"type": "input_text", "text": output.text}
+        elif isinstance(output, ToolOutputImage):
+            return {"type": "input_image", "image_url": output.image_url}
+        elif isinstance(output, ToolOutputFileContent):
+            return {"type": "input_file", "file_data": output.file_data}
+        else:
+            assert_never(output)
+            raise ValueError(f"Unexpected tool output type: {output}")
diff --git a/src/agents/tool.py b/src/agents/tool.py
@@ -15,14 +15,13 @@
 from openai.types.responses.tool_param import CodeInterpreter, ImageGeneration, Mcp
 from openai.types.responses.web_search_tool import Filters as WebSearchToolFilters
 from openai.types.responses.web_search_tool_param import UserLocation
-from pydantic import ValidationError
+from pydantic import BaseModel, TypeAdapter, ValidationError
 from typing_extensions import Concatenate, NotRequired, ParamSpec, TypedDict
 
 from . import _debug
 from .computer import AsyncComputer, Computer
 from .exceptions import ModelBehaviorError
 from .function_schema import DocstringStyle, function_schema
-from .items import RunItem
 from .logger import logger
 from .run_context import RunContextWrapper
 from .strict_schema import ensure_strict_json_schema
@@ -34,6 +33,8 @@
 
 if TYPE_CHECKING:
     from .agent import Agent, AgentBase
+    from .items import RunItem
+
 
 ToolParams = ParamSpec("ToolParams")
 
@@ -48,6 +49,72 @@
 ]
 
 
+class ToolOutputText(BaseModel):
+    """Represents a tool output that should be sent to the model as text."""
+
+    type: Literal["text"] = "text"
+    text: str
+
+
+class ToolOutputTextDict(TypedDict, total=False):
+    """TypedDict variant for text tool outputs."""
+
+    type: Literal["text"]
+    text: str
+
+
+class ToolOutputImage(BaseModel):
+    """Represents a tool output that should be sent to the model as an image.
+
+    You can provide either an `image_url` (URL or data URL) or a `file_id` for previously uploaded
+    content. The optional `detail` can control vision detail.
+    """
+
+    type: Literal["image"] = "image"
+    image_url: str | None = None
+    file_id: str | None = None
+    detail: Literal["low", "high", "auto"] | None = None
+
+
+class ToolOutputImageDict(TypedDict, total=False):
+    """TypedDict variant for image tool outputs."""
+
+    type: Literal["image"]
+    image_url: NotRequired[str]
+    file_id: NotRequired[str]
+    detail: NotRequired[Literal["low", "high", "auto"]]
+
+
+class ToolOutputFileContent(BaseModel):
+    """Represents a tool output that should be sent to the model as a file.
+
+    Provide one of `file_data` (base64), `file_url`, or `file_id`. You may also
+    provide an optional `filename` when using `file_data` to hint the file name.
+    """
+
+    type: Literal["file"] = "file"
+    file_data: str | None = None
+    file_url: str | None = None
+    file_id: str | None = None
+    filename: str | None = None
+
+
+class ToolOutputFileContentDict(TypedDict, total=False):
+    """TypedDict variant for file content tool outputs."""
+
+    type: Literal["file"]
+    file_data: NotRequired[str]
+    file_url: NotRequired[str]
+    file_id: NotRequired[str]
+    filename: NotRequired[str]
+
+
+ValidToolOutputPydanticModels = Union[ToolOutputText, ToolOutputImage, ToolOutputFileContent]
+ValidToolOutputPydanticModelsTypeAdapter: TypeAdapter[ValidToolOutputPydanticModels] = TypeAdapter(
+    ValidToolOutputPydanticModels
+)
+
+
 @dataclass
 class FunctionToolResult:
     tool: FunctionTool
@@ -81,7 +148,9 @@ class FunctionTool:
     1. The tool run context.
     2. The arguments from the LLM, as a JSON string.
 
-    You must return a string representation of the tool output, or something we can call `str()` on.
+    You must return one of the structured tool output types (e.g. ToolOutputText, ToolOutputImage,
+    ToolOutputFileContent) or a string representation of the tool output, or a list of them,
+    or something we can call `str()` on.
     In case of errors, you can either raise an Exception (which will cause the run to fail) or
     return a string error message (which will be sent back to the LLM).
     """
diff --git a/tests/mcp/test_message_handler.py b/tests/mcp/test_message_handler.py
@@ -21,11 +21,7 @@
     _MCPServerWithClientSession,
 )
 
-HandlerMessage = (
-    RequestResponder[ServerRequest, ClientResult]
-    | ServerNotification
-    | Exception
-)
+HandlerMessage = RequestResponder[ServerRequest, ClientResult] | ServerNotification | Exception
 
 
 class _StubClientSession:
@@ -69,8 +65,9 @@ def __init__(self, handler: MessageHandlerFnT | None):
     def create_streams(self):
         @contextlib.asynccontextmanager
         async def _streams():
-            send_stream, recv_stream = (
-                anyio.create_memory_object_stream[SessionMessage | Exception](1))
+            send_stream, recv_stream = anyio.create_memory_object_stream[
+                SessionMessage | Exception
+            ](1)
             try:
                 yield recv_stream, send_stream, None
             finally:
diff --git a/tests/voice/test_workflow.py b/tests/voice/test_workflow.py
@@ -144,7 +144,11 @@ async def test_single_agent_workflow(monkeypatch) -> None:
                 "status": "completed",
                 "type": "message",
             },
-            {"call_id": "2", "output": "tool_result", "type": "function_call_output"},
+            {
+                "call_id": "2",
+                "output": "tool_result",
+                "type": "function_call_output",
+            },
             {
                 "id": "1",
                 "content": [{"annotations": [], "text": "done", "type": "output_text"}],
@@ -180,7 +184,11 @@ async def test_single_agent_workflow(monkeypatch) -> None:
                 "status": "completed",
                 "type": "message",
             },
-            {"call_id": "2", "output": "tool_result", "type": "function_call_output"},
+            {
+                "call_id": "2",
+                "output": "tool_result",
+                "type": "function_call_output",
+            },
             {
                 "id": "1",
                 "content": [{"annotations": [], "text": "done", "type": "output_text"}],

Original file line number	Diff line number	Diff line change
`@@ -28,6 +28,7 @@ def custom_instructions(`
`28`	`28`	`instructions=custom_instructions,`
`29`	`29`	`)`
`30`	`30`
	`31`	`+`
`31`	`32`	`async def main():`
`32`	`33`	`context = CustomContext(style=random.choice(["haiku", "pirate", "robot"]))`
`33`	`34`	`print(f"Using style: {context.style}\n")`
Original file line number	Diff line number	Diff line change
`@@ -823,7 +823,7 @@ async def run_single_tool(`
`823`	`823`	`output=result,`
`824`	`824`	`run_item=ToolCallOutputItem(`
`825`	`825`	`output=result,`
`826`		`- raw_item=ItemHelpers.tool_call_output_item(tool_run.tool_call, str(result)),`
	`826`	`+ raw_item=ItemHelpers.tool_call_output_item(tool_run.tool_call, result),`
`827`	`827`	`agent=agent,`
`828`	`828`	`),`
`829`	`829`	`)`