From ba4d4cf28f80e197a5a7bef9c1873fd7d70958fe Mon Sep 17 00:00:00 2001 From: Kazuhiro Sera Date: Fri, 14 Nov 2025 02:43:01 +0900 Subject: [PATCH] Add new tools for gpt-5.1 --- examples/tools/apply_patch.py | 169 +++++ examples/tools/code_interpreter.py | 29 +- examples/tools/image_generator.py | 36 +- examples/tools/shell.py | 114 +++ examples/tools/web_search_filters.py | 32 +- src/agents/__init__.py | 31 +- src/agents/_run_impl.py | 718 +++++++++++++++++- src/agents/apply_diff.py | 329 ++++++++ src/agents/editor.py | 45 ++ src/agents/items.py | 38 +- src/agents/models/openai_responses.py | 8 + src/agents/run.py | 16 +- src/agents/tool.py | 100 ++- .../memory/test_dapr_redis_integration.py | 17 + tests/test_agents_logging.py | 13 + tests/test_apply_diff.py | 36 + tests/test_apply_diff_helpers.py | 73 ++ tests/test_apply_patch_tool.py | 139 ++++ tests/test_computer_action.py | 5 +- tests/test_function_tool.py | 15 + tests/test_run_step_execution.py | 15 +- tests/test_shell_call_serialization.py | 63 ++ tests/test_shell_tool.py | 137 ++++ tests/test_tool_metadata.py | 72 ++ 24 files changed, 2191 insertions(+), 59 deletions(-) create mode 100644 examples/tools/apply_patch.py create mode 100644 examples/tools/shell.py create mode 100644 src/agents/apply_diff.py create mode 100644 src/agents/editor.py create mode 100644 tests/test_agents_logging.py create mode 100644 tests/test_apply_diff.py create mode 100644 tests/test_apply_diff_helpers.py create mode 100644 tests/test_apply_patch_tool.py create mode 100644 tests/test_shell_call_serialization.py create mode 100644 tests/test_shell_tool.py create mode 100644 tests/test_tool_metadata.py diff --git a/examples/tools/apply_patch.py b/examples/tools/apply_patch.py new file mode 100644 index 000000000..19d0cfb7d --- /dev/null +++ b/examples/tools/apply_patch.py @@ -0,0 +1,169 @@ +import argparse +import asyncio +import hashlib +import os +import tempfile +from pathlib import Path + +from agents import Agent, ApplyPatchTool, ModelSettings, Runner, apply_diff, trace +from agents.editor import ApplyPatchOperation, ApplyPatchResult + + +class ApprovalTracker: + def __init__(self) -> None: + self._approved: set[str] = set() + + def fingerprint(self, operation: ApplyPatchOperation, relative_path: str) -> str: + hasher = hashlib.sha256() + hasher.update(operation.type.encode("utf-8")) + hasher.update(b"\0") + hasher.update(relative_path.encode("utf-8")) + hasher.update(b"\0") + hasher.update((operation.diff or "").encode("utf-8")) + return hasher.hexdigest() + + def remember(self, fingerprint: str) -> None: + self._approved.add(fingerprint) + + def is_approved(self, fingerprint: str) -> bool: + return fingerprint in self._approved + + +class WorkspaceEditor: + def __init__(self, root: Path, approvals: ApprovalTracker, auto_approve: bool) -> None: + self._root = root.resolve() + self._approvals = approvals + self._auto_approve = auto_approve or os.environ.get("APPLY_PATCH_AUTO_APPROVE") == "1" + + def create_file(self, operation: ApplyPatchOperation) -> ApplyPatchResult: + relative = self._relative_path(operation.path) + self._require_approval(operation, relative) + target = self._resolve(operation.path, ensure_parent=True) + diff = operation.diff or "" + content = apply_diff("", diff, mode="create") + target.write_text(content, encoding="utf-8") + return ApplyPatchResult(output=f"Created {relative}") + + def update_file(self, operation: ApplyPatchOperation) -> ApplyPatchResult: + relative = self._relative_path(operation.path) + self._require_approval(operation, relative) + target = self._resolve(operation.path) + original = target.read_text(encoding="utf-8") + diff = operation.diff or "" + patched = apply_diff(original, diff) + target.write_text(patched, encoding="utf-8") + return ApplyPatchResult(output=f"Updated {relative}") + + def delete_file(self, operation: ApplyPatchOperation) -> ApplyPatchResult: + relative = self._relative_path(operation.path) + self._require_approval(operation, relative) + target = self._resolve(operation.path) + target.unlink(missing_ok=True) + return ApplyPatchResult(output=f"Deleted {relative}") + + def _relative_path(self, value: str) -> str: + resolved = self._resolve(value) + return resolved.relative_to(self._root).as_posix() + + def _resolve(self, relative: str, ensure_parent: bool = False) -> Path: + candidate = Path(relative) + target = candidate if candidate.is_absolute() else (self._root / candidate) + target = target.resolve() + try: + target.relative_to(self._root) + except ValueError: + raise RuntimeError(f"Operation outside workspace: {relative}") from None + if ensure_parent: + target.parent.mkdir(parents=True, exist_ok=True) + return target + + def _require_approval(self, operation: ApplyPatchOperation, display_path: str) -> None: + fingerprint = self._approvals.fingerprint(operation, display_path) + if self._auto_approve or self._approvals.is_approved(fingerprint): + self._approvals.remember(fingerprint) + return + + print("\n[apply_patch] approval required") + print(f"- type: {operation.type}") + print(f"- path: {display_path}") + if operation.diff: + preview = operation.diff if len(operation.diff) < 400 else f"{operation.diff[:400]}…" + print("- diff preview:\n", preview) + answer = input("Proceed? [y/N] ").strip().lower() + if answer not in {"y", "yes"}: + raise RuntimeError("Apply patch operation rejected by user.") + self._approvals.remember(fingerprint) + + +async def main(auto_approve: bool, model: str) -> None: + with trace("apply_patch_example"): + with tempfile.TemporaryDirectory(prefix="apply-patch-example-") as workspace: + workspace_path = Path(workspace).resolve() + approvals = ApprovalTracker() + editor = WorkspaceEditor(workspace_path, approvals, auto_approve) + tool = ApplyPatchTool(editor=editor) + previous_response_id: str | None = None + + agent = Agent( + name="Patch Assistant", + model=model, + instructions=( + f"You can edit files inside {workspace_path} using the apply_patch tool. " + "When modifying an existing file, include the file contents between " + " and in your prompt." + ), + tools=[tool], + model_settings=ModelSettings(tool_choice="required"), + ) + + print(f"[info] Workspace root: {workspace_path}") + print(f"[info] Using model: {model}") + print("[run] Creating tasks.md") + result = await Runner.run( + agent, + "Create tasks.md with a shopping checklist of 5 entries.", + previous_response_id=previous_response_id, + ) + previous_response_id = result.last_response_id + print(f"[run] Final response #1:\n{result.final_output}\n") + notes_path = workspace_path / "tasks.md" + if not notes_path.exists(): + raise RuntimeError(f"{notes_path} was not created by the apply_patch tool.") + updated_notes = notes_path.read_text(encoding="utf-8") + print("[file] tasks.md after creation:\n") + print(updated_notes) + + prompt = ( + "\n" + f"===== tasks.md\n{updated_notes}\n" + "\n" + "Check off the last two items from the file." + ) + print("\n[run] Updating tasks.md") + result2 = await Runner.run( + agent, + prompt, + previous_response_id=previous_response_id, + ) + print(f"[run] Final response #2:\n{result2.final_output}\n") + if not notes_path.exists(): + raise RuntimeError("tasks.md vanished unexpectedly before the second read.") + print("[file] Final tasks.md:\n") + print(notes_path.read_text(encoding="utf-8")) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--auto-approve", + action="store_true", + default=False, + help="Skip manual confirmations for apply_patch operations.", + ) + parser.add_argument( + "--model", + default="gpt-5.1", + help="Model ID to use for the agent.", + ) + args = parser.parse_args() + asyncio.run(main(args.auto_approve, args.model)) diff --git a/examples/tools/code_interpreter.py b/examples/tools/code_interpreter.py index 406e570e7..5fcc5f160 100644 --- a/examples/tools/code_interpreter.py +++ b/examples/tools/code_interpreter.py @@ -1,8 +1,16 @@ import asyncio +from collections.abc import Mapping +from typing import Any from agents import Agent, CodeInterpreterTool, Runner, trace +def _get_field(obj: Any, key: str) -> Any: + if isinstance(obj, Mapping): + return obj.get(key) + return getattr(obj, key, None) + + async def main(): agent = Agent( name="Code interpreter", @@ -21,14 +29,19 @@ async def main(): print("Solving math problem...") result = Runner.run_streamed(agent, "What is the square root of273 * 312821 plus 1782?") async for event in result.stream_events(): - if ( - event.type == "run_item_stream_event" - and event.item.type == "tool_call_item" - and event.item.raw_item.type == "code_interpreter_call" - ): - print(f"Code interpreter code:\n```\n{event.item.raw_item.code}\n```\n") - elif event.type == "run_item_stream_event": - print(f"Other event: {event.item.type}") + if event.type != "run_item_stream_event": + continue + + item = event.item + if item.type == "tool_call_item": + raw_call = item.raw_item + if _get_field(raw_call, "type") == "code_interpreter_call": + code = _get_field(raw_call, "code") + if isinstance(code, str): + print(f"Code interpreter code:\n```\n{code}\n```\n") + continue + + print(f"Other event: {event.item.type}") print(f"Final output: {result.final_output}") diff --git a/examples/tools/image_generator.py b/examples/tools/image_generator.py index 747b9ce92..399b51a47 100644 --- a/examples/tools/image_generator.py +++ b/examples/tools/image_generator.py @@ -4,10 +4,18 @@ import subprocess import sys import tempfile +from collections.abc import Mapping +from typing import Any from agents import Agent, ImageGenerationTool, Runner, trace +def _get_field(obj: Any, key: str) -> Any: + if isinstance(obj, Mapping): + return obj.get(key) + return getattr(obj, key, None) + + def open_file(path: str) -> None: if sys.platform.startswith("darwin"): subprocess.run(["open", path], check=False) # macOS @@ -37,17 +45,23 @@ async def main(): ) print(result.final_output) for item in result.new_items: - if ( - item.type == "tool_call_item" - and item.raw_item.type == "image_generation_call" - and (img_result := item.raw_item.result) - ): - with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: - tmp.write(base64.b64decode(img_result)) - temp_path = tmp.name - - # Open the image - open_file(temp_path) + if item.type != "tool_call_item": + continue + + raw_call = item.raw_item + call_type = _get_field(raw_call, "type") + if call_type != "image_generation_call": + continue + + img_result = _get_field(raw_call, "result") + if not isinstance(img_result, str): + continue + + with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp: + tmp.write(base64.b64decode(img_result)) + temp_path = tmp.name + + open_file(temp_path) if __name__ == "__main__": diff --git a/examples/tools/shell.py b/examples/tools/shell.py new file mode 100644 index 000000000..7dcb13309 --- /dev/null +++ b/examples/tools/shell.py @@ -0,0 +1,114 @@ +import argparse +import asyncio +import os +from collections.abc import Sequence +from pathlib import Path + +from agents import ( + Agent, + ModelSettings, + Runner, + ShellCallOutcome, + ShellCommandOutput, + ShellCommandRequest, + ShellResult, + ShellTool, + trace, +) + + +class ShellExecutor: + """Executes shell commands with optional approval.""" + + def __init__(self, cwd: Path | None = None): + self.cwd = Path(cwd or Path.cwd()) + + async def __call__(self, request: ShellCommandRequest) -> ShellResult: + action = request.data.action + await require_approval(action.commands) + + outputs: list[ShellCommandOutput] = [] + for command in action.commands: + proc = await asyncio.create_subprocess_shell( + command, + cwd=self.cwd, + env=os.environ.copy(), + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + timed_out = False + try: + timeout = (action.timeout_ms or 0) / 1000 or None + stdout_bytes, stderr_bytes = await asyncio.wait_for( + proc.communicate(), timeout=timeout + ) + except asyncio.TimeoutError: + proc.kill() + stdout_bytes, stderr_bytes = await proc.communicate() + timed_out = True + + stdout = stdout_bytes.decode("utf-8", errors="ignore") + stderr = stderr_bytes.decode("utf-8", errors="ignore") + outputs.append( + ShellCommandOutput( + command=command, + stdout=stdout, + stderr=stderr, + outcome=ShellCallOutcome( + type="timeout" if timed_out else "exit", + exit_code=getattr(proc, "returncode", None), + ), + ) + ) + + if timed_out: + break + + return ShellResult( + output=outputs, + provider_data={"working_directory": str(self.cwd)}, + ) + + +async def require_approval(commands: Sequence[str]) -> None: + if os.environ.get("SHELL_AUTO_APPROVE") == "1": + return + print("Shell command approval required:") + for entry in commands: + print(" ", entry) + response = input("Proceed? [y/N] ").strip().lower() + if response not in {"y", "yes"}: + raise RuntimeError("Shell command execution rejected by user.") + + +async def main(prompt: str, model: str) -> None: + with trace("shell_example"): + print(f"[info] Using model: {model}") + agent = Agent( + name="Shell Assistant", + model=model, + instructions=( + "You can run shell commands using the shell tool. " + "Keep responses concise and include command output when helpful." + ), + tools=[ShellTool(executor=ShellExecutor())], + model_settings=ModelSettings(tool_choice="required"), + ) + + result = await Runner.run(agent, prompt) + print(f"\nFinal response:\n{result.final_output}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--prompt", + default="Show the list of files in the current directory.", + help="Instruction to send to the agent.", + ) + parser.add_argument( + "--model", + default="gpt-5.1", + ) + args = parser.parse_args() + asyncio.run(main(args.prompt, args.model)) diff --git a/examples/tools/web_search_filters.py b/examples/tools/web_search_filters.py index 6be30b169..1e1ff0a11 100644 --- a/examples/tools/web_search_filters.py +++ b/examples/tools/web_search_filters.py @@ -1,11 +1,20 @@ import asyncio +from collections.abc import Mapping from datetime import datetime +from typing import Any from openai.types.responses.web_search_tool import Filters from openai.types.shared.reasoning import Reasoning from agents import Agent, ModelSettings, Runner, WebSearchTool, trace + +def _get_field(obj: Any, key: str) -> Any: + if isinstance(obj, Mapping): + return obj.get(key) + return getattr(obj, key, None) + + # import logging # logging.basicConfig(level=logging.DEBUG) @@ -46,10 +55,25 @@ async def main(): print("### Sources ###") print() for item in result.new_items: - if item.type == "tool_call_item": - if item.raw_item.type == "web_search_call": - for source in item.raw_item.action.sources: # type: ignore [union-attr] - print(f"- {source.url}") + if item.type != "tool_call_item": + continue + + raw_call = item.raw_item + call_type = _get_field(raw_call, "type") + if call_type != "web_search_call": + continue + + action = _get_field(raw_call, "action") + sources = _get_field(action, "sources") if action else None + if not sources: + continue + + for source in sources: + url = getattr(source, "url", None) + if url is None and isinstance(source, Mapping): + url = source.get("url") + if url: + print(f"- {url}") print() print("### Final output ###") print() diff --git a/src/agents/__init__.py b/src/agents/__init__.py index b285d6f8c..c6d28aee0 100644 --- a/src/agents/__init__.py +++ b/src/agents/__init__.py @@ -13,7 +13,9 @@ ToolsToFinalOutputResult, ) from .agent_output import AgentOutputSchema, AgentOutputSchemaBase +from .apply_diff import apply_diff from .computer import AsyncComputer, Button, Computer, Environment +from .editor import ApplyPatchEditor, ApplyPatchOperation, ApplyPatchResult from .exceptions import ( AgentsException, InputGuardrailTripwireTriggered, @@ -48,7 +50,12 @@ TResponseInputItem, ) from .lifecycle import AgentHooks, RunHooks -from .memory import OpenAIConversationsSession, Session, SessionABC, SQLiteSession +from .memory import ( + OpenAIConversationsSession, + Session, + SessionABC, + SQLiteSession, +) from .model_settings import ModelSettings from .models.interface import Model, ModelProvider, ModelTracing from .models.multi_provider import MultiProvider @@ -67,6 +74,7 @@ StreamEvent, ) from .tool import ( + ApplyPatchTool, CodeInterpreterTool, ComputerTool, FileSearchTool, @@ -80,6 +88,14 @@ MCPToolApprovalFunction, MCPToolApprovalFunctionResult, MCPToolApprovalRequest, + ShellActionRequest, + ShellCallData, + ShellCallOutcome, + ShellCommandOutput, + ShellCommandRequest, + ShellExecutor, + ShellResult, + ShellTool, Tool, ToolOutputFileContent, ToolOutputFileContentDict, @@ -192,6 +208,7 @@ def enable_verbose_stdout_logging(): "ToolsToFinalOutputFunction", "ToolsToFinalOutputResult", "Runner", + "apply_diff", "run_demo_loop", "Model", "ModelProvider", @@ -273,6 +290,18 @@ def enable_verbose_stdout_logging(): "LocalShellCommandRequest", "LocalShellExecutor", "LocalShellTool", + "ShellActionRequest", + "ShellCallData", + "ShellCallOutcome", + "ShellCommandOutput", + "ShellCommandRequest", + "ShellExecutor", + "ShellResult", + "ShellTool", + "ApplyPatchEditor", + "ApplyPatchOperation", + "ApplyPatchResult", + "ApplyPatchTool", "Tool", "WebSearchTool", "HostedMCPTool", diff --git a/src/agents/_run_impl.py b/src/agents/_run_impl.py index 88a770a56..d3bd74f9d 100644 --- a/src/agents/_run_impl.py +++ b/src/agents/_run_impl.py @@ -3,12 +3,14 @@ import asyncio import dataclasses import inspect -from collections.abc import Awaitable +import json +from collections.abc import Awaitable, Mapping, Sequence from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Any, cast +from typing import TYPE_CHECKING, Any, Literal, Optional, cast from openai.types.responses import ( ResponseComputerToolCall, + ResponseCustomToolCall, ResponseFileSearchToolCall, ResponseFunctionToolCall, ResponseFunctionWebSearch, @@ -44,6 +46,7 @@ from .agent import Agent, ToolsToFinalOutputResult from .agent_output import AgentOutputSchemaBase from .computer import AsyncComputer, Computer +from .editor import ApplyPatchOperation, ApplyPatchResult from .exceptions import ( AgentsException, ModelBehaviorError, @@ -75,6 +78,7 @@ from .run_context import RunContextWrapper, TContext from .stream_events import RunItemStreamEvent, StreamEvent from .tool import ( + ApplyPatchTool, ComputerTool, ComputerToolSafetyCheckData, FunctionTool, @@ -83,6 +87,13 @@ LocalShellCommandRequest, LocalShellTool, MCPToolApprovalRequest, + ShellActionRequest, + ShellCallData, + ShellCallOutcome, + ShellCommandOutput, + ShellCommandRequest, + ShellResult, + ShellTool, Tool, ) from .tool_context import ToolContext @@ -163,6 +174,18 @@ class ToolRunLocalShellCall: local_shell_tool: LocalShellTool +@dataclass +class ToolRunShellCall: + tool_call: Any + shell_tool: ShellTool + + +@dataclass +class ToolRunApplyPatchCall: + tool_call: Any + apply_patch_tool: ApplyPatchTool + + @dataclass class ProcessedResponse: new_items: list[RunItem] @@ -170,6 +193,8 @@ class ProcessedResponse: functions: list[ToolRunFunction] computer_actions: list[ToolRunComputerAction] local_shell_calls: list[ToolRunLocalShellCall] + shell_calls: list[ToolRunShellCall] + apply_patch_calls: list[ToolRunApplyPatchCall] tools_used: list[str] # Names of all tools used, including hosted tools mcp_approval_requests: list[ToolRunMCPApprovalRequest] # Only requests with callbacks @@ -182,6 +207,8 @@ def has_tools_or_approvals_to_run(self) -> bool: self.functions, self.computer_actions, self.local_shell_calls, + self.shell_calls, + self.apply_patch_calls, self.mcp_approval_requests, ] ) @@ -267,10 +294,13 @@ async def execute_tools_and_side_effects( new_step_items: list[RunItem] = [] new_step_items.extend(processed_response.new_items) - # First, lets run the tool calls - function tools, computer actions, and local shell calls + # First, run function tools, computer actions, shell calls, apply_patch calls, + # and legacy local shell calls. ( (function_results, tool_input_guardrail_results, tool_output_guardrail_results), computer_results, + shell_results, + apply_patch_results, local_shell_results, ) = await asyncio.gather( cls.execute_function_tool_calls( @@ -287,6 +317,20 @@ async def execute_tools_and_side_effects( context_wrapper=context_wrapper, config=run_config, ), + cls.execute_shell_calls( + agent=agent, + calls=processed_response.shell_calls, + hooks=hooks, + context_wrapper=context_wrapper, + config=run_config, + ), + cls.execute_apply_patch_calls( + agent=agent, + calls=processed_response.apply_patch_calls, + hooks=hooks, + context_wrapper=context_wrapper, + config=run_config, + ), cls.execute_local_shell_calls( agent=agent, calls=processed_response.local_shell_calls, @@ -297,6 +341,8 @@ async def execute_tools_and_side_effects( ) new_step_items.extend([result.run_item for result in function_results]) new_step_items.extend(computer_results) + new_step_items.extend(shell_results) + new_step_items.extend(apply_patch_results) new_step_items.extend(local_shell_results) # Next, run the MCP approval requests @@ -431,6 +477,8 @@ def process_model_response( functions = [] computer_actions = [] local_shell_calls = [] + shell_calls = [] + apply_patch_calls = [] mcp_approval_requests = [] tools_used: list[str] = [] handoff_map = {handoff.tool_name: handoff for handoff in handoffs} @@ -439,6 +487,10 @@ def process_model_response( local_shell_tool = next( (tool for tool in all_tools if isinstance(tool, LocalShellTool)), None ) + shell_tool = next((tool for tool in all_tools if isinstance(tool, ShellTool)), None) + apply_patch_tool = next( + (tool for tool in all_tools if isinstance(tool, ApplyPatchTool)), None + ) hosted_mcp_server_map = { tool.tool_config["server_label"]: tool for tool in all_tools @@ -446,6 +498,56 @@ def process_model_response( } for output in response.output: + output_type = _get_mapping_or_attr(output, "type") + logger.debug( + "Processing output item type=%s class=%s", + output_type, + output.__class__.__name__ if hasattr(output, "__class__") else type(output), + ) + if output_type == "shell_call": + items.append(ToolCallItem(raw_item=cast(Any, output), agent=agent)) + if not shell_tool: + tools_used.append("shell") + _error_tracing.attach_error_to_current_span( + SpanError( + message="Shell tool not found", + data={}, + ) + ) + raise ModelBehaviorError("Model produced shell call without a shell tool.") + tools_used.append(shell_tool.name) + call_identifier = _get_mapping_or_attr(output, "call_id") or _get_mapping_or_attr( + output, "callId" + ) + logger.debug("Queuing shell_call %s", call_identifier) + shell_calls.append(ToolRunShellCall(tool_call=output, shell_tool=shell_tool)) + continue + if output_type == "apply_patch_call": + items.append(ToolCallItem(raw_item=cast(Any, output), agent=agent)) + if apply_patch_tool: + tools_used.append(apply_patch_tool.name) + call_identifier = _get_mapping_or_attr(output, "call_id") + if not call_identifier: + call_identifier = _get_mapping_or_attr(output, "callId") + logger.debug("Queuing apply_patch_call %s", call_identifier) + apply_patch_calls.append( + ToolRunApplyPatchCall( + tool_call=output, + apply_patch_tool=apply_patch_tool, + ) + ) + else: + tools_used.append("apply_patch") + _error_tracing.attach_error_to_current_span( + SpanError( + message="Apply patch tool not found", + data={}, + ) + ) + raise ModelBehaviorError( + "Model produced apply_patch call without an apply_patch tool." + ) + continue if isinstance(output, ResponseOutputMessage): items.append(MessageOutputItem(raw_item=output, agent=agent)) elif isinstance(output, ResponseFileSearchToolCall): @@ -508,20 +610,84 @@ def process_model_response( tools_used.append("code_interpreter") elif isinstance(output, LocalShellCall): items.append(ToolCallItem(raw_item=output, agent=agent)) - tools_used.append("local_shell") - if not local_shell_tool: + if shell_tool: + tools_used.append(shell_tool.name) + shell_calls.append(ToolRunShellCall(tool_call=output, shell_tool=shell_tool)) + else: + tools_used.append("local_shell") + if not local_shell_tool: + _error_tracing.attach_error_to_current_span( + SpanError( + message="Local shell tool not found", + data={}, + ) + ) + raise ModelBehaviorError( + "Model produced local shell call without a local shell tool." + ) + local_shell_calls.append( + ToolRunLocalShellCall(tool_call=output, local_shell_tool=local_shell_tool) + ) + elif isinstance(output, ResponseCustomToolCall) and _is_apply_patch_name( + output.name, apply_patch_tool + ): + parsed_operation = _parse_apply_patch_custom_input(output.input) + pseudo_call = { + "type": "apply_patch_call", + "call_id": output.call_id, + "operation": parsed_operation, + } + items.append(ToolCallItem(raw_item=cast(Any, pseudo_call), agent=agent)) + if apply_patch_tool: + tools_used.append(apply_patch_tool.name) + apply_patch_calls.append( + ToolRunApplyPatchCall( + tool_call=pseudo_call, + apply_patch_tool=apply_patch_tool, + ) + ) + else: + tools_used.append("apply_patch") _error_tracing.attach_error_to_current_span( SpanError( - message="Local shell tool not found", + message="Apply patch tool not found", data={}, ) ) raise ModelBehaviorError( - "Model produced local shell call without a local shell tool." + "Model produced apply_patch call without an apply_patch tool." ) - local_shell_calls.append( - ToolRunLocalShellCall(tool_call=output, local_shell_tool=local_shell_tool) - ) + elif ( + isinstance(output, ResponseFunctionToolCall) + and _is_apply_patch_name(output.name, apply_patch_tool) + and output.name not in function_map + ): + parsed_operation = _parse_apply_patch_function_args(output.arguments) + pseudo_call = { + "type": "apply_patch_call", + "call_id": output.call_id, + "operation": parsed_operation, + } + items.append(ToolCallItem(raw_item=cast(Any, pseudo_call), agent=agent)) + if apply_patch_tool: + tools_used.append(apply_patch_tool.name) + apply_patch_calls.append( + ToolRunApplyPatchCall( + tool_call=pseudo_call, apply_patch_tool=apply_patch_tool + ) + ) + else: + tools_used.append("apply_patch") + _error_tracing.attach_error_to_current_span( + SpanError( + message="Apply patch tool not found", + data={}, + ) + ) + raise ModelBehaviorError( + "Model produced apply_patch call without an apply_patch tool." + ) + continue elif not isinstance(output, ResponseFunctionToolCall): logger.warning(f"Unexpected output type, ignoring: {type(output)}") @@ -581,6 +747,8 @@ def process_model_response( functions=functions, computer_actions=computer_actions, local_shell_calls=local_shell_calls, + shell_calls=shell_calls, + apply_patch_calls=apply_patch_calls, tools_used=tools_used, mcp_approval_requests=mcp_approval_requests, ) @@ -865,6 +1033,52 @@ async def execute_local_shell_calls( ) return results + @classmethod + async def execute_shell_calls( + cls, + *, + agent: Agent[TContext], + calls: list[ToolRunShellCall], + context_wrapper: RunContextWrapper[TContext], + hooks: RunHooks[TContext], + config: RunConfig, + ) -> list[RunItem]: + results: list[RunItem] = [] + for call in calls: + results.append( + await ShellAction.execute( + agent=agent, + call=call, + hooks=hooks, + context_wrapper=context_wrapper, + config=config, + ) + ) + return results + + @classmethod + async def execute_apply_patch_calls( + cls, + *, + agent: Agent[TContext], + calls: list[ToolRunApplyPatchCall], + context_wrapper: RunContextWrapper[TContext], + hooks: RunHooks[TContext], + config: RunConfig, + ) -> list[RunItem]: + results: list[RunItem] = [] + for call in calls: + results.append( + await ApplyPatchAction.execute( + agent=agent, + call=call, + hooks=hooks, + context_wrapper=context_wrapper, + config=config, + ) + ) + return results + @classmethod async def execute_computer_actions( cls, @@ -1423,18 +1637,488 @@ async def execute( ), ) + raw_payload: dict[str, Any] = { + "type": "local_shell_call_output", + "call_id": call.tool_call.call_id, + "output": result, + } return ToolCallOutputItem( agent=agent, output=result, - # LocalShellCallOutput type uses the field name "id", but the server wants "call_id". - # raw_item keeps the upstream type, so we ignore the type checker here. - raw_item={ # type: ignore[misc, arg-type] - "type": "local_shell_call_output", - "call_id": call.tool_call.call_id, - "output": result, - }, + raw_item=raw_payload, + ) + + +class ShellAction: + @classmethod + async def execute( + cls, + *, + agent: Agent[TContext], + call: ToolRunShellCall, + hooks: RunHooks[TContext], + context_wrapper: RunContextWrapper[TContext], + config: RunConfig, + ) -> RunItem: + await asyncio.gather( + hooks.on_tool_start(context_wrapper, agent, call.shell_tool), + ( + agent.hooks.on_tool_start(context_wrapper, agent, call.shell_tool) + if agent.hooks + else _coro.noop_coroutine() + ), + ) + + shell_call = _coerce_shell_call(call.tool_call) + request = ShellCommandRequest(ctx_wrapper=context_wrapper, data=shell_call) + status: Literal["completed", "failed"] = "completed" + output_text = "" + shell_output_payload: list[dict[str, Any]] | None = None + provider_meta: dict[str, Any] | None = None + max_output_length: int | None = None + + try: + executor_result = call.shell_tool.executor(request) + result = ( + await executor_result if inspect.isawaitable(executor_result) else executor_result + ) + + if isinstance(result, ShellResult): + normalized = [_normalize_shell_output(entry) for entry in result.output] + output_text = _render_shell_outputs(normalized) + shell_output_payload = [_serialize_shell_output(entry) for entry in normalized] + provider_meta = dict(result.provider_data or {}) + max_output_length = result.max_output_length + else: + output_text = str(result) + except Exception as exc: + status = "failed" + output_text = _format_shell_error(exc) + logger.error("Shell executor failed: %s", exc, exc_info=True) + + await asyncio.gather( + hooks.on_tool_end(context_wrapper, agent, call.shell_tool, output_text), + ( + agent.hooks.on_tool_end(context_wrapper, agent, call.shell_tool, output_text) + if agent.hooks + else _coro.noop_coroutine() + ), + ) + + raw_entries: list[dict[str, Any]] | None = None + if shell_output_payload: + raw_entries = shell_output_payload + elif output_text: + raw_entries = [ + { + "stdout": output_text, + "stderr": "", + "status": status, + "outcome": "success" if status == "completed" else "failure", + } + ] + + structured_output: list[dict[str, Any]] = [] + if raw_entries: + for entry in raw_entries: + sanitized = dict(entry) + status_value = sanitized.pop("status", None) + sanitized.pop("provider_data", None) + raw_exit_code = sanitized.pop("exit_code", None) + sanitized.pop("command", None) + outcome_value = sanitized.get("outcome") + if isinstance(outcome_value, str): + resolved_type = "exit" + if status_value == "timeout": + resolved_type = "timeout" + outcome_payload: dict[str, Any] = {"type": resolved_type} + if resolved_type == "exit": + outcome_payload["exit_code"] = _resolve_exit_code( + raw_exit_code, outcome_value + ) + sanitized["outcome"] = outcome_payload + elif isinstance(outcome_value, Mapping): + outcome_payload = dict(outcome_value) + outcome_status = cast(Optional[str], outcome_payload.pop("status", None)) + outcome_type = outcome_payload.get("type") + if outcome_type != "timeout": + outcome_payload.setdefault( + "exit_code", + _resolve_exit_code( + raw_exit_code, + outcome_status if isinstance(outcome_status, str) else None, + ), + ) + sanitized["outcome"] = outcome_payload + structured_output.append(sanitized) + + raw_item: dict[str, Any] = { + "type": "shell_call_output", + "call_id": shell_call.call_id, + "output": structured_output, + "status": status, + } + if max_output_length is not None: + raw_item["max_output_length"] = max_output_length + if raw_entries: + raw_item["shell_output"] = raw_entries + if provider_meta: + raw_item["provider_data"] = provider_meta + + return ToolCallOutputItem( + agent=agent, + output=output_text, + raw_item=cast(Any, raw_item), + ) + + +class ApplyPatchAction: + @classmethod + async def execute( + cls, + *, + agent: Agent[TContext], + call: ToolRunApplyPatchCall, + hooks: RunHooks[TContext], + context_wrapper: RunContextWrapper[TContext], + config: RunConfig, + ) -> RunItem: + apply_patch_tool = call.apply_patch_tool + await asyncio.gather( + hooks.on_tool_start(context_wrapper, agent, apply_patch_tool), + ( + agent.hooks.on_tool_start(context_wrapper, agent, apply_patch_tool) + if agent.hooks + else _coro.noop_coroutine() + ), ) + status: Literal["completed", "failed"] = "completed" + output_text = "" + + try: + operation = _coerce_apply_patch_operation(call.tool_call) + editor = apply_patch_tool.editor + if operation.type == "create_file": + result = editor.create_file(operation) + elif operation.type == "update_file": + result = editor.update_file(operation) + elif operation.type == "delete_file": + result = editor.delete_file(operation) + else: # pragma: no cover - validated in _coerce_apply_patch_operation + raise ModelBehaviorError(f"Unsupported apply_patch operation: {operation.type}") + + awaited = await result if inspect.isawaitable(result) else result + normalized = _normalize_apply_patch_result(awaited) + if normalized: + if normalized.status in {"completed", "failed"}: + status = normalized.status + if normalized.output: + output_text = normalized.output + except Exception as exc: + status = "failed" + output_text = _format_shell_error(exc) + logger.error("Apply patch editor failed: %s", exc, exc_info=True) + + await asyncio.gather( + hooks.on_tool_end(context_wrapper, agent, apply_patch_tool, output_text), + ( + agent.hooks.on_tool_end(context_wrapper, agent, apply_patch_tool, output_text) + if agent.hooks + else _coro.noop_coroutine() + ), + ) + + raw_item: dict[str, Any] = { + "type": "apply_patch_call_output", + "call_id": _extract_apply_patch_call_id(call.tool_call), + "status": status, + } + if output_text: + raw_item["output"] = output_text + + return ToolCallOutputItem( + agent=agent, + output=output_text, + raw_item=cast(Any, raw_item), + ) + + +def _normalize_shell_output(entry: ShellCommandOutput | Mapping[str, Any]) -> ShellCommandOutput: + if isinstance(entry, ShellCommandOutput): + return entry + + stdout = str(entry.get("stdout", "") or "") + stderr = str(entry.get("stderr", "") or "") + command_value = entry.get("command") + provider_data_value = entry.get("provider_data") + outcome_value = entry.get("outcome") + + outcome_type: Literal["exit", "timeout"] = "exit" + exit_code_value: Any | None = None + + if isinstance(outcome_value, Mapping): + type_value = outcome_value.get("type") + if type_value == "timeout": + outcome_type = "timeout" + elif isinstance(type_value, str): + outcome_type = "exit" + exit_code_value = outcome_value.get("exit_code") or outcome_value.get("exitCode") + else: + status_str = str(entry.get("status", "completed") or "completed").lower() + if status_str == "timeout": + outcome_type = "timeout" + if isinstance(outcome_value, str): + if outcome_value == "failure": + exit_code_value = 1 + elif outcome_value == "success": + exit_code_value = 0 + exit_code_value = exit_code_value or entry.get("exit_code") or entry.get("exitCode") + + outcome = ShellCallOutcome( + type=outcome_type, + exit_code=_normalize_exit_code(exit_code_value), + ) + + return ShellCommandOutput( + stdout=stdout, + stderr=stderr, + outcome=outcome, + command=str(command_value) if command_value is not None else None, + provider_data=cast(dict[str, Any], provider_data_value) + if isinstance(provider_data_value, Mapping) + else provider_data_value, + ) + + +def _serialize_shell_output(output: ShellCommandOutput) -> dict[str, Any]: + payload: dict[str, Any] = { + "stdout": output.stdout, + "stderr": output.stderr, + "status": output.status, + "outcome": {"type": output.outcome.type}, + } + if output.outcome.type == "exit": + payload["outcome"]["exit_code"] = output.outcome.exit_code + if output.outcome.exit_code is not None: + payload["exit_code"] = output.outcome.exit_code + if output.command is not None: + payload["command"] = output.command + if output.provider_data: + payload["provider_data"] = output.provider_data + return payload + + +def _resolve_exit_code(raw_exit_code: Any, outcome_status: str | None) -> int: + normalized = _normalize_exit_code(raw_exit_code) + if normalized is not None: + return normalized + + normalized_status = (outcome_status or "").lower() + if normalized_status == "success": + return 0 + if normalized_status == "failure": + return 1 + return 0 + + +def _normalize_exit_code(value: Any) -> int | None: + if value is None: + return None + try: + return int(value) + except (TypeError, ValueError): + return None + + +def _render_shell_outputs(outputs: Sequence[ShellCommandOutput]) -> str: + if not outputs: + return "(no output)" + + rendered_chunks: list[str] = [] + for result in outputs: + chunk_lines: list[str] = [] + if result.command: + chunk_lines.append(f"$ {result.command}") + + stdout = result.stdout.rstrip("\n") + stderr = result.stderr.rstrip("\n") + + if stdout: + chunk_lines.append(stdout) + if stderr: + if stdout: + chunk_lines.append("") + chunk_lines.append("stderr:") + chunk_lines.append(stderr) + + if result.exit_code not in (None, 0): + chunk_lines.append(f"exit code: {result.exit_code}") + if result.status == "timeout": + chunk_lines.append("status: timeout") + + chunk = "\n".join(chunk_lines).strip() + rendered_chunks.append(chunk if chunk else "(no output)") + + return "\n\n".join(rendered_chunks) + + +def _format_shell_error(error: Exception | BaseException | Any) -> str: + if isinstance(error, Exception): + message = str(error) + return message or error.__class__.__name__ + try: + return str(error) + except Exception: # pragma: no cover - fallback only + return repr(error) + + +def _get_mapping_or_attr(target: Any, key: str) -> Any: + if isinstance(target, Mapping): + return target.get(key) + return getattr(target, key, None) + + +def _extract_shell_call_id(tool_call: Any) -> str: + value = _get_mapping_or_attr(tool_call, "call_id") + if not value: + value = _get_mapping_or_attr(tool_call, "callId") + if not value: + raise ModelBehaviorError("Shell call is missing call_id.") + return str(value) + + +def _coerce_shell_call(tool_call: Any) -> ShellCallData: + call_id = _extract_shell_call_id(tool_call) + action_payload = _get_mapping_or_attr(tool_call, "action") + if action_payload is None: + raise ModelBehaviorError("Shell call is missing an action payload.") + + commands_value = _get_mapping_or_attr(action_payload, "commands") + if not isinstance(commands_value, Sequence): + raise ModelBehaviorError("Shell call action is missing commands.") + commands: list[str] = [] + for entry in commands_value: + if entry is None: + continue + commands.append(str(entry)) + if not commands: + raise ModelBehaviorError("Shell call action must include at least one command.") + + timeout_value = ( + _get_mapping_or_attr(action_payload, "timeout_ms") + or _get_mapping_or_attr(action_payload, "timeoutMs") + or _get_mapping_or_attr(action_payload, "timeout") + ) + timeout_ms = int(timeout_value) if isinstance(timeout_value, (int, float)) else None + + max_length_value = _get_mapping_or_attr( + action_payload, "max_output_length" + ) or _get_mapping_or_attr(action_payload, "maxOutputLength") + max_output_length = ( + int(max_length_value) if isinstance(max_length_value, (int, float)) else None + ) + + action = ShellActionRequest( + commands=commands, + timeout_ms=timeout_ms, + max_output_length=max_output_length, + ) + + status_value = _get_mapping_or_attr(tool_call, "status") + status_literal: Literal["in_progress", "completed"] | None = None + if isinstance(status_value, str): + lowered = status_value.lower() + if lowered in {"in_progress", "completed"}: + status_literal = cast(Literal["in_progress", "completed"], lowered) + + return ShellCallData(call_id=call_id, action=action, status=status_literal, raw=tool_call) + + +def _parse_apply_patch_custom_input(input_json: str) -> dict[str, Any]: + try: + parsed = json.loads(input_json or "{}") + except json.JSONDecodeError as exc: + raise ModelBehaviorError(f"Invalid apply_patch input JSON: {exc}") from exc + if not isinstance(parsed, Mapping): + raise ModelBehaviorError("Apply patch input must be a JSON object.") + return dict(parsed) + + +def _parse_apply_patch_function_args(arguments: str) -> dict[str, Any]: + try: + parsed = json.loads(arguments or "{}") + except json.JSONDecodeError as exc: + raise ModelBehaviorError(f"Invalid apply_patch arguments JSON: {exc}") from exc + if not isinstance(parsed, Mapping): + raise ModelBehaviorError("Apply patch arguments must be a JSON object.") + return dict(parsed) + + +def _extract_apply_patch_call_id(tool_call: Any) -> str: + value = _get_mapping_or_attr(tool_call, "call_id") + if not value: + value = _get_mapping_or_attr(tool_call, "callId") + if not value: + raise ModelBehaviorError("Apply patch call is missing call_id.") + return str(value) + + +def _coerce_apply_patch_operation(tool_call: Any) -> ApplyPatchOperation: + raw_operation = _get_mapping_or_attr(tool_call, "operation") + if raw_operation is None: + raise ModelBehaviorError("Apply patch call is missing an operation payload.") + + op_type_value = str(_get_mapping_or_attr(raw_operation, "type")) + if op_type_value not in {"create_file", "update_file", "delete_file"}: + raise ModelBehaviorError(f"Unknown apply_patch operation: {op_type_value}") + op_type_literal = cast(Literal["create_file", "update_file", "delete_file"], op_type_value) + + path = _get_mapping_or_attr(raw_operation, "path") + if not isinstance(path, str) or not path: + raise ModelBehaviorError("Apply patch operation is missing a valid path.") + + diff_value = _get_mapping_or_attr(raw_operation, "diff") + if op_type_literal in {"create_file", "update_file"}: + if not isinstance(diff_value, str) or not diff_value: + raise ModelBehaviorError( + f"Apply patch operation {op_type_literal} is missing the required diff payload." + ) + diff: str | None = diff_value + else: + diff = None + + return ApplyPatchOperation(type=op_type_literal, path=str(path), diff=diff) + + +def _normalize_apply_patch_result( + result: ApplyPatchResult | Mapping[str, Any] | str | None, +) -> ApplyPatchResult | None: + if result is None: + return None + if isinstance(result, ApplyPatchResult): + return result + if isinstance(result, Mapping): + status = result.get("status") + output = result.get("output") + normalized_status = status if status in {"completed", "failed"} else None + normalized_output = str(output) if output is not None else None + return ApplyPatchResult(status=normalized_status, output=normalized_output) + if isinstance(result, str): + return ApplyPatchResult(output=result) + return ApplyPatchResult(output=str(result)) + + +def _is_apply_patch_name(name: str | None, tool: ApplyPatchTool | None) -> bool: + if not name: + return False + candidate = name.strip().lower() + if candidate.startswith("apply_patch"): + return True + if tool and candidate == tool.name.strip().lower(): + return True + return False + def _build_litellm_json_tool_call(output: ResponseFunctionToolCall) -> FunctionTool: async def on_invoke_tool(_ctx: ToolContext[Any], value: Any) -> Any: diff --git a/src/agents/apply_diff.py b/src/agents/apply_diff.py new file mode 100644 index 000000000..e1606e359 --- /dev/null +++ b/src/agents/apply_diff.py @@ -0,0 +1,329 @@ +"""Utility for applying V4A diffs against text inputs.""" + +from __future__ import annotations + +import re +from collections.abc import Sequence +from dataclasses import dataclass +from typing import Callable, Literal + +ApplyDiffMode = Literal["default", "create"] + + +@dataclass +class Chunk: + orig_index: int + del_lines: list[str] + ins_lines: list[str] + + +@dataclass +class ParserState: + lines: list[str] + index: int = 0 + fuzz: int = 0 + + +@dataclass +class ParsedUpdateDiff: + chunks: list[Chunk] + fuzz: int + + +@dataclass +class ReadSectionResult: + next_context: list[str] + section_chunks: list[Chunk] + end_index: int + eof: bool + + +END_PATCH = "*** End Patch" +END_FILE = "*** End of File" +SECTION_TERMINATORS = [ + END_PATCH, + "*** Update File:", + "*** Delete File:", + "*** Add File:", +] +END_SECTION_MARKERS = [*SECTION_TERMINATORS, END_FILE] + + +def apply_diff(input: str, diff: str, mode: ApplyDiffMode = "default") -> str: + """Apply a V4A diff to the provided text. + + This parser understands both the create-file syntax (only "+" prefixed + lines) and the default update syntax that includes context hunks. + """ + + diff_lines = _normalize_diff_lines(diff) + if mode == "create": + return _parse_create_diff(diff_lines) + + parsed = _parse_update_diff(diff_lines, input) + return _apply_chunks(input, parsed.chunks) + + +def _normalize_diff_lines(diff: str) -> list[str]: + lines = [line.rstrip("\r") for line in re.split(r"\r?\n", diff)] + if lines and lines[-1] == "": + lines.pop() + return lines + + +def _is_done(state: ParserState, prefixes: Sequence[str]) -> bool: + if state.index >= len(state.lines): + return True + if any(state.lines[state.index].startswith(prefix) for prefix in prefixes): + return True + return False + + +def _read_str(state: ParserState, prefix: str) -> str: + if state.index >= len(state.lines): + return "" + current = state.lines[state.index] + if current.startswith(prefix): + state.index += 1 + return current[len(prefix) :] + return "" + + +def _parse_create_diff(lines: list[str]) -> str: + parser = ParserState(lines=[*lines, END_PATCH]) + output: list[str] = [] + + while not _is_done(parser, SECTION_TERMINATORS): + if parser.index >= len(parser.lines): + break + line = parser.lines[parser.index] + parser.index += 1 + if not line.startswith("+"): + raise ValueError(f"Invalid Add File Line: {line}") + output.append(line[1:]) + + return "\n".join(output) + + +def _parse_update_diff(lines: list[str], input: str) -> ParsedUpdateDiff: + parser = ParserState(lines=[*lines, END_PATCH]) + input_lines = input.split("\n") + chunks: list[Chunk] = [] + cursor = 0 + + while not _is_done(parser, END_SECTION_MARKERS): + anchor = _read_str(parser, "@@ ") + has_bare_anchor = ( + anchor == "" and parser.index < len(parser.lines) and parser.lines[parser.index] == "@@" + ) + if has_bare_anchor: + parser.index += 1 + + if not (anchor or has_bare_anchor or cursor == 0): + current_line = parser.lines[parser.index] if parser.index < len(parser.lines) else "" + raise ValueError(f"Invalid Line:\n{current_line}") + + if anchor.strip(): + cursor = _advance_cursor_to_anchor(anchor, input_lines, cursor, parser) + + section = _read_section(parser.lines, parser.index) + find_result = _find_context(input_lines, section.next_context, cursor, section.eof) + if find_result.new_index == -1: + ctx_text = "\n".join(section.next_context) + if section.eof: + raise ValueError(f"Invalid EOF Context {cursor}:\n{ctx_text}") + raise ValueError(f"Invalid Context {cursor}:\n{ctx_text}") + + cursor = find_result.new_index + len(section.next_context) + parser.fuzz += find_result.fuzz + parser.index = section.end_index + + for ch in section.section_chunks: + chunks.append( + Chunk( + orig_index=ch.orig_index + find_result.new_index, + del_lines=list(ch.del_lines), + ins_lines=list(ch.ins_lines), + ) + ) + + return ParsedUpdateDiff(chunks=chunks, fuzz=parser.fuzz) + + +def _advance_cursor_to_anchor( + anchor: str, + input_lines: list[str], + cursor: int, + parser: ParserState, +) -> int: + found = False + + if not any(line == anchor for line in input_lines[:cursor]): + for i in range(cursor, len(input_lines)): + if input_lines[i] == anchor: + cursor = i + 1 + found = True + break + + if not found and not any(line.strip() == anchor.strip() for line in input_lines[:cursor]): + for i in range(cursor, len(input_lines)): + if input_lines[i].strip() == anchor.strip(): + cursor = i + 1 + parser.fuzz += 1 + found = True + break + + return cursor + + +def _read_section(lines: list[str], start_index: int) -> ReadSectionResult: + context: list[str] = [] + del_lines: list[str] = [] + ins_lines: list[str] = [] + section_chunks: list[Chunk] = [] + mode: Literal["keep", "add", "delete"] = "keep" + index = start_index + orig_index = index + + while index < len(lines): + raw = lines[index] + if ( + raw.startswith("@@") + or raw.startswith(END_PATCH) + or raw.startswith("*** Update File:") + or raw.startswith("*** Delete File:") + or raw.startswith("*** Add File:") + or raw.startswith(END_FILE) + ): + break + if raw == "***": + break + if raw.startswith("***"): + raise ValueError(f"Invalid Line: {raw}") + + index += 1 + last_mode = mode + line = raw if raw else " " + prefix = line[0] + if prefix == "+": + mode = "add" + elif prefix == "-": + mode = "delete" + elif prefix == " ": + mode = "keep" + else: + raise ValueError(f"Invalid Line: {line}") + + line_content = line[1:] + switching_to_context = mode == "keep" and last_mode != mode + if switching_to_context and (del_lines or ins_lines): + section_chunks.append( + Chunk( + orig_index=len(context) - len(del_lines), + del_lines=list(del_lines), + ins_lines=list(ins_lines), + ) + ) + del_lines = [] + ins_lines = [] + + if mode == "delete": + del_lines.append(line_content) + context.append(line_content) + elif mode == "add": + ins_lines.append(line_content) + else: + context.append(line_content) + + if del_lines or ins_lines: + section_chunks.append( + Chunk( + orig_index=len(context) - len(del_lines), + del_lines=list(del_lines), + ins_lines=list(ins_lines), + ) + ) + + if index < len(lines) and lines[index] == END_FILE: + return ReadSectionResult(context, section_chunks, index + 1, True) + + if index == orig_index: + next_line = lines[index] if index < len(lines) else "" + raise ValueError(f"Nothing in this section - index={index} {next_line}") + + return ReadSectionResult(context, section_chunks, index, False) + + +@dataclass +class ContextMatch: + new_index: int + fuzz: int + + +def _find_context(lines: list[str], context: list[str], start: int, eof: bool) -> ContextMatch: + if eof: + end_start = max(0, len(lines) - len(context)) + end_match = _find_context_core(lines, context, end_start) + if end_match.new_index != -1: + return end_match + fallback = _find_context_core(lines, context, start) + return ContextMatch(new_index=fallback.new_index, fuzz=fallback.fuzz + 10000) + return _find_context_core(lines, context, start) + + +def _find_context_core(lines: list[str], context: list[str], start: int) -> ContextMatch: + if not context: + return ContextMatch(new_index=start, fuzz=0) + + for i in range(start, len(lines)): + if _equals_slice(lines, context, i, lambda value: value): + return ContextMatch(new_index=i, fuzz=0) + for i in range(start, len(lines)): + if _equals_slice(lines, context, i, lambda value: value.rstrip()): + return ContextMatch(new_index=i, fuzz=1) + for i in range(start, len(lines)): + if _equals_slice(lines, context, i, lambda value: value.strip()): + return ContextMatch(new_index=i, fuzz=100) + + return ContextMatch(new_index=-1, fuzz=0) + + +def _equals_slice( + source: list[str], target: list[str], start: int, map_fn: Callable[[str], str] +) -> bool: + if start + len(target) > len(source): + return False + for offset, target_value in enumerate(target): + if map_fn(source[start + offset]) != map_fn(target_value): + return False + return True + + +def _apply_chunks(input: str, chunks: list[Chunk]) -> str: + orig_lines = input.split("\n") + dest_lines: list[str] = [] + cursor = 0 + + for chunk in chunks: + if chunk.orig_index > len(orig_lines): + raise ValueError( + f"applyDiff: chunk.origIndex {chunk.orig_index} > input length {len(orig_lines)}" + ) + if cursor > chunk.orig_index: + raise ValueError( + f"applyDiff: overlapping chunk at {chunk.orig_index} (cursor {cursor})" + ) + + dest_lines.extend(orig_lines[cursor : chunk.orig_index]) + cursor = chunk.orig_index + + if chunk.ins_lines: + dest_lines.extend(chunk.ins_lines) + + cursor += len(chunk.del_lines) + + dest_lines.extend(orig_lines[cursor:]) + return "\n".join(dest_lines) + + +__all__ = ["apply_diff"] diff --git a/src/agents/editor.py b/src/agents/editor.py new file mode 100644 index 000000000..38dd616b3 --- /dev/null +++ b/src/agents/editor.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +import sys +from dataclasses import dataclass +from typing import Literal, Protocol, runtime_checkable + +from .util._types import MaybeAwaitable + +ApplyPatchOperationType = Literal["create_file", "update_file", "delete_file"] + +_DATACLASS_KWARGS = {"slots": True} if sys.version_info >= (3, 10) else {} + + +@dataclass(**_DATACLASS_KWARGS) +class ApplyPatchOperation: + """Represents a single apply_patch editor operation requested by the model.""" + + type: ApplyPatchOperationType + path: str + diff: str | None = None + + +@dataclass(**_DATACLASS_KWARGS) +class ApplyPatchResult: + """Optional metadata returned by editor operations.""" + + status: Literal["completed", "failed"] | None = None + output: str | None = None + + +@runtime_checkable +class ApplyPatchEditor(Protocol): + """Host-defined editor that applies diffs on disk.""" + + def create_file( + self, operation: ApplyPatchOperation + ) -> MaybeAwaitable[ApplyPatchResult | str | None]: ... + + def update_file( + self, operation: ApplyPatchOperation + ) -> MaybeAwaitable[ApplyPatchResult | str | None]: ... + + def delete_file( + self, operation: ApplyPatchOperation + ) -> MaybeAwaitable[ApplyPatchResult | str | None]: ... diff --git a/src/agents/items.py b/src/agents/items.py index 8e7d1cfc3..24defb22d 100644 --- a/src/agents/items.py +++ b/src/agents/items.py @@ -2,7 +2,7 @@ import abc from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Generic, Literal, TypeVar, Union +from typing import TYPE_CHECKING, Any, Generic, Literal, TypeVar, Union, cast import pydantic from openai.types.responses import ( @@ -141,12 +141,13 @@ class HandoffOutputItem(RunItemBase[TResponseInputItem]): ResponseCodeInterpreterToolCall, ImageGenerationCall, LocalShellCall, + dict[str, Any], ] """A type that represents a tool call item.""" @dataclass -class ToolCallItem(RunItemBase[ToolCallItemTypes]): +class ToolCallItem(RunItemBase[Any]): """Represents a tool call e.g. a function call or computer action call.""" raw_item: ToolCallItemTypes @@ -155,13 +156,19 @@ class ToolCallItem(RunItemBase[ToolCallItemTypes]): type: Literal["tool_call_item"] = "tool_call_item" +ToolCallOutputTypes: TypeAlias = Union[ + FunctionCallOutput, + ComputerCallOutput, + LocalShellCallOutput, + dict[str, Any], +] + + @dataclass -class ToolCallOutputItem( - RunItemBase[Union[FunctionCallOutput, ComputerCallOutput, LocalShellCallOutput]] -): +class ToolCallOutputItem(RunItemBase[Any]): """Represents the output of a tool call.""" - raw_item: FunctionCallOutput | ComputerCallOutput | LocalShellCallOutput + raw_item: ToolCallOutputTypes """The raw item from the model.""" output: Any @@ -171,6 +178,25 @@ class ToolCallOutputItem( type: Literal["tool_call_output_item"] = "tool_call_output_item" + def to_input_item(self) -> TResponseInputItem: + """Converts the tool output into an input item for the next model turn. + + Hosted tool outputs (e.g. shell/apply_patch) carry a `status` field for the SDK's + book-keeping, but the Responses API does not yet accept that parameter. Strip it from the + payload we send back to the model while keeping the original raw item intact. + """ + + if isinstance(self.raw_item, dict): + payload = dict(self.raw_item) + payload_type = payload.get("type") + if payload_type == "shell_call_output": + payload.pop("status", None) + payload.pop("shell_output", None) + payload.pop("provider_data", None) + return cast(TResponseInputItem, payload) + + return super().to_input_item() + @dataclass class ReasoningItem(RunItemBase[ResponseReasoningItem]): diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py index 36a981404..466496b01 100644 --- a/src/agents/models/openai_responses.py +++ b/src/agents/models/openai_responses.py @@ -27,6 +27,7 @@ from ..logger import logger from ..model_settings import MCPToolChoice from ..tool import ( + ApplyPatchTool, CodeInterpreterTool, ComputerTool, FileSearchTool, @@ -34,6 +35,7 @@ HostedMCPTool, ImageGenerationTool, LocalShellTool, + ShellTool, Tool, WebSearchTool, ) @@ -489,6 +491,12 @@ def _convert_tool(cls, tool: Tool) -> tuple[ToolParam, ResponseIncludable | None elif isinstance(tool, HostedMCPTool): converted_tool = tool.tool_config includes = None + elif isinstance(tool, ApplyPatchTool): + converted_tool = cast(ToolParam, {"type": "apply_patch"}) + includes = None + elif isinstance(tool, ShellTool): + converted_tool = cast(ToolParam, {"type": "shell"}) + includes = None elif isinstance(tool, ImageGenerationTool): converted_tool = tool.tool_config includes = None diff --git a/src/agents/run.py b/src/agents/run.py index 5b25df4f2..c14f13e3f 100644 --- a/src/agents/run.py +++ b/src/agents/run.py @@ -6,7 +6,7 @@ import os import warnings from dataclasses import dataclass, field -from typing import Any, Callable, Generic, cast, get_args +from typing import Any, Callable, Generic, cast, get_args, get_origin from openai.types.responses import ( ResponseCompletedEvent, @@ -1886,7 +1886,19 @@ async def _input_guardrail_tripwire_triggered_for_stream( DEFAULT_AGENT_RUNNER = AgentRunner() -_TOOL_CALL_TYPES: tuple[type, ...] = get_args(ToolCallItemTypes) + + +def _get_tool_call_types() -> tuple[type, ...]: + normalized_types: list[type] = [] + for type_hint in get_args(ToolCallItemTypes): + origin = get_origin(type_hint) + candidate = origin or type_hint + if isinstance(candidate, type): + normalized_types.append(candidate) + return tuple(normalized_types) + + +_TOOL_CALL_TYPES: tuple[type, ...] = _get_tool_call_types() def _copy_str_or_list(input: str | list[TResponseInputItem]) -> str | list[TResponseInputItem]: diff --git a/src/agents/tool.py b/src/agents/tool.py index 39db129b7..c3baa6ffc 100644 --- a/src/agents/tool.py +++ b/src/agents/tool.py @@ -3,7 +3,7 @@ import inspect import json from collections.abc import Awaitable -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any, Callable, Literal, Union, overload from openai.types.responses.file_search_tool_param import Filters, RankingOptions @@ -20,6 +20,7 @@ from . import _debug from .computer import AsyncComputer, Computer +from .editor import ApplyPatchEditor from .exceptions import ModelBehaviorError from .function_schema import DocstringStyle, function_schema from .logger import logger @@ -373,12 +374,109 @@ def name(self): return "local_shell" +@dataclass +class ShellCallOutcome: + """Describes the terminal condition of a shell command.""" + + type: Literal["exit", "timeout"] + exit_code: int | None = None + + +def _default_shell_outcome() -> ShellCallOutcome: + return ShellCallOutcome(type="exit") + + +@dataclass +class ShellCommandOutput: + """Structured output for a single shell command execution.""" + + stdout: str = "" + stderr: str = "" + outcome: ShellCallOutcome = field(default_factory=_default_shell_outcome) + command: str | None = None + provider_data: dict[str, Any] | None = None + + @property + def exit_code(self) -> int | None: + return self.outcome.exit_code + + @property + def status(self) -> Literal["completed", "timeout"]: + return "timeout" if self.outcome.type == "timeout" else "completed" + + +@dataclass +class ShellResult: + """Result returned by a shell executor.""" + + output: list[ShellCommandOutput] + max_output_length: int | None = None + provider_data: dict[str, Any] | None = None + + +@dataclass +class ShellActionRequest: + """Action payload for a next-generation shell call.""" + + commands: list[str] + timeout_ms: int | None = None + max_output_length: int | None = None + + +@dataclass +class ShellCallData: + """Normalized shell call data provided to shell executors.""" + + call_id: str + action: ShellActionRequest + status: Literal["in_progress", "completed"] | None = None + raw: Any | None = None + + +@dataclass +class ShellCommandRequest: + """A request to execute a modern shell call.""" + + ctx_wrapper: RunContextWrapper[Any] + data: ShellCallData + + +ShellExecutor = Callable[[ShellCommandRequest], MaybeAwaitable[Union[str, ShellResult]]] +"""Executes a shell command sequence and returns either text or structured output.""" + + +@dataclass +class ShellTool: + """Next-generation shell tool. LocalShellTool will be deprecated in favor of this.""" + + executor: ShellExecutor + name: str = "shell" + + @property + def type(self) -> str: + return "shell" + + +@dataclass +class ApplyPatchTool: + """Hosted apply_patch tool. Lets the model request file mutations via unified diffs.""" + + editor: ApplyPatchEditor + name: str = "apply_patch" + + @property + def type(self) -> str: + return "apply_patch" + + Tool = Union[ FunctionTool, FileSearchTool, WebSearchTool, ComputerTool, HostedMCPTool, + ShellTool, + ApplyPatchTool, LocalShellTool, ImageGenerationTool, CodeInterpreterTool, diff --git a/tests/extensions/memory/test_dapr_redis_integration.py b/tests/extensions/memory/test_dapr_redis_integration.py index 858ef1801..58d540c21 100644 --- a/tests/extensions/memory/test_dapr_redis_integration.py +++ b/tests/extensions/memory/test_dapr_redis_integration.py @@ -11,15 +11,32 @@ import asyncio import os +import shutil import tempfile import time import urllib.request +import docker # type: ignore[import-untyped] import pytest +from docker.errors import DockerException # type: ignore[import-untyped] # Skip tests if dependencies are not available pytest.importorskip("dapr") # Skip tests if Dapr is not installed pytest.importorskip("testcontainers") # Skip if testcontainers is not installed +if shutil.which("docker") is None: + pytest.skip( + "Docker executable is not available; skipping Dapr integration tests", + allow_module_level=True, + ) +try: + client = docker.from_env() + client.ping() +except DockerException: + pytest.skip( + "Docker daemon is not available; skipping Dapr integration tests", allow_module_level=True + ) +else: + client.close() from testcontainers.core.container import DockerContainer # type: ignore[import-untyped] from testcontainers.core.network import Network # type: ignore[import-untyped] diff --git a/tests/test_agents_logging.py b/tests/test_agents_logging.py new file mode 100644 index 000000000..c63fe3d0e --- /dev/null +++ b/tests/test_agents_logging.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +import logging + +from agents import enable_verbose_stdout_logging + + +def test_enable_verbose_stdout_logging_attaches_handler() -> None: + logger = logging.getLogger("openai.agents") + logger.handlers.clear() + enable_verbose_stdout_logging() + assert logger.handlers + logger.handlers.clear() diff --git a/tests/test_apply_diff.py b/tests/test_apply_diff.py new file mode 100644 index 000000000..edb5be99a --- /dev/null +++ b/tests/test_apply_diff.py @@ -0,0 +1,36 @@ +"""Tests for the V4A diff helper.""" + +from __future__ import annotations + +import pytest + +from agents import apply_diff + + +def test_apply_diff_with_floating_hunk_adds_lines() -> None: + diff = "\n".join(["@@", "+hello", "+world"]) # no trailing newline + assert apply_diff("", diff) == "hello\nworld\n" + + +def test_apply_diff_create_mode_requires_plus_prefix() -> None: + diff = "plain line" + with pytest.raises(ValueError): + apply_diff("", diff, mode="create") + + +def test_apply_diff_create_mode_perserves_trailing_newline() -> None: + diff = "\n".join(["+hello", "+world", "+"]) + assert apply_diff("", diff, mode="create") == "hello\nworld\n" + + +def test_apply_diff_applies_contextual_replacement() -> None: + input_text = "line1\nline2\nline3\n" + diff = "\n".join(["@@ line1", "-line2", "+updated", " line3"]) + assert apply_diff(input_text, diff) == "line1\nupdated\nline3\n" + + +def test_apply_diff_raises_on_context_mismatch() -> None: + input_text = "one\ntwo\n" + diff = "\n".join(["@@ -1,2 +1,2 @@", " x", "-two", "+2"]) + with pytest.raises(ValueError): + apply_diff(input_text, diff) diff --git a/tests/test_apply_diff_helpers.py b/tests/test_apply_diff_helpers.py new file mode 100644 index 000000000..12141f42b --- /dev/null +++ b/tests/test_apply_diff_helpers.py @@ -0,0 +1,73 @@ +"""Direct tests for the apply_diff helpers to exercise corner cases.""" + +from __future__ import annotations + +import pytest + +from agents.apply_diff import ( + Chunk, + ParserState, + _apply_chunks, + _find_context, + _find_context_core, + _is_done, + _normalize_diff_lines, + _read_section, + _read_str, +) + + +def test_normalize_diff_lines_drops_trailing_blank() -> None: + assert _normalize_diff_lines("a\nb\n") == ["a", "b"] + + +def test_is_done_true_when_index_out_of_range() -> None: + state = ParserState(lines=["line"], index=1) + assert _is_done(state, []) + + +def test_read_str_returns_empty_when_missing_prefix() -> None: + state = ParserState(lines=["value"], index=0) + assert _read_str(state, "nomatch") == "" + assert state.index == 0 + + +def test_read_section_returns_eof_flag() -> None: + result = _read_section(["*** End of File"], 0) + assert result.eof + + +def test_read_section_raises_on_invalid_marker() -> None: + with pytest.raises(ValueError): + _read_section(["*** Bad Marker"], 0) + + +def test_read_section_raises_when_empty_segment() -> None: + with pytest.raises(ValueError): + _read_section([], 0) + + +def test_find_context_eof_fallbacks() -> None: + match = _find_context(["one"], ["missing"], start=0, eof=True) + assert match.new_index == -1 + assert match.fuzz >= 10000 + + +def test_find_context_core_stripped_matches() -> None: + match = _find_context_core([" line "], ["line"], start=0) + assert match.new_index == 0 + assert match.fuzz == 100 + + +def test_apply_chunks_rejects_bad_chunks() -> None: + with pytest.raises(ValueError): + _apply_chunks("abc", [Chunk(orig_index=10, del_lines=[], ins_lines=[])]) + + with pytest.raises(ValueError): + _apply_chunks( + "abc", + [ + Chunk(orig_index=0, del_lines=["a"], ins_lines=[]), + Chunk(orig_index=0, del_lines=["b"], ins_lines=[]), + ], + ) diff --git a/tests/test_apply_patch_tool.py b/tests/test_apply_patch_tool.py new file mode 100644 index 000000000..197a7550f --- /dev/null +++ b/tests/test_apply_patch_tool.py @@ -0,0 +1,139 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, cast + +import pytest + +from agents import Agent, ApplyPatchTool, RunConfig, RunContextWrapper, RunHooks +from agents._run_impl import ApplyPatchAction, ToolRunApplyPatchCall +from agents.editor import ApplyPatchOperation, ApplyPatchResult +from agents.items import ToolCallOutputItem + + +@dataclass +class DummyApplyPatchCall: + type: str + call_id: str + operation: dict[str, Any] + + +class RecordingEditor: + def __init__(self) -> None: + self.operations: list[ApplyPatchOperation] = [] + + def create_file(self, operation: ApplyPatchOperation) -> ApplyPatchResult: + self.operations.append(operation) + return ApplyPatchResult(output=f"Created {operation.path}") + + def update_file(self, operation: ApplyPatchOperation) -> ApplyPatchResult: + self.operations.append(operation) + return ApplyPatchResult(status="completed", output=f"Updated {operation.path}") + + def delete_file(self, operation: ApplyPatchOperation) -> ApplyPatchResult: + self.operations.append(operation) + return ApplyPatchResult(output=f"Deleted {operation.path}") + + +@pytest.mark.asyncio +async def test_apply_patch_tool_success() -> None: + editor = RecordingEditor() + tool = ApplyPatchTool(editor=editor) + tool_call = DummyApplyPatchCall( + type="apply_patch_call", + call_id="call_apply", + operation={"type": "update_file", "path": "tasks.md", "diff": "-a\n+b\n"}, + ) + tool_run = ToolRunApplyPatchCall(tool_call=tool_call, apply_patch_tool=tool) + agent = Agent(name="patcher", tools=[tool]) + context_wrapper: RunContextWrapper[Any] = RunContextWrapper(context=None) + + result = await ApplyPatchAction.execute( + agent=agent, + call=tool_run, + hooks=RunHooks[Any](), + context_wrapper=context_wrapper, + config=RunConfig(), + ) + + assert isinstance(result, ToolCallOutputItem) + assert "Updated tasks.md" in result.output + raw_item = cast(dict[str, Any], result.raw_item) + assert raw_item["type"] == "apply_patch_call_output" + assert raw_item["status"] == "completed" + assert raw_item["call_id"] == "call_apply" + assert editor.operations[0].type == "update_file" + assert isinstance(raw_item["output"], str) + assert raw_item["output"].startswith("Updated tasks.md") + input_payload = result.to_input_item() + assert isinstance(input_payload, dict) + payload_dict = cast(dict[str, Any], input_payload) + assert payload_dict["type"] == "apply_patch_call_output" + assert payload_dict["status"] == "completed" + + +@pytest.mark.asyncio +async def test_apply_patch_tool_failure() -> None: + class ExplodingEditor(RecordingEditor): + def update_file(self, operation: ApplyPatchOperation) -> ApplyPatchResult: + raise RuntimeError("boom") + + tool = ApplyPatchTool(editor=ExplodingEditor()) + tool_call = DummyApplyPatchCall( + type="apply_patch_call", + call_id="call_apply_fail", + operation={"type": "update_file", "path": "tasks.md", "diff": "-a\n+b\n"}, + ) + tool_run = ToolRunApplyPatchCall(tool_call=tool_call, apply_patch_tool=tool) + agent = Agent(name="patcher", tools=[tool]) + context_wrapper: RunContextWrapper[Any] = RunContextWrapper(context=None) + + result = await ApplyPatchAction.execute( + agent=agent, + call=tool_run, + hooks=RunHooks[Any](), + context_wrapper=context_wrapper, + config=RunConfig(), + ) + + assert isinstance(result, ToolCallOutputItem) + assert "boom" in result.output + raw_item = cast(dict[str, Any], result.raw_item) + assert raw_item["status"] == "failed" + assert isinstance(raw_item.get("output"), str) + input_payload = result.to_input_item() + assert isinstance(input_payload, dict) + payload_dict = cast(dict[str, Any], input_payload) + assert payload_dict["type"] == "apply_patch_call_output" + assert payload_dict["status"] == "failed" + + +@pytest.mark.asyncio +async def test_apply_patch_tool_accepts_mapping_call() -> None: + editor = RecordingEditor() + tool = ApplyPatchTool(editor=editor) + tool_call: dict[str, Any] = { + "type": "apply_patch_call", + "call_id": "call_mapping", + "operation": { + "type": "create_file", + "path": "notes.md", + "diff": "+hello\n", + }, + } + tool_run = ToolRunApplyPatchCall(tool_call=tool_call, apply_patch_tool=tool) + agent = Agent(name="patcher", tools=[tool]) + context_wrapper: RunContextWrapper[Any] = RunContextWrapper(context=None) + + result = await ApplyPatchAction.execute( + agent=agent, + call=tool_run, + hooks=RunHooks[Any](), + context_wrapper=context_wrapper, + config=RunConfig(), + ) + + assert isinstance(result, ToolCallOutputItem) + raw_item = cast(dict[str, Any], result.raw_item) + assert raw_item["call_id"] == "call_mapping" + assert editor.operations[0].path == "notes.md" diff --git a/tests/test_computer_action.py b/tests/test_computer_action.py index a306b1841..53f3aa9d9 100644 --- a/tests/test_computer_action.py +++ b/tests/test_computer_action.py @@ -4,7 +4,7 @@ that screenshots are taken and wrapped appropriately, and that the execute function invokes hooks and returns the expected ToolCallOutputItem.""" -from typing import Any +from typing import Any, cast import pytest from openai.types.responses.response_computer_tool_call import ( @@ -304,9 +304,8 @@ async def test_execute_invokes_hooks_and_returns_tool_call_output() -> None: assert output_item.agent is agent assert isinstance(output_item, ToolCallOutputItem) assert output_item.output == "" - raw = output_item.raw_item + raw = cast(dict[str, Any], output_item.raw_item) # Raw item is a dict-like mapping with expected output fields. - assert isinstance(raw, dict) assert raw["type"] == "computer_call_output" assert raw["output"]["type"] == "computer_screenshot" assert "image_url" in raw["output"] diff --git a/tests/test_function_tool.py b/tests/test_function_tool.py index 9f227aadb..18107773d 100644 --- a/tests/test_function_tool.py +++ b/tests/test_function_tool.py @@ -344,3 +344,18 @@ async def third_tool_on_invoke_tool(ctx: RunContextWrapper[Any], args: str) -> s assert len(tools_with_ctx) == 2 assert tools_with_ctx[0].name == "another_tool" assert tools_with_ctx[1].name == "third_tool" + + +@pytest.mark.asyncio +async def test_async_failure_error_function_is_awaited() -> None: + async def failure_handler(ctx: RunContextWrapper[Any], exc: Exception) -> str: + return f"handled:{exc}" + + @function_tool(failure_error_function=lambda ctx, exc: failure_handler(ctx, exc)) + def boom() -> None: + """Always raises to trigger the failure handler.""" + raise RuntimeError("kapow") + + ctx = ToolContext(None, tool_name=boom.name, tool_call_id="boom", tool_arguments="{}") + result = await boom.on_invoke_tool(ctx, "{}") + assert result.startswith("handled:") diff --git a/tests/test_run_step_execution.py b/tests/test_run_step_execution.py index 4cf9ae832..49601bdab 100644 --- a/tests/test_run_step_execution.py +++ b/tests/test_run_step_execution.py @@ -1,7 +1,7 @@ from __future__ import annotations import json -from typing import Any +from typing import Any, cast import pytest from pydantic import BaseModel @@ -303,15 +303,18 @@ def assert_item_is_function_tool_call( item: RunItem, name: str, arguments: str | None = None ) -> None: assert isinstance(item, ToolCallItem) - assert item.raw_item.type == "function_call" - assert item.raw_item.name == name - assert not arguments or item.raw_item.arguments == arguments + raw_item = getattr(item, "raw_item", None) + assert getattr(raw_item, "type", None) == "function_call" + assert getattr(raw_item, "name", None) == name + if arguments: + assert getattr(raw_item, "arguments", None) == arguments def assert_item_is_function_tool_call_output(item: RunItem, output: str) -> None: assert isinstance(item, ToolCallOutputItem) - assert item.raw_item["type"] == "function_call_output" - assert item.raw_item["output"] == output + raw_item = cast(dict[str, Any], item.raw_item) + assert raw_item["type"] == "function_call_output" + assert raw_item["output"] == output async def get_execute_result( diff --git a/tests/test_shell_call_serialization.py b/tests/test_shell_call_serialization.py new file mode 100644 index 000000000..8a592954b --- /dev/null +++ b/tests/test_shell_call_serialization.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +import pytest + +from agents import _run_impl as run_impl +from agents.exceptions import ModelBehaviorError +from agents.tool import ShellCallOutcome, ShellCommandOutput + + +def test_coerce_shell_call_reads_max_output_length() -> None: + tool_call = { + "call_id": "shell-1", + "action": { + "commands": ["ls"], + "maxOutputLength": 512, + }, + "status": "in_progress", + } + result = run_impl._coerce_shell_call(tool_call) + assert result.action.max_output_length == 512 + + +def test_coerce_shell_call_requires_commands() -> None: + tool_call = {"call_id": "shell-2", "action": {"commands": []}} + with pytest.raises(ModelBehaviorError): + run_impl._coerce_shell_call(tool_call) + + +def test_normalize_shell_output_handles_timeout() -> None: + entry = { + "stdout": "", + "stderr": "", + "outcome": {"type": "timeout"}, + "provider_data": {"truncated": True}, + } + normalized = run_impl._normalize_shell_output(entry) + assert normalized.status == "timeout" + assert normalized.provider_data == {"truncated": True} + + +def test_normalize_shell_output_converts_string_outcome() -> None: + entry = { + "stdout": "hi", + "stderr": "", + "status": "completed", + "outcome": "success", + "exit_code": 0, + } + normalized = run_impl._normalize_shell_output(entry) + assert normalized.status == "completed" + assert normalized.exit_code in (None, 0) + + +def test_serialize_shell_output_emits_canonical_outcome() -> None: + output = ShellCommandOutput( + stdout="hello", + stderr="", + outcome=ShellCallOutcome(type="exit", exit_code=0), + ) + payload = run_impl._serialize_shell_output(output) + assert payload["outcome"]["type"] == "exit" + assert payload["outcome"]["exit_code"] == 0 + assert "exitCode" not in payload["outcome"] diff --git a/tests/test_shell_tool.py b/tests/test_shell_tool.py new file mode 100644 index 000000000..d2132d6a2 --- /dev/null +++ b/tests/test_shell_tool.py @@ -0,0 +1,137 @@ +from __future__ import annotations + +from typing import Any, cast + +import pytest + +from agents import ( + Agent, + RunConfig, + RunContextWrapper, + RunHooks, + ShellCallOutcome, + ShellCommandOutput, + ShellResult, + ShellTool, +) +from agents._run_impl import ShellAction, ToolRunShellCall +from agents.items import ToolCallOutputItem + + +@pytest.mark.asyncio +async def test_shell_tool_structured_output_is_rendered() -> None: + shell_tool = ShellTool( + executor=lambda request: ShellResult( + output=[ + ShellCommandOutput( + command="echo hi", + stdout="hi\n", + outcome=ShellCallOutcome(type="exit", exit_code=0), + ), + ShellCommandOutput( + command="ls", + stdout="README.md\nsrc\n", + stderr="warning", + outcome=ShellCallOutcome(type="exit", exit_code=1), + ), + ], + provider_data={"runner": "demo"}, + max_output_length=4096, + ) + ) + + tool_call = { + "type": "shell_call", + "id": "shell_call", + "call_id": "call_shell", + "status": "completed", + "action": { + "commands": ["echo hi", "ls"], + "timeout_ms": 1000, + "max_output_length": 4096, + }, + } + + tool_run = ToolRunShellCall(tool_call=tool_call, shell_tool=shell_tool) + agent = Agent(name="shell-agent", tools=[shell_tool]) + context_wrapper: RunContextWrapper[Any] = RunContextWrapper(context=None) + + result = await ShellAction.execute( + agent=agent, + call=tool_run, + hooks=RunHooks[Any](), + context_wrapper=context_wrapper, + config=RunConfig(), + ) + + assert isinstance(result, ToolCallOutputItem) + assert "$ echo hi" in result.output + assert "stderr:\nwarning" in result.output + + raw_item = cast(dict[str, Any], result.raw_item) + assert raw_item["type"] == "shell_call_output" + assert raw_item["status"] == "completed" + assert raw_item["provider_data"]["runner"] == "demo" + assert raw_item["max_output_length"] == 4096 + shell_output = raw_item["shell_output"] + assert shell_output[1]["exit_code"] == 1 + assert isinstance(raw_item["output"], list) + first_output = raw_item["output"][0] + assert first_output["stdout"].startswith("hi") + assert first_output["outcome"]["type"] == "exit" + assert first_output["outcome"]["exit_code"] == 0 + assert "command" not in first_output + input_payload = result.to_input_item() + assert isinstance(input_payload, dict) + payload_dict = cast(dict[str, Any], input_payload) + assert payload_dict["type"] == "shell_call_output" + assert "status" not in payload_dict + assert "shell_output" not in payload_dict + assert "provider_data" not in payload_dict + + +@pytest.mark.asyncio +async def test_shell_tool_executor_failure_returns_error() -> None: + class ExplodingExecutor: + def __call__(self, request): + raise RuntimeError("boom") + + shell_tool = ShellTool(executor=ExplodingExecutor()) + tool_call = { + "type": "shell_call", + "id": "shell_call_fail", + "call_id": "call_shell_fail", + "status": "completed", + "action": {"commands": ["echo boom"], "timeout_ms": 1000}, + } + tool_run = ToolRunShellCall(tool_call=tool_call, shell_tool=shell_tool) + agent = Agent(name="shell-agent", tools=[shell_tool]) + context_wrapper: RunContextWrapper[Any] = RunContextWrapper(context=None) + + result = await ShellAction.execute( + agent=agent, + call=tool_run, + hooks=RunHooks[Any](), + context_wrapper=context_wrapper, + config=RunConfig(), + ) + + assert isinstance(result, ToolCallOutputItem) + assert "boom" in result.output + raw_item = cast(dict[str, Any], result.raw_item) + assert raw_item["type"] == "shell_call_output" + assert raw_item["status"] == "failed" + assert isinstance(raw_item["output"], list) + assert "boom" in raw_item["output"][0]["stdout"] + first_output = raw_item["output"][0] + assert first_output["outcome"]["type"] == "exit" + assert first_output["outcome"]["exit_code"] == 1 + assert "command" not in first_output + assert isinstance(raw_item["output"], list) + input_payload = result.to_input_item() + assert isinstance(input_payload, dict) + payload_dict = cast(dict[str, Any], input_payload) + assert payload_dict["type"] == "shell_call_output" + assert "status" not in payload_dict + assert "shell_output" not in payload_dict + assert "provider_data" not in payload_dict diff --git a/tests/test_tool_metadata.py b/tests/test_tool_metadata.py new file mode 100644 index 000000000..ad6395e9b --- /dev/null +++ b/tests/test_tool_metadata.py @@ -0,0 +1,72 @@ +from __future__ import annotations + +from typing import cast + +from openai.types.responses.tool_param import CodeInterpreter, ImageGeneration, Mcp + +from agents.computer import Computer +from agents.run_context import RunContextWrapper +from agents.tool import ( + ApplyPatchTool, + CodeInterpreterTool, + ComputerTool, + FileSearchTool, + HostedMCPTool, + ImageGenerationTool, + LocalShellTool, + ShellCallOutcome, + ShellCommandOutput, + ShellTool, + WebSearchTool, +) +from agents.tool_context import ToolContext + + +class DummyEditor: + def create_file(self, operation): + return None + + def update_file(self, operation): + return None + + def delete_file(self, operation): + return None + + +def test_tool_name_properties() -> None: + dummy_computer = cast(Computer, object()) + dummy_mcp = cast(Mcp, {"type": "mcp", "server_label": "demo"}) + dummy_code = cast(CodeInterpreter, {"type": "code_interpreter", "container": "python"}) + dummy_image = cast(ImageGeneration, {"type": "image_generation", "model": "gpt-image-1"}) + + assert FileSearchTool(vector_store_ids=[]).name == "file_search" + assert WebSearchTool().name == "web_search" + assert isinstance(ComputerTool(computer=dummy_computer).name, str) + assert HostedMCPTool(tool_config=dummy_mcp).name == "hosted_mcp" + assert CodeInterpreterTool(tool_config=dummy_code).name == "code_interpreter" + assert ImageGenerationTool(tool_config=dummy_image).name == "image_generation" + assert LocalShellTool(executor=lambda req: "ok").name == "local_shell" + assert ShellTool(executor=lambda req: "ok").type == "shell" + assert ApplyPatchTool(editor=DummyEditor()).type == "apply_patch" + + +def test_shell_command_output_status_property() -> None: + output = ShellCommandOutput(outcome=ShellCallOutcome(type="timeout")) + assert output.status == "timeout" + + +def test_tool_context_from_agent_context() -> None: + ctx = RunContextWrapper(context={"foo": "bar"}) + tool_call = ToolContext.from_agent_context( + ctx, + tool_call_id="123", + tool_call=type( + "Call", + (), + { + "name": "demo", + "arguments": "{}", + }, + )(), + ) + assert tool_call.tool_name == "demo"