Skip to content

Commit cfddc7c

Browse files
authored
Fix Local shell tool: return tool output to the LLM (openai#1855)
1 parent 75fd791 commit cfddc7c

File tree

3 files changed

+217
-5
lines changed

3 files changed

+217
-5
lines changed

examples/tools/local_shell.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import asyncio
2+
import os
3+
import subprocess
4+
5+
from agents import Agent, LocalShellCommandRequest, LocalShellTool, Runner, trace
6+
7+
8+
def shell_executor(request: LocalShellCommandRequest) -> str:
    """Run the command described by a local shell tool call and return its output.

    The command, working directory, extra environment variables and timeout are
    read from ``request.data.action``. The returned string is what gets sent back
    to the model, so failures are reported as text instead of being raised.

    Args:
        request: The local shell request; only ``request.data.action`` is used.

    Returns:
        Captured stdout followed by stderr. If the process exits with a non-zero
        status, a trailing ``[exit code: N]`` line is appended so the model can
        tell the command failed even when it printed nothing. On timeout or any
        other execution error a short explanatory message is returned instead.
    """
    action = request.data.action

    try:
        completed = subprocess.run(
            action.command,
            cwd=action.working_directory or os.getcwd(),
            # Extra variables are layered on top of the current environment.
            env={**os.environ, **action.env} if action.env else os.environ,
            capture_output=True,
            text=True,
            # Undecodable bytes become U+FFFD instead of discarding all output.
            errors="replace",
            # timeout_ms is in milliseconds; subprocess expects seconds.
            timeout=(action.timeout_ms / 1000) if action.timeout_ms else None,
        )
    except subprocess.TimeoutExpired:
        return "Command execution timed out"
    except Exception as e:
        # Tool boundary: report the failure to the model rather than crash the run.
        return f"Error executing command: {e}"

    output = completed.stdout + completed.stderr
    if completed.returncode != 0:
        if output and not output.endswith("\n"):
            output += "\n"
        output += f"[exit code: {completed.returncode}]"
    return output
26+
27+
28+
async def main():
    """Ask a shell-enabled agent to list and count files, then print its answer."""
    shell_tool = LocalShellTool(executor=shell_executor)
    shell_agent = Agent(
        name="Shell Assistant",
        instructions="You are a helpful assistant that can execute shell commands.",
        # Local shell tool requires a compatible model
        model="codex-mini-latest",
        tools=[shell_tool],
    )
    prompt = "List the files in the current directory and tell me how many there are."

    # Run inside a named trace so the run is grouped under one trace.
    with trace("Local shell example"):
        run_result = await Runner.run(shell_agent, prompt)
        print(run_result.final_output)


if __name__ == "__main__":
    asyncio.run(main())

src/agents/_run_impl.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -267,10 +267,11 @@ async def execute_tools_and_side_effects(
267267
new_step_items: list[RunItem] = []
268268
new_step_items.extend(processed_response.new_items)
269269

270-
# First, lets run the tool calls - function tools and computer actions
270+
# First, let's run the tool calls - function tools, computer actions, and local shell calls
271271
(
272272
(function_results, tool_input_guardrail_results, tool_output_guardrail_results),
273273
computer_results,
274+
local_shell_results,
274275
) = await asyncio.gather(
275276
cls.execute_function_tool_calls(
276277
agent=agent,
@@ -286,9 +287,17 @@ async def execute_tools_and_side_effects(
286287
context_wrapper=context_wrapper,
287288
config=run_config,
288289
),
290+
cls.execute_local_shell_calls(
291+
agent=agent,
292+
calls=processed_response.local_shell_calls,
293+
hooks=hooks,
294+
context_wrapper=context_wrapper,
295+
config=run_config,
296+
),
289297
)
290298
new_step_items.extend([result.run_item for result in function_results])
291299
new_step_items.extend(computer_results)
300+
new_step_items.extend(local_shell_results)
292301

293302
# Next, run the MCP approval requests
294303
if processed_response.mcp_approval_requests:
@@ -1414,12 +1423,13 @@ async def execute(
14141423

14151424
return ToolCallOutputItem(
14161425
agent=agent,
1417-
output=output,
1418-
raw_item={
1426+
output=result,
1427+
# LocalShellCallOutput type uses the field name "id", but the server wants "call_id".
1428+
# raw_item keeps the upstream type, so we ignore the type checker here.
1429+
raw_item={ # type: ignore[misc, arg-type]
14191430
"type": "local_shell_call_output",
1420-
"id": call.tool_call.call_id,
1431+
"call_id": call.tool_call.call_id,
14211432
"output": result,
1422-
# "id": "out" + call.tool_call.id, # TODO remove this, it should be optional
14231433
},
14241434
)
14251435

tests/test_local_shell_tool.py

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
"""Tests for local shell tool execution.
2+
3+
These confirm that LocalShellAction.execute forwards the command to the executor
4+
and that Runner.run executes local shell calls and records their outputs.
5+
"""
6+
7+
from typing import Any, cast
8+
9+
import pytest
10+
from openai.types.responses import ResponseOutputText
11+
from openai.types.responses.response_output_item import LocalShellCall, LocalShellCallAction
12+
13+
from agents import (
14+
Agent,
15+
LocalShellCommandRequest,
16+
LocalShellTool,
17+
RunConfig,
18+
RunContextWrapper,
19+
RunHooks,
20+
Runner,
21+
)
22+
from agents._run_impl import LocalShellAction, ToolRunLocalShellCall
23+
from agents.items import ToolCallOutputItem
24+
25+
from .fake_model import FakeModel
26+
from .test_responses import get_text_message
27+
28+
29+
class RecordingLocalShellExecutor:
    """Callable stand-in for a `LocalShellTool` executor.

    Each request it is called with is stored in ``calls`` (in call order) and the
    preconfigured ``output`` string is returned.
    """

    def __init__(self, output: str = "shell output") -> None:
        # Requests received so far, oldest first.
        self.calls: list[LocalShellCommandRequest] = []
        # Canned result handed back on every invocation.
        self.output = output

    def __call__(self, request: LocalShellCommandRequest) -> str:
        """Record ``request`` and return the canned output."""
        self.calls += [request]
        return self.output
39+
40+
41+
@pytest.mark.asyncio
async def test_local_shell_action_execute_invokes_executor() -> None:
    """`LocalShellAction.execute` passes the call to the executor and wraps its output."""
    recorder = RecordingLocalShellExecutor(output="test output")
    shell_tool = LocalShellTool(executor=recorder)

    shell_call = LocalShellCall(
        id="lsh_123",
        action=LocalShellCallAction(
            command=["bash", "-c", "ls"],
            env={"TEST": "value"},
            type="exec",
            timeout_ms=5000,
            working_directory="/tmp",
        ),
        call_id="call_456",
        status="completed",
        type="local_shell_call",
    )
    test_agent = Agent(name="test_agent", tools=[shell_tool])
    ctx: RunContextWrapper[Any] = RunContextWrapper(context=None)

    produced = await LocalShellAction.execute(
        agent=test_agent,
        call=ToolRunLocalShellCall(tool_call=shell_call, local_shell_tool=shell_tool),
        hooks=RunHooks[Any](),
        context_wrapper=ctx,
        config=RunConfig(),
    )

    # The executor was invoked exactly once, with the original call and context.
    assert len(recorder.calls) == 1
    seen = recorder.calls[0]
    assert isinstance(seen, LocalShellCommandRequest)
    assert seen.ctx_wrapper is ctx
    assert seen.data is shell_call
    seen_action = seen.data.action
    assert seen_action.command == ["bash", "-c", "ls"]
    assert seen_action.env == {"TEST": "value"}
    assert seen_action.timeout_ms == 5000
    assert seen_action.working_directory == "/tmp"

    # The returned run item carries the executor's output for this agent.
    assert isinstance(produced, ToolCallOutputItem)
    assert produced.agent is test_agent
    assert produced.output == "test output"

    # The raw payload is a plain dict keyed by "call_id".
    assert isinstance(produced.raw_item, dict)
    payload = cast(dict[str, Any], produced.raw_item)
    assert payload["type"] == "local_shell_call_output"
    assert payload["call_id"] == "call_456"
    assert payload["output"] == "test output"
93+
94+
95+
@pytest.mark.asyncio
async def test_runner_executes_local_shell_calls() -> None:
    """`Runner.run` executes a local shell call and records its output item.

    The fake model first emits a message plus a local shell call, then a final
    message. The run must invoke the executor once and produce, in order:
    message, tool call, tool call output, message.
    """
    executor = RecordingLocalShellExecutor(output="shell result")
    tool = LocalShellTool(executor=executor)

    model = FakeModel()
    agent = Agent(name="shell-agent", model=model, tools=[tool])

    action = LocalShellCallAction(
        command=["bash", "-c", "echo shell"],
        env={},
        type="exec",
        timeout_ms=1000,
        working_directory="/tmp",
    )
    local_shell_call = LocalShellCall(
        id="lsh_test",
        action=action,
        call_id="call_local_shell",
        status="completed",
        type="local_shell_call",
    )

    model.add_multiple_turn_outputs(
        [
            [get_text_message("running shell"), local_shell_call],
            [get_text_message("shell complete")],
        ]
    )

    result = await Runner.run(agent, input="please run shell")

    assert len(executor.calls) == 1
    request = executor.calls[0]
    assert isinstance(request, LocalShellCommandRequest)
    assert request.data is local_shell_call

    items = result.new_items
    assert len(items) == 4

    message_before = items[0]
    assert message_before.type == "message_output_item"
    first_content = message_before.raw_item.content[0]
    assert isinstance(first_content, ResponseOutputText)
    assert first_content.text == "running shell"

    tool_call_item = items[1]
    assert tool_call_item.type == "tool_call_item"
    assert tool_call_item.raw_item is local_shell_call

    local_shell_output = items[2]
    assert isinstance(local_shell_output, ToolCallOutputItem)
    assert local_shell_output.output == "shell result"

    # Narrow raw_item to a dict before indexing, and pin the "call_id" key: the
    # output item must reference the originating call by call_id, not id.
    assert isinstance(local_shell_output.raw_item, dict)
    raw_output = cast(dict[str, Any], local_shell_output.raw_item)
    assert raw_output["type"] == "local_shell_call_output"
    assert raw_output["call_id"] == "call_local_shell"
    assert raw_output["output"] == "shell result"

    message_after = items[3]
    assert message_after.type == "message_output_item"
    last_content = message_after.raw_item.content[0]
    assert isinstance(last_content, ResponseOutputText)
    assert last_content.text == "shell complete"

    assert result.final_output == "shell complete"
    assert len(result.raw_responses) == 2

0 commit comments

Comments
 (0)