
Commit be9acc9

[Bash Agent] Add the Bash computer use agent example (#372)
Signed-off-by: Mehran Maghoumi <[email protected]>
1 parent a80e5c4

7 files changed: +455 -0 lines changed
README.md (57 additions & 0 deletions)
# Bash Computer Use Agent with Nemotron

This code contains the implementation of a simple Bash shell agent that can operate the computer. The agent
is implemented in two different ways:

1. **From-scratch implementation**: where we show how to build the agent in pure Python with just the `openai` package as the dependency.
2. **LangGraph implementation**: where we show how the implementation can be simplified by LangGraph. This implementation requires the `langchain-openai` and `langgraph` packages.

# How to run?

> ⚠️ **DISCLAIMER**: This software can execute arbitrary Bash commands on your system. Use at your own risk. The authors and NVIDIA assume no responsibility for any damage, data loss, or security breaches resulting from its use. By using this software, you acknowledge and accept these risks.

## Step 1: LLM setup

Set up your LLM endpoint in `config.py`:

- `llm_base_url` should point at your NVIDIA Nemotron Nano 9B v2 provider's base URL (or your hosted endpoint, if self-hosting).
- `llm_model_name` should be your NVIDIA Nemotron Nano 9B v2 provider's name for the model (or your hosted endpoint's model name, if self-hosting).
- `llm_api_key` should be the API key for your provider (not needed if self-hosting).
- `llm_temperature` and `llm_top_p` are the sampling settings for your model. These are set to reasonable defaults for Nemotron with reasoning mode enabled.

An example with [`build.nvidia.com`](https://build.nvidia.com/nvidia/nvidia-nemotron-nano-9b-v2) as the provider:

```python
class Config:
    llm_base_url: str = "https://integrate.api.nvidia.com/v1"
    llm_model_name: str = "nvidia/nvidia-nemotron-nano-9b-v2"
    llm_api_key: str = "nvapi-XYZ"
    ...
```

> NOTE - You will need to obtain an API key if you're not hosting this model locally. For `build.nvidia.com`, instructions are available on [this page](https://build.nvidia.com/nvidia/nvidia-nemotron-nano-9b-v2) by clicking the `View Code` button.
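
To sanity-check your endpoint and key before running the agent, a quick standalone call can help. This is a minimal sketch, assuming the `openai` package and the placeholder values shown above:

```python
# Minimal connectivity check (sketch; base URL, model name, and key are the placeholders above).
from openai import OpenAI

client = OpenAI(base_url="https://integrate.api.nvidia.com/v1", api_key="nvapi-XYZ")
response = client.chat.completions.create(
    model="nvidia/nvidia-nemotron-nano-9b-v2",
    messages=[{"role": "user", "content": "Reply with a single word: ready?"}],
)
print(response.choices[0].message.content)
```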

Next, install the dependencies and run the code.

## Step 2: Install the dependencies

Use your favorite package manager to install the dependencies. For example:

```bash
pip install -r requirements.txt
```

## Step 3: Execute!

Choose one to run your Bash Agent:

```bash
python main_from_scratch.py   # From-scratch implementation
```

or

```bash
python main_langgraph.py      # LangGraph implementation
```
bash.py (114 additions & 0 deletions)
from typing import Any, Dict, List
import re
import shlex
import subprocess

from config import Config

class Bash:
    """
    An implementation of a tool that executes bash commands and keeps track of the working directory.
    """

    def __init__(self, config: Config):
        self.config = config
        # The current working directory (this is tracked and updated throughout the session)
        self.cwd = config.root_dir
        # Set the initial working directory
        self.exec_bash_command(f"cd {self.cwd}")

    def exec_bash_command(self, cmd: str) -> Dict[str, str]:
        """
        Execute the bash command after checking the allowlist.
        """
        if cmd:
            # Prevent command injection via backticks or $. This blocks variables too.
            if re.search(r"[`$]", cmd):
                return {"error": "Command injection patterns are not allowed."}

            # Check the allowlist
            for cmd_part in self._split_commands(cmd):
                if cmd_part not in self.config.allowed_commands:
                    return {"error": "Parts of this command were not in the allowlist."}

            return self._run_bash_command(cmd)

        return {"error": "No command was provided"}

    def to_json_schema(self) -> Dict[str, Any]:
        """
        Convert the function signature to a JSON schema for LLM tool calling.
        """
        return {
            "type": "function",
            "function": {
                "name": "exec_bash_command",
                "description": "Execute a bash command and return stdout/stderr and the working directory",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "cmd": {
                            "type": "string",
                            "description": "The bash command to execute"
                        }
                    },
                    "required": ["cmd"],
                },
            },
        }

    def _split_commands(self, cmd_str) -> List[str]:
        """
        Split a command string into individual commands, without the parameters.
        """
        parts = re.split(r'[;&|]+', cmd_str)
        commands = []

        for part in parts:
            tokens = shlex.split(part.strip())

            if tokens:
                commands.append(tokens[0])

        return commands

    def _run_bash_command(self, cmd: str) -> Dict[str, str]:
        """
        Runs the bash command and catches exceptions (if any).
        """
        stdout = ""
        stderr = ""
        new_cwd = self.cwd

        try:
            # Wrap the command so we can keep track of the working directory.
            wrapped = f"{cmd};echo __END__;pwd"
            result = subprocess.run(
                wrapped,
                shell=True,
                cwd=self.cwd,
                capture_output=True,
                text=True,
                executable="/bin/bash"
            )
            stderr = result.stderr
            # Find the separator marker
            split = result.stdout.split("__END__")
            stdout = split[0].strip()

            # If no output/error at all, inform that the call was successful.
            if not stdout and not stderr:
                stdout = "Command executed successfully, without any output."

            # Get the new working directory, and change it
            new_cwd = split[-1].strip()
            self.cwd = new_cwd
        except Exception as e:
            stdout = ""
            stderr = str(e)

        return {
            "stdout": stdout,
            "stderr": stderr,
            "cwd": new_cwd,
        }
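
For a quick feel of the guardrails above, a hypothetical interactive check might look like the sketch below (not part of the commit; the command strings are examples only):

```python
# Hypothetical check of the Bash tool's guardrails.
from bash import Bash
from config import Config

bash = Bash(Config())

print(bash.exec_bash_command("ls -la"))         # allowed: 'ls' is in the allowlist
print(bash.exec_bash_command("rm -rf sandbox"))  # rejected: 'rm' is not in the allowlist
print(bash.exec_bash_command("echo $HOME"))      # rejected: '$' trips the injection filter
```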
config.py (62 additions & 0 deletions)
import os
from dataclasses import dataclass, field

@dataclass
class Config:
    """
    Configuration class for the application.
    """

    # -------------------------------------
    # LLM configuration
    # -------------------------------------

    llm_base_url: str = "https://integrate.api.nvidia.com/v1"
    llm_model_name: str = "nvidia/nvidia-nemotron-nano-9b-v2"
    llm_api_key: str = "(replace with your key, not needed for local models)"
    # Sampling parameters (we've reduced the temperature to make the model more deterministic)
    llm_temperature: float = 0.1
    llm_top_p: float = 0.95

    # -------------------------------------
    # Agent configuration
    # -------------------------------------

    # The directory path that the agent can access and operate in.
    root_dir: str = os.path.dirname(os.path.abspath(__file__))

    # The list of commands that the agent can execute.
    #
    # WARNING: Be very careful about which commands you allow here.
    # By running this code you assume all responsibility for
    # unintended consequences of command execution.
    allowed_commands: list = field(default_factory=lambda: [
        "cd", "cp", "ls", "cat", "find", "touch", "echo", "grep", "pwd", "mkdir", "wget", "sort", "head", "tail", "du",
    ])

    @property
    def system_prompt(self) -> str:
        """Generate the system prompt for the LLM based on allowed commands."""
        return f"""/think

You are a helpful and very concise Bash assistant with the ability to execute commands in the shell.
You engage with users to help answer questions about bash commands, or execute their intent.
If user intent is unclear, keep engaging with them to figure out what they need and how to best help
them. If they ask questions that are not relevant to bash or computer use, decline to answer.

When a command is executed, you will be given the output from that command and any errors. Based on
that, either take further actions or yield control to the user.

The bash interpreter's output and current working directory will be given to you every time a
command is executed. Take that into account for the next conversation.
If there was an error during execution, tell the user what that error was exactly.

You are only allowed to execute the following commands. Break complex tasks into shorter commands from this list:

```
{self.allowed_commands}
```

**Never** attempt to execute a command not in this list. **Never** attempt to execute dangerous commands
like `rm`, `mv`, `rmdir`, `sudo`, etc. If the user asks you to do so, politely refuse.
"""
helpers.py (64 additions & 0 deletions)
from typing import Any, Dict, List, Tuple
from openai import OpenAI

from config import Config

class Messages:
    """
    An abstraction for a list of system/user/assistant/tool messages.
    """

    def __init__(self, system_message: str = ""):
        self.system_message = None
        self.messages = []
        self.set_system_message(system_message)

    def set_system_message(self, message):
        self.system_message = {"role": "system", "content": message}

    def add_user_message(self, message):
        self.messages.append({"role": "user", "content": message})

    def add_assistant_message(self, message):
        self.messages.append({"role": "assistant", "content": message})

    def add_tool_message(self, message, id):
        self.messages.append({"role": "tool", "content": str(message), "tool_call_id": id})

    def to_list(self) -> List[Dict[str, str]]:
        """
        Convert to a list of messages, with the system message first.
        """
        return [self.system_message] + self.messages

class LLM:
    """
    An abstraction for prompting an LLM through an OpenAI-compatible endpoint.
    """

    def __init__(self, config: Config):
        super().__init__()
        self.client = OpenAI(base_url=config.llm_base_url, api_key=config.llm_api_key)
        self.config = config
        print(f"Using model '{config.llm_model_name}' from '{config.llm_base_url}'")

    def query(
        self,
        messages: Messages,
        tools: List[Dict[str, Any]],
        max_tokens=None,
    ) -> Tuple[str, List[Dict[str, Any]]]:
        """
        Send the conversation to the model and return (response_text, tool_calls).
        """
        completion = self.client.chat.completions.create(
            model=self.config.llm_model_name,
            messages=messages.to_list(),
            tools=tools,
            temperature=self.config.llm_temperature,
            top_p=self.config.llm_top_p,
            max_tokens=max_tokens,
            stream=False
        )

        return (
            completion.choices[0].message.content,
            completion.choices[0].message.tool_calls or [],
        )
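
A hypothetical sketch of how `Messages` and `LLM` compose, mirroring how main_from_scratch.py uses them (note that some OpenAI-compatible servers reject an empty `tools` array, so the Bash tool schema is passed even on the first turn):

```python
# Hypothetical usage sketch for the helpers above (not part of the commit).
from bash import Bash
from config import Config
from helpers import LLM, Messages

config = Config()
bash = Bash(config)
llm = LLM(config)

messages = Messages(config.system_prompt)
messages.add_user_message("What files are in the current directory?")

# Returns the assistant's text (possibly empty) and any requested tool calls.
text, tool_calls = llm.query(messages, [bash.to_json_schema()])
print(text, [tc.function.name for tc in tool_calls])
```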
main_from_scratch.py (75 additions & 0 deletions)
import json

from config import Config
from bash import Bash
from helpers import Messages, LLM

def confirm_execution(cmd: str) -> bool:
    """Ask the user whether the suggested command should be executed."""
    return input(f" ▶️ Execute '{cmd}'? [y/N]: ").strip().lower() == "y"

def main(config: Config):
    bash = Bash(config)
    # The model
    llm = LLM(config)
    # The conversation history, with the system prompt
    messages = Messages(config.system_prompt)
    print("[INFO] Type 'quit' at any time to exit the agent loop.\n")

    # The main agent loop
    while True:
        # Get user message.
        user = input(f"['{bash.cwd}' 🙂] ").strip()
        if user.lower() == "quit":
            print("\n[🤖] Shutting down. Bye!\n")
            break
        if not user:
            continue
        # Always tell the agent where the current working directory is to avoid confusion.
        user += f"\n Current working directory: `{bash.cwd}`"
        messages.add_user_message(user)

        # The tool-call/response loop
        while True:
            print("\n[🤖] Thinking...")
            response, tool_calls = llm.query(messages, [bash.to_json_schema()])

            if response:
                response = response.strip()
                # Do not store the thinking part to save context space
                if "</think>" in response:
                    response = response.split("</think>")[-1].strip()

            # Add the (non-empty) response to the context
            if response:
                messages.add_assistant_message(response)

            # Process tool calls
            if tool_calls:
                for tc in tool_calls:
                    function_name = tc.function.name
                    function_args = json.loads(tc.function.arguments)

                    # Ensure it's calling the right tool
                    if function_name != "exec_bash_command" or "cmd" not in function_args:
                        tool_call_result = json.dumps({"error": "Incorrect tool or function argument"})
                    else:
                        command = function_args["cmd"]
                        # Confirm execution with the user
                        if confirm_execution(command):
                            tool_call_result = bash.exec_bash_command(command)
                        else:
                            tool_call_result = {"error": "The user declined the execution of this command."}

                    messages.add_tool_message(tool_call_result, tc.id)
            else:
                # Display the assistant's message to the user.
                if response:
                    print(response)
                print("-" * 80 + "\n")
                break

if __name__ == "__main__":
    # Load the configuration
    config = Config()
    main(config)
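
The commit also adds a main_langgraph.py, which is not shown in this view. For orientation only, a typical LangGraph wiring of the same Bash tool might look like the sketch below. This is an illustrative reconstruction, not the file from the commit; it assumes LangGraph's `create_react_agent` prebuilt and `langchain-openai`'s `ChatOpenAI`:

```python
# Illustrative sketch only: NOT the main_langgraph.py from this commit.
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent

from bash import Bash
from config import Config

config = Config()
bash = Bash(config)

@tool
def exec_bash_command(cmd: str) -> dict:
    """Execute a bash command and return stdout/stderr and the working directory."""
    return bash.exec_bash_command(cmd)

model = ChatOpenAI(
    base_url=config.llm_base_url,
    api_key=config.llm_api_key,
    model=config.llm_model_name,
    temperature=config.llm_temperature,
)
agent = create_react_agent(model, [exec_bash_command])

# The system prompt is prepended to the message list, then the agent loops
# over tool calls internally until it produces a final answer.
result = agent.invoke({
    "messages": [("system", config.system_prompt), ("user", "List the files here.")]
})
print(result["messages"][-1].content)
```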
