
Commit be9acc9

[Bash Agent] Add the Bash computer use agent example (#372)
Signed-off-by: Mehran Maghoumi <[email protected]>
1 parent a80e5c4

7 files changed: +455 -0 lines changed
README.md (57 additions & 0 deletions)
# Bash Computer Use Agent with Nemotron

This code contains the implementation of a simple Bash shell agent that can operate the computer. The agent
is implemented in two different ways:

1. **From-scratch implementation**: where we show how to build the agent in pure Python with just the `openai` package as the dependency.
2. **LangGraph implementation**: where we show how the implementation can be simplified by LangGraph. This implementation requires the `langchain-openai` and `langgraph` packages.

# How to run?

> ⚠️ **DISCLAIMER**: This software can execute arbitrary Bash commands on your system. Use at your own risk. The authors and NVIDIA assume no responsibility for any damage, data loss, or security breaches resulting from its use. By using this software, you acknowledge and accept these risks.

## Step 1: LLM setup

Set up your LLM endpoint in `config.py`:

- `llm_base_url` should point at your NVIDIA Nemotron Nano 9B v2 provider's base URL (or your hosted endpoint, if self-hosting).
- `llm_model_name` should be your NVIDIA Nemotron Nano 9B v2 provider's name for the model (or your hosted endpoint's model name, if self-hosting).
- `llm_api_key` should be the API key for your provider (not needed if self-hosting).
- `llm_temperature` and `llm_top_p` are the sampling settings for your model. These are set to reasonable defaults for Nemotron with reasoning mode enabled.

An example with [`build.nvidia.com`](https://build.nvidia.com/nvidia/nvidia-nemotron-nano-9b-v2) as the provider:

```python
class Config:
    llm_base_url: str = "https://integrate.api.nvidia.com/v1"
    llm_model_name: str = "nvidia/nvidia-nemotron-nano-9b-v2"
    llm_api_key: str = "nvapi-XYZ"
    ...
```

> NOTE - You will need to obtain an API key if you're not hosting this model locally. For `build.nvidia.com`, instructions are available on [this page](https://build.nvidia.com/nvidia/nvidia-nemotron-nano-9b-v2) by clicking the `View Code` button.
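
To sanity-check your endpoint and key before running the agent, a quick standalone call can help. This is a minimal sketch, assuming the `openai` package and the placeholder values shown above:

```python
# Minimal connectivity check (sketch; base URL, model name, and key are the placeholders above).
from openai import OpenAI

client = OpenAI(base_url="https://integrate.api.nvidia.com/v1", api_key="nvapi-XYZ")
response = client.chat.completions.create(
    model="nvidia/nvidia-nemotron-nano-9b-v2",
    messages=[{"role": "user", "content": "Reply with a single word: ready?"}],
)
print(response.choices[0].message.content)
```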

Next, install the dependencies and run the code.

## Step 2: Install the dependencies

Use your favorite package manager to install the dependencies. For example:

```bash
pip install -r requirements.txt
```

## Step 3: Execute!

Choose one to run your Bash Agent:

```bash
python main_from_scratch.py   # From-scratch implementation
```

or

```bash
python main_langgraph.py      # LangGraph implementation
```
bash.py (114 additions & 0 deletions)
from typing import Any, Dict, List
import re
import shlex
import subprocess

from config import Config

class Bash:
    """
    An implementation of a tool that executes bash commands and keeps track of the working directory.
    """

    def __init__(self, config: Config):
        self.config = config
        # The current working directory (this is tracked and updated throughout the session)
        self.cwd = config.root_dir
        # Set the initial working directory
        self.exec_bash_command(f"cd {self.cwd}")

    def exec_bash_command(self, cmd: str) -> Dict[str, str]:
        """
        Execute the bash command after checking the allowlist.
        """
        if cmd:
            # Prevent command injection via backticks or $. This blocks variables too.
            if re.search(r"[`$]", cmd):
                return {"error": "Command injection patterns are not allowed."}

            # Check the allowlist
            for cmd_part in self._split_commands(cmd):
                if cmd_part not in self.config.allowed_commands:
                    return {"error": "Parts of this command were not in the allowlist."}

            return self._run_bash_command(cmd)

        return {"error": "No command was provided"}

    def to_json_schema(self) -> Dict[str, Any]:
        """
        Convert the function signature to a JSON schema for LLM tool calling.
        """
        return {
            "type": "function",
            "function": {
                "name": "exec_bash_command",
                "description": "Execute a bash command and return stdout/stderr and the working directory",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "cmd": {
                            "type": "string",
                            "description": "The bash command to execute"
                        }
                    },
                    "required": ["cmd"],
                },
            },
        }

    def _split_commands(self, cmd_str) -> List[str]:
        """
        Split a command string into individual commands, without the parameters.
        """
        parts = re.split(r'[;&|]+', cmd_str)
        commands = []

        for part in parts:
            tokens = shlex.split(part.strip())

            if tokens:
                commands.append(tokens[0])

        return commands

    def _run_bash_command(self, cmd: str) -> Dict[str, str]:
        """
        Runs the bash command and catches exceptions (if any).
        """
        stdout = ""
        stderr = ""
        new_cwd = self.cwd

        try:
            # Wrap the command so we can keep track of the working directory.
            wrapped = f"{cmd};echo __END__;pwd"
            result = subprocess.run(
                wrapped,
                shell=True,
                cwd=self.cwd,
                capture_output=True,
                text=True,
                executable="/bin/bash"
            )
            stderr = result.stderr
            # Find the separator marker
            split = result.stdout.split("__END__")
            stdout = split[0].strip()

            # If no output/error at all, inform that the call was successful.
            if not stdout and not stderr:
                stdout = "Command executed successfully, without any output."

            # Get the new working directory, and change it
            new_cwd = split[-1].strip()
            self.cwd = new_cwd
        except Exception as e:
            stdout = ""
            stderr = str(e)

        return {
            "stdout": stdout,
            "stderr": stderr,
            "cwd": new_cwd,
        }
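
For a quick feel of the guardrails above, a hypothetical interactive check might look like the sketch below (not part of the commit; the command strings are examples only):

```python
# Hypothetical check of the Bash tool's guardrails.
from bash import Bash
from config import Config

bash = Bash(Config())

print(bash.exec_bash_command("ls -la"))         # allowed: 'ls' is in the allowlist
print(bash.exec_bash_command("rm -rf sandbox"))  # rejected: 'rm' is not in the allowlist
print(bash.exec_bash_command("echo $HOME"))      # rejected: '$' trips the injection filter
```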
config.py (62 additions & 0 deletions)
import os
from dataclasses import dataclass, field

@dataclass
class Config:
    """
    Configuration class for the application.
    """

    # -------------------------------------
    # LLM configuration
    # -------------------------------------

    llm_base_url: str = "https://integrate.api.nvidia.com/v1"
    llm_model_name: str = "nvidia/nvidia-nemotron-nano-9b-v2"
    llm_api_key: str = "(replace with your key, not needed for local models)"
    # Sampling parameters (we've reduced the temperature to make the model more deterministic)
    llm_temperature: float = 0.1
    llm_top_p: float = 0.95

    # -------------------------------------
    # Agent configuration
    # -------------------------------------

    # The directory path that the agent can access and operate in.
    root_dir: str = os.path.dirname(os.path.abspath(__file__))

    # The list of commands that the agent can execute.
    #
    # WARNING: Be very careful about which commands you allow here.
    # By running this code you assume all responsibility for
    # unintended consequences of command execution.
    allowed_commands: list = field(default_factory=lambda: [
        "cd", "cp", "ls", "cat", "find", "touch", "echo", "grep", "pwd", "mkdir", "wget", "sort", "head", "tail", "du",
    ])

    @property
    def system_prompt(self) -> str:
        """Generate the system prompt for the LLM based on allowed commands."""
        return f"""/think

You are a helpful and very concise Bash assistant with the ability to execute commands in the shell.
You engage with users to help answer questions about bash commands, or execute their intent.
If user intent is unclear, keep engaging with them to figure out what they need and how to best help
them. If they ask questions that are not relevant to bash or computer use, decline to answer.

When a command is executed, you will be given the output from that command and any errors. Based on
that, either take further actions or yield control to the user.

The bash interpreter's output and current working directory will be given to you every time a
command is executed. Take that into account for the next conversation.
If there was an error during execution, tell the user what that error was exactly.

You are only allowed to execute the following commands. Break complex tasks into shorter commands from this list:

```
{self.allowed_commands}
```

**Never** attempt to execute a command not in this list. **Never** attempt to execute dangerous commands
like `rm`, `mv`, `rmdir`, `sudo`, etc. If the user asks you to do so, politely refuse.
"""
helpers.py (64 additions & 0 deletions)
from typing import Any, Dict, List, Tuple
from openai import OpenAI

from config import Config

class Messages:
    """
    An abstraction for a list of system/user/assistant/tool messages.
    """

    def __init__(self, system_message: str = ""):
        self.system_message = None
        self.messages = []
        self.set_system_message(system_message)

    def set_system_message(self, message):
        self.system_message = {"role": "system", "content": message}

    def add_user_message(self, message):
        self.messages.append({"role": "user", "content": message})

    def add_assistant_message(self, message):
        self.messages.append({"role": "assistant", "content": message})

    def add_tool_message(self, message, id):
        self.messages.append({"role": "tool", "content": str(message), "tool_call_id": id})

    def to_list(self) -> List[Dict[str, str]]:
        """
        Convert to a list of messages, with the system message first.
        """
        return [self.system_message] + self.messages

class LLM:
    """
    An abstraction for prompting an LLM through an OpenAI-compatible endpoint.
    """

    def __init__(self, config: Config):
        super().__init__()
        self.client = OpenAI(base_url=config.llm_base_url, api_key=config.llm_api_key)
        self.config = config
        print(f"Using model '{config.llm_model_name}' from '{config.llm_base_url}'")

    def query(
        self,
        messages: Messages,
        tools: List[Dict[str, Any]],
        max_tokens=None,
    ) -> Tuple[str, List[Dict[str, Any]]]:
        """
        Send the conversation to the model and return (response_text, tool_calls).
        """
        completion = self.client.chat.completions.create(
            model=self.config.llm_model_name,
            messages=messages.to_list(),
            tools=tools,
            temperature=self.config.llm_temperature,
            top_p=self.config.llm_top_p,
            max_tokens=max_tokens,
            stream=False
        )

        return (
            completion.choices[0].message.content,
            completion.choices[0].message.tool_calls or [],
        )
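
A hypothetical sketch of how `Messages` and `LLM` compose, mirroring how main_from_scratch.py uses them (note that some OpenAI-compatible servers reject an empty `tools` array, so the Bash tool schema is passed even on the first turn):

```python
# Hypothetical usage sketch for the helpers above (not part of the commit).
from bash import Bash
from config import Config
from helpers import LLM, Messages

config = Config()
bash = Bash(config)
llm = LLM(config)

messages = Messages(config.system_prompt)
messages.add_user_message("What files are in the current directory?")

# Returns the assistant's text (possibly empty) and any requested tool calls.
text, tool_calls = llm.query(messages, [bash.to_json_schema()])
print(text, [tc.function.name for tc in tool_calls])
```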
main_from_scratch.py (75 additions & 0 deletions)
import json

from config import Config
from bash import Bash
from helpers import Messages, LLM

def confirm_execution(cmd: str) -> bool:
    """Ask the user whether the suggested command should be executed."""
    return input(f" ▶️ Execute '{cmd}'? [y/N]: ").strip().lower() == "y"

def main(config: Config):
    bash = Bash(config)
    # The model
    llm = LLM(config)
    # The conversation history, with the system prompt
    messages = Messages(config.system_prompt)
    print("[INFO] Type 'quit' at any time to exit the agent loop.\n")

    # The main agent loop
    while True:
        # Get user message.
        user = input(f"['{bash.cwd}' 🙂] ").strip()
        if user.lower() == "quit":
            print("\n[🤖] Shutting down. Bye!\n")
            break
        if not user:
            continue
        # Always tell the agent where the current working directory is to avoid confusion.
        user += f"\n Current working directory: `{bash.cwd}`"
        messages.add_user_message(user)

        # The tool-call/response loop
        while True:
            print("\n[🤖] Thinking...")
            response, tool_calls = llm.query(messages, [bash.to_json_schema()])

            if response:
                response = response.strip()
                # Do not store the thinking part to save context space
                if "</think>" in response:
                    response = response.split("</think>")[-1].strip()

            # Add the (non-empty) response to the context
            if response:
                messages.add_assistant_message(response)

            # Process tool calls
            if tool_calls:
                for tc in tool_calls:
                    function_name = tc.function.name
                    function_args = json.loads(tc.function.arguments)

                    # Ensure it's calling the right tool
                    if function_name != "exec_bash_command" or "cmd" not in function_args:
                        tool_call_result = json.dumps({"error": "Incorrect tool or function argument"})
                    else:
                        command = function_args["cmd"]
                        # Confirm execution with the user
                        if confirm_execution(command):
                            tool_call_result = bash.exec_bash_command(command)
                        else:
                            tool_call_result = {"error": "The user declined the execution of this command."}

                    messages.add_tool_message(tool_call_result, tc.id)
            else:
                # Display the assistant's message to the user.
                if response:
                    print(response)
                print("-" * 80 + "\n")
                break

if __name__ == "__main__":
    # Load the configuration
    config = Config()
    main(config)
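
The commit also adds a main_langgraph.py, which is not shown in this view. For orientation only, a typical LangGraph wiring of the same Bash tool might look like the sketch below. This is an illustrative reconstruction, not the file from the commit; it assumes LangGraph's `create_react_agent` prebuilt and `langchain-openai`'s `ChatOpenAI`:

```python
# Illustrative sketch only: NOT the main_langgraph.py from this commit.
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent

from bash import Bash
from config import Config

config = Config()
bash = Bash(config)

@tool
def exec_bash_command(cmd: str) -> dict:
    """Execute a bash command and return stdout/stderr and the working directory."""
    return bash.exec_bash_command(cmd)

model = ChatOpenAI(
    base_url=config.llm_base_url,
    api_key=config.llm_api_key,
    model=config.llm_model_name,
    temperature=config.llm_temperature,
)
agent = create_react_agent(model, [exec_bash_command])

# The system prompt is prepended to the message list, then the agent loops
# over tool calls internally until it produces a final answer.
result = agent.invoke({
    "messages": [("system", config.system_prompt), ("user", "List the files here.")]
})
print(result["messages"][-1].content)
```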
