From d1067be0a3a113ef97b080ec03a7cbbd5f6b6f2b Mon Sep 17 00:00:00 2001 From: ryantzr1 Date: Mon, 5 Jan 2026 13:49:58 +0800 Subject: [PATCH 1/2] Add Codex Agent cookbook documentation - Add new Cookbooks section with Codex Agent guide - Include complete example for local and hub modes - Fix shell tool root user check and error handling --- docs/cookbooks/codex-coding.mdx | 368 +++++++++++++++++++++++++++++ docs/docs.json | 8 +- examples/06_codex_coding_agent.py | 377 ++++++++++++++++++++++++++++++ hud/tools/shell.py | 6 +- 4 files changed, 755 insertions(+), 4 deletions(-) create mode 100644 docs/cookbooks/codex-coding.mdx create mode 100644 examples/06_codex_coding_agent.py diff --git a/docs/cookbooks/codex-coding.mdx b/docs/cookbooks/codex-coding.mdx new file mode 100644 index 00000000..9eacab50 --- /dev/null +++ b/docs/cookbooks/codex-coding.mdx @@ -0,0 +1,368 @@ +--- +title: "Codex Agent" +description: "Build coding agents with OpenAI's native shell and apply_patch tools" +icon: "code" +--- + +HUD provides native support for OpenAI's coding tools (`shell` and `apply_patch`), enabling you to build powerful coding agents that can create, modify, and execute code. + + + Follow along with the full working example on GitHub. + + +## Overview + +OpenAI's Responses API includes specialized tools for coding tasks: + +| Tool | Purpose | HUD Implementation | +| ------------- | ------------------------------------------------------ | -------------------------------------- | +| `shell` | Execute shell commands in a persistent bash session | `hud.tools.shell.ShellTool` | +| `apply_patch` | Create, update, and delete files using V4A diff format | `hud.tools.apply_patch.ApplyPatchTool` | + +When you register tools named `shell` or `apply_patch` in your environment, the `OpenAIAgent` automatically converts them to OpenAI's native tool types for optimal performance. 
+ +## Two Modes + +HUD supports two execution modes for coding agents: + +| Mode | Tools Run On | Inference Via | API Keys Required | +| --------------------- | ------------ | --------------- | ----------------- | +| **Local** (`--local`) | Your machine | OpenAI directly | `OPENAI_API_KEY` | +| **Hub** (default) | HUD Cloud | HUD Gateway | `HUD_API_KEY` | + +Both modes support traces on hud.ai if `HUD_API_KEY` is set. + +## Quick Start + +### Local Mode (No Docker) + +Run coding agents directly on your machine without any infrastructure: + +```python +import hud +from hud.agents.openai import OpenAIAgent +from hud.tools.shell import ShellTool +from hud.tools.apply_patch import ApplyPatchTool + +# Create environment with coding tools +env = hud.Environment("coding") +shell_tool = ShellTool() +apply_patch_tool = ApplyPatchTool(base_path="/path/to/workspace") + +@env.tool() +async def shell(commands: list[str], timeout_ms: int | None = None): + """Execute shell commands.""" + result = await shell_tool(commands=commands, timeout_ms=timeout_ms) + return result.to_dict() + +@env.tool() +async def apply_patch(type: str, path: str, diff: str | None = None): + """Apply file patches.""" + result = await apply_patch_tool(type=type, path=path, diff=diff) + return result.to_dict() + +# Run with OpenAI agent (calls OpenAI directly) +agent = OpenAIAgent.create(model="gpt-5.1") + +async with hud.eval(env(), name="coding-task") as ctx: + result = await agent.run(ctx, max_steps=20) +``` + +### Hub Mode (Cloud Execution) + +Connect to HUD Hub for full cloud execution and telemetry: + +```python +import hud +from hud.agents.openai import OpenAIAgent +from hud.settings import settings +from openai import AsyncOpenAI + +# Connect to HUD Hub environment +env = hud.Environment() +env.connect_hub("codex_sandbox_environment") + +# Use HUD Gateway for inference (full telemetry) +model_client = AsyncOpenAI( + base_url=settings.hud_gateway_url, + api_key=settings.api_key, +) +agent = 
OpenAIAgent.create( + model="gpt-5.1", + model_client=model_client, + validate_api_key=False, +) + +async with hud.eval(env(), name="coding-task") as ctx: + result = await agent.run(ctx, max_steps=20) +``` + + + The first request may take a few seconds while the environment spins up in the + cloud. Subsequent requests will be faster. + + +## Tool Specifications + +### Shell Tool + +The `ShellTool` provides a persistent bash session for executing commands. + +**Features:** + +- Auto-restart on error (session automatically restarts if needed) +- Dynamic timeout via `timeout_ms` parameter +- Persistent environment (exported variables, working directory) +- Concurrent command execution support + +**Input Schema:** + +```python +{ + "commands": ["ls -la", "cat file.py"], # List of commands + "timeout_ms": 30000, # Optional timeout per command + "max_output_length": 10000 # Optional output limit +} +``` + +**Output Format:** + +```python +{ + "output": [ + { + "stdout": "file1.py\nfile2.py", + "stderr": "", + "outcome": {"type": "exit", "exit_code": 0} + } + ] +} +``` + +### Apply Patch Tool + +The `ApplyPatchTool` creates, updates, and deletes files using OpenAI's V4A diff format. + +**Operations:** + +| Operation | Description | Diff Required | +| ------------- | -------------------- | ------------- | +| `create_file` | Create a new file | Yes | +| `update_file` | Modify existing file | Yes | +| `delete_file` | Remove a file | No | + +**Input Schema:** + +```python +{ + "type": "update_file", + "path": "src/main.py", + "diff": "..." 
# V4A diff content +} +``` + +**V4A Diff Format Example:** + +```diff +@@ def hello(): +- print("Hello") ++ print("Hello, World!") +``` + +**Output Format:** + +```python +{ + "status": "completed", # or "failed" + "output": "Updated src/main.py" +} +``` + +## Agent Integration + +The `OpenAIAgent` automatically detects `shell` and `apply_patch` tools and converts them to OpenAI's native types: + +```python +# In hud/agents/openai.py +def _to_openai_tool(self, tool): + if tool.name == "shell": + return FunctionShellToolParam(type="shell") + if tool.name == "apply_patch": + return ApplyPatchToolParam(type="apply_patch") + # ... regular function tools +``` + +This means: + +1. The model sees native `shell` and `apply_patch` tools +2. Responses include `shell_call` and `apply_patch_call` output types +3. The agent routes these back to your registered tools + +## Complete Example + +Here's the full local mode example with a working directory: + +```python +import asyncio +import os +import tempfile + +from dotenv import load_dotenv +from openai import AsyncOpenAI + +load_dotenv() # Load .env file + +import hud +from hud.agents.openai import OpenAIAgent +from hud.tools.shell import ShellTool +from hud.tools.apply_patch import ApplyPatchTool + + +async def main(): + # Set up working directory + work_dir = "./codex_output" + os.makedirs(work_dir, exist_ok=True) + base_path = os.path.abspath(work_dir) + + # Initialize tools + shell_tool = ShellTool() + apply_patch_tool = ApplyPatchTool(base_path=base_path) + + # Create environment with local tools + env = hud.Environment("local-codex") + + @env.tool() + async def shell( + commands: list[str], + timeout_ms: int | None = None, + max_output_length: int | None = None, + ) -> dict: + """Execute shell commands in a bash session.""" + # Change to working directory before executing + prefixed_commands = [f"cd {base_path} && {cmd}" for cmd in commands] + result = await shell_tool( + commands=prefixed_commands, + 
timeout_ms=timeout_ms, + max_output_length=max_output_length, + ) + return result.to_dict() + + @env.tool() + async def apply_patch( + type: str, + path: str, + diff: str | None = None, + ) -> dict: + """Apply file operations using V4A diff format.""" + result = await apply_patch_tool(type=type, path=path, diff=diff) + return result.to_dict() + + # Define scenario + @env.scenario("coding_task") + async def coding_task_scenario(task_description: str): + yield f"""You are a skilled software developer. Complete the following task: + +{task_description} + +Use the available tools: +- `shell` to run commands (ls, cat, python, etc.) +- `apply_patch` to create or modify files + +Work in the current directory. When done, verify your work runs correctly.""" + + yield 1.0 + + # Create agent + agent = OpenAIAgent.create(model="gpt-5.1", verbose=True) + + # Run the task + task = "Create a Python script called main.py that prints Hello World" + eval_task = env("coding_task", task_description=task) + + async with hud.eval(eval_task, name="codex-coding-local") as ctx: + await agent.run(ctx, max_steps=20) + + print(f"Reward: {ctx.reward}") + print(f"Files created in: {base_path}") + + # Show created files + for f in os.listdir(base_path): + print(f" - {f}") + + +asyncio.run(main()) +``` + +## CLI Usage + +### Setting Up API Keys + +Create a `.env` file in your project root: + +```bash +# For local mode (calls OpenAI directly) +OPENAI_API_KEY=sk-... + +# For hub mode OR traces (recommended) +HUD_API_KEY=sk-hud-... +``` + +Get your keys: + +- **HUD_API_KEY**: [hud.ai/project/api-keys](https://hud.ai/project/api-keys) +- **OPENAI_API_KEY**: [platform.openai.com/api-keys](https://platform.openai.com/api-keys) + + + If you have both keys set, you get local execution with cloud traces - the + best of both worlds! 
+ + +### Running the Example + +```bash +# Local mode - tools run on your machine +uv run python examples/06_codex_coding_agent.py --local + +# Local mode with an explicit working directory +uv run python examples/06_codex_coding_agent.py --local --work-dir ./codex_output + +# Hub mode - full cloud execution (default) +uv run python examples/06_codex_coding_agent.py + +# Custom task +uv run python examples/06_codex_coding_agent.py --local \ + --task "Create a Python script that prints the Fibonacci sequence up to 10 numbers" + +# Verbose output +uv run python examples/06_codex_coding_agent.py --local --verbose +``` + +### CLI Options + +| Flag | Default | Description | +| ------------- | ------------------ | -------------------------------------------------- | +| `--local` | Off | Run locally (tools on your machine, OpenAI direct) | +| `--task` | Hello World script | The coding task to complete | +| `--model` | `gpt-5.1` | Codex-capable model (`gpt-5.1`, `gpt-5.1-codex`) | +| `--work-dir` | `./codex_output` | Working directory (local mode only) | +| `--max-steps` | `20` | Maximum agent steps | +| `--verbose` | Off | Enable verbose output | + +## Security Considerations + + + The shell and apply_patch tools can execute arbitrary commands and modify + files. Use them in sandboxed environments for untrusted tasks. 
+ + +## See Also + +- [Codex-capable models](https://platform.openai.com/docs/guides/tools-shell#supported-models) - OpenAI models that support native shell and apply_patch tools +- [Tools Reference](/reference/tools) - Complete tool documentation +- [OpenAI Agent](/reference/agents#openaiagent) - Agent configuration options +- [Integrations](/guides/integrations) - Using HUD with other frameworks +- [Sandboxing](/guides/sandboxing) - Running agents safely diff --git a/docs/docs.json b/docs/docs.json index 71e69e11..114ba090 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -60,6 +60,12 @@ "migration" ] }, + { + "group": "Cookbooks", + "pages": [ + "cookbooks/codex-coding" + ] + }, { "group": "Advanced", "pages": [ @@ -231,4 +237,4 @@ "twitter:description": "OSS Evaluations and RL Environments SDK" } } -} \ No newline at end of file +} diff --git a/examples/06_codex_coding_agent.py b/examples/06_codex_coding_agent.py new file mode 100644 index 00000000..ffdf06f7 --- /dev/null +++ b/examples/06_codex_coding_agent.py @@ -0,0 +1,377 @@ +#!/usr/bin/env python3 +""" +Codex Coding Agent Example + +This example demonstrates how to use OpenAI's **Codex-capable** models with +native `shell` and `apply_patch` tools via the HUD SDK. + +What this shows: +- **Local mode**: Run locally without Docker - tools execute on your machine +- **Hub mode**: Connect to HUD Hub for full telemetry and cloud execution +- OpenAIAgent automatically converts tools to OpenAI's native tool types + +Usage: + # Local mode (no Docker required, no HUD_API_KEY required for OPENAI_API_KEY users) + uv run python examples/06_codex_coding_agent.py --local + + # Hub mode (requires HUD_API_KEY) + export HUD_API_KEY="sk-hud-..." 
+ uv run python examples/06_codex_coding_agent.py + + # Custom task + uv run python examples/06_codex_coding_agent.py --local \\ + --task "Create a Python script that prints the Fibonacci sequence" + +Requirements: + - Install deps: `uv sync` + - For local mode: OPENAI_API_KEY environment variable + - For hub mode: HUD_API_KEY environment variable + - For traces (hud.eval): HUD_API_KEY environment variable +""" + +import argparse +import asyncio +import os + +from dotenv import load_dotenv +from openai import AsyncOpenAI + +# Load .env file from current directory or parent directories +load_dotenv() + +import hud +from hud.agents.openai import OpenAIAgent +from hud.settings import settings +from hud.tools.apply_patch import ApplyPatchTool +from hud.tools.shell import ShellTool + +# ============================================================================= +# Configuration +# ============================================================================= + +# Default hub environment name +DEFAULT_HUB = "codex_sandbox_environment" + +# Codex-capable models that support native shell/apply_patch tools +CODEX_MODELS = { + "gpt-5.1-codex", + "gpt-5.1", +} + + +# ============================================================================= +# Run Coding Task Locally (No Docker) +# ============================================================================= + + +async def run_coding_task_local( + task: str, + model: str = "gpt-5.1", + max_steps: int = 20, + verbose: bool = False, + work_dir: str | None = None, +) -> None: + """ + Run a coding task locally without Docker. + + Uses ShellTool and ApplyPatchTool running on your local machine. + Files are created in ./codex_output by default (or in the specified work_dir). 
+ + Args: + task: Description of the coding task + model: OpenAI model to use (default: gpt-5.1) + max_steps: Maximum agent steps (default: 20) + verbose: Enable verbose output + work_dir: Working directory for file operations (default: ./codex_output) + """ + # Validate model is Codex-capable + if model not in CODEX_MODELS: + raise ValueError( + f"Model '{model}' is not in the Codex-capable list {sorted(CODEX_MODELS)}.\n" + "Use a model that supports native shell/apply_patch tools." + ) + + # Create working directory + if work_dir: + os.makedirs(work_dir, exist_ok=True) + base_path = os.path.abspath(work_dir) + else: + # Default to ./codex_output + work_dir = "./codex_output" + os.makedirs(work_dir, exist_ok=True) + base_path = os.path.abspath(work_dir) + + print(f"📁 Working directory: {base_path}") + + # Initialize tools + shell_tool = ShellTool() + apply_patch_tool = ApplyPatchTool(base_path=base_path) + + # Create environment with local tools + env = hud.Environment("local-codex") + + @env.tool() + async def shell( + commands: list[str], + timeout_ms: int | None = None, + max_output_length: int | None = None, + ) -> dict: + """Execute shell commands in a bash session. + + Args: + commands: List of shell commands to execute + timeout_ms: Optional timeout in milliseconds for each command + max_output_length: Optional max output length hint + """ + # Change to working directory before executing + prefixed_commands = [f"cd {base_path} && {cmd}" for cmd in commands] + result = await shell_tool( + commands=prefixed_commands, + timeout_ms=timeout_ms, + max_output_length=max_output_length, + ) + return result.to_dict() + + @env.tool() + async def apply_patch( + type: str, + path: str, + diff: str | None = None, + ) -> dict: + """Apply file operations using V4A diff format. 
+ + Args: + type: Operation type - "create_file", "update_file", or "delete_file" + path: The file path to operate on + diff: The diff content (required for create_file and update_file) + """ + result = await apply_patch_tool(type=type, path=path, diff=diff) + return result.to_dict() + + # Create OpenAI client + model_client = AsyncOpenAI() + agent = OpenAIAgent.create( + model=model, + model_client=model_client, + verbose=verbose, + ) + + print(f"🤖 Model: {model}") + print(f"📋 Task: {task}") + print("=" * 60) + + # Define a scenario for the coding task + @env.scenario("coding_task") + async def coding_task_scenario(task_description: str): + yield f"""You are a skilled software developer. Complete the following task: + +{task_description} + +Use the available tools: +- `shell` to run commands (ls, cat, python, etc.) +- `apply_patch` to create or modify files + +Work in the current directory. When done, verify your work runs correctly.""" + + # Simple success - task completed + yield 1.0 + + # Run the agent + eval_task = env("coding_task", task_description=task) + + async with hud.eval(eval_task, name="codex-coding-local") as ctx: + await agent.run(ctx, max_steps=max_steps) + + print("=" * 60) + print("✅ Task completed!") + print(f"📊 Reward: {ctx.reward}") + print(f"📁 Files created in: {base_path}") + + # List created files + if os.path.exists(base_path): + files = os.listdir(base_path) + if files: + print(f"📄 Files: {', '.join(files)}") + + +# ============================================================================= +# Run Coding Task via HUD Hub +# ============================================================================= + + +async def run_coding_task_hub( + task: str, + model: str = "gpt-5.1", + max_steps: int = 20, + hub_name: str = DEFAULT_HUB, + verbose: bool = False, +) -> None: + """ + Run a coding task against the codex-sandbox environment via HUD Hub. 
+ + Uses connect_hub() to route through HUD's infrastructure, enabling + full telemetry (both inference and environment steps visible in trace). + + Args: + task: Description of the coding task + model: OpenAI model to use (default: gpt-5.1) + max_steps: Maximum agent steps (default: 20) + hub_name: Hub environment name (default: codex-sandbox) + verbose: Enable verbose output + """ + # Require HUD_API_KEY for hub mode + if not settings.api_key: + raise ValueError( + "HUD_API_KEY is required for hub mode.\n" + "Get yours at: https://hud.ai/project/api-keys\n" + "Then: export HUD_API_KEY='sk-hud-...'\n\n" + "Or use --local flag to run without HUD infrastructure." + ) + + print(f"🌐 Connecting to hub: {hub_name}") + + # Create environment and connect via HUD Hub (full telemetry) + env = hud.Environment() + env.connect_hub(hub_name) + + # Validate model is Codex-capable + if model not in CODEX_MODELS: + raise ValueError( + f"Model '{model}' is not in the Codex-capable list {sorted(CODEX_MODELS)}.\n" + "Use a model that supports native shell/apply_patch tools." + ) + + # Create agent with HUD Gateway for inference telemetry + model_client = AsyncOpenAI( + base_url=settings.hud_gateway_url, + api_key=settings.api_key, + ) + agent = OpenAIAgent.create( + model=model, + model_client=model_client, + validate_api_key=False, # HUD key won't validate against OpenAI + verbose=verbose, + ) + print("🌐 Using HUD Gateway for inference") + + print(f"🤖 Model: {model}") + print(f"📋 Task: {task}") + print("=" * 60) + + # Define a scenario for the coding task + @env.scenario("coding_task") + async def coding_task_scenario(task_description: str): + yield f"""You are a skilled software developer. Complete the following task: + +{task_description} + +Use the available tools: +- `shell` to run commands (ls, cat, python, etc.) +- `apply_patch` to create or modify files + +Work in the current directory. 
When done, verify your work runs correctly.""" + + # Evaluation is handled by the environment's evaluate tool + yield 1.0 + + # Run the agent + eval_task = env("coding_task", task_description=task) + + async with hud.eval(eval_task, name="codex-coding") as ctx: + await agent.run(ctx, max_steps=max_steps) + + print("=" * 60) + print("✅ Task completed!") + print(f"📊 Reward: {ctx.reward}") + + +# ============================================================================= +# CLI +# ============================================================================= + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Run coding tasks with OpenAI's native shell and apply_patch tools", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Local mode (no Docker, no HUD_API_KEY required) + uv run python examples/06_codex_coding_agent.py --local + + # Local mode with custom working directory + uv run python examples/06_codex_coding_agent.py --local --work-dir ./codex_output + + # Hub mode (full telemetry, requires HUD_API_KEY) + uv run python examples/06_codex_coding_agent.py + + # Custom task + uv run python examples/06_codex_coding_agent.py --local \\ + --task "Create a Python script that prints the Fibonacci sequence up to 10 numbers" + + # Verbose output + uv run python examples/06_codex_coding_agent.py --local --verbose + + # Use a different Codex model + uv run python examples/06_codex_coding_agent.py --local --model gpt-5.1-codex +""", + ) + parser.add_argument( + "--local", + action="store_true", + help="Run locally without Docker (tools execute on your machine)", + ) + parser.add_argument( + "--task", + type=str, + default="Create a Python script called main.py that prints 'Hello, World!' 
and the current date/time", + help="The coding task to complete", + ) + parser.add_argument( + "--model", + type=str, + default="gpt-5.1", + help="Codex-capable OpenAI model (default: gpt-5.1)", + ) + parser.add_argument( + "--max-steps", + type=int, + default=20, + help="Maximum agent steps (default: 20)", + ) + parser.add_argument( + "--work-dir", + type=str, + default=None, + help="Working directory for file operations (local mode only, default: ./codex_output)", + ) + parser.add_argument( + "--verbose", + action="store_true", + help="Enable verbose output", + ) + return parser.parse_args() + + +async def main() -> None: + args = _parse_args() + + if args.local: + await run_coding_task_local( + task=args.task, + model=args.model, + max_steps=args.max_steps, + verbose=args.verbose, + work_dir=args.work_dir, + ) + else: + await run_coding_task_hub( + task=args.task, + model=args.model, + max_steps=args.max_steps, + verbose=args.verbose, + ) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/hud/tools/shell.py b/hud/tools/shell.py index fe6a7efa..f357c593 100644 --- a/hud/tools/shell.py +++ b/hud/tools/shell.py @@ -82,10 +82,10 @@ async def start(self) -> None: await asyncio.sleep(0) return - # preexec_fn and user demotion only available on Unix + # preexec_fn and user demotion only available on Unix when running as root preexec_fn = None - if sys.platform != "win32": - + if sys.platform != "win32" and os.getuid() == 0: + # Only demote when running as root (e.g., inside Docker containers) def demote() -> None: # This only runs in the child process (Unix only) os.setsid() # type: ignore[attr-defined] From 39566f93c2136a846dd14da080f02a04cd52f036 Mon Sep 17 00:00:00 2001 From: ryantzr1 Date: Thu, 8 Jan 2026 15:00:55 +0800 Subject: [PATCH 2/2] fix: use shlex.quote for safe path escaping in shell commands --- docs/cookbooks/codex-coding.mdx | 14 ++++++++++++-- examples/06_codex_coding_agent.py | 14 ++++++++++---- 2 files changed, 22 insertions(+), 6 
deletions(-) diff --git a/docs/cookbooks/codex-coding.mdx b/docs/cookbooks/codex-coding.mdx index 9eacab50..7903eb9d 100644 --- a/docs/cookbooks/codex-coding.mdx +++ b/docs/cookbooks/codex-coding.mdx @@ -74,6 +74,14 @@ async with hud.eval(env(), name="coding-task") as ctx: ### Hub Mode (Cloud Execution) + + **Prerequisites**: You must create the `codex_environment_sandbox` environment + in [hud.ai](https://hud.ai) first before using hub mode. Go to + [hud.ai](https://hud.ai) → **New** → **Environment** → Import from + [hud-evals/codex_environment_sandbox](https://github.com/hud-evals/codex_environment_sandbox). + Once deployed, your environment will be accessible via `connect_hub()`. + + Connect to HUD Hub for full cloud execution and telemetry: ```python @@ -84,7 +92,7 @@ from openai import AsyncOpenAI # Connect to HUD Hub environment env = hud.Environment() -env.connect_hub("codex_sandbox_environment") +env.connect_hub("codex_environment_sandbox") # Use HUD Gateway for inference (full telemetry) model_client = AsyncOpenAI( @@ -242,8 +250,10 @@ async def main(): max_output_length: int | None = None, ) -> dict: """Execute shell commands in a bash session.""" + import shlex # Change to working directory before executing - prefixed_commands = [f"cd {base_path} && {cmd}" for cmd in commands] + safe_path = shlex.quote(base_path) + prefixed_commands = [f"cd {safe_path} && {cmd}" for cmd in commands] result = await shell_tool( commands=prefixed_commands, timeout_ms=timeout_ms, diff --git a/examples/06_codex_coding_agent.py b/examples/06_codex_coding_agent.py index ffdf06f7..1873e7dd 100644 --- a/examples/06_codex_coding_agent.py +++ b/examples/06_codex_coding_agent.py @@ -32,6 +32,7 @@ import argparse import asyncio import os +import shlex from dotenv import load_dotenv from openai import AsyncOpenAI @@ -50,7 +51,7 @@ # ============================================================================= # Default hub environment name -DEFAULT_HUB = "codex_sandbox_environment" 
+DEFAULT_HUB = "codex_environment_sandbox" # Codex-capable models that support native shell/apply_patch tools CODEX_MODELS = { @@ -124,7 +125,9 @@ async def shell( max_output_length: Optional max output length hint """ # Change to working directory before executing - prefixed_commands = [f"cd {base_path} && {cmd}" for cmd in commands] + # Use shlex.quote to safely handle paths with spaces or special characters + safe_path = shlex.quote(base_path) + prefixed_commands = [f"cd {safe_path} && {cmd}" for cmd in commands] result = await shell_tool( commands=prefixed_commands, timeout_ms=timeout_ms, @@ -207,16 +210,19 @@ async def run_coding_task_hub( verbose: bool = False, ) -> None: """ - Run a coding task against the codex-sandbox environment via HUD Hub. + Run a coding task against the codex_environment_sandbox via HUD Hub. Uses connect_hub() to route through HUD's infrastructure, enabling full telemetry (both inference and environment steps visible in trace). + Note: You must create the codex_environment_sandbox environment in hud.ai + first before using this function. + Args: task: Description of the coding task model: OpenAI model to use (default: gpt-5.1) max_steps: Maximum agent steps (default: 20) - hub_name: Hub environment name (default: codex-sandbox) + hub_name: Hub environment name (default: codex_environment_sandbox) verbose: Enable verbose output """ # Require HUD_API_KEY for hub mode