Skip to content

Commit ff4f549

Browse files
authored
Merge pull request #97 from hud-evals/extensions-calls-rl
Extensions calls rl
2 parents c425d94 + 4158212 commit ff4f549

File tree

11 files changed

+394
-109
lines changed

11 files changed

+394
-109
lines changed

hud/cli/__init__.py

Lines changed: 69 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
from .push import push_command
3030
from .remove import remove_command
3131
from .utils import CaptureLogger
32-
from .eval import eval_command
3332

3433
# Create the main Typer app
3534
app = typer.Typer(
@@ -132,7 +131,7 @@ def analyze(
132131
def debug(
133132
params: list[str] = typer.Argument( # type: ignore[arg-type] # noqa: B008
134133
None,
135-
help="Docker image followed by optional Docker run arguments (e.g., 'hud-image:latest -e KEY=value')", # noqa: E501
134+
help="Docker image, environment directory, or config file followed by optional Docker arguments", # noqa: E501
136135
),
137136
config: Path = typer.Option( # noqa: B008
138137
None,
@@ -148,6 +147,12 @@ def debug(
148147
"--cursor",
149148
help="Debug a server from Cursor config",
150149
),
150+
build: bool = typer.Option(
151+
False,
152+
"--build",
153+
"-b",
154+
help="Build image before debugging (for directory mode)",
155+
),
151156
max_phase: int = typer.Option(
152157
5,
153158
"--max-phase",
@@ -160,15 +165,24 @@ def debug(
160165
"""🐛 Debug MCP environment - test initialization, tools, and readiness.
161166
162167
Examples:
163-
hud debug hud-text-2048:latest
164-
hud debug my-mcp-server:v1 -e API_KEY=xxx -p 8080:8080
168+
hud debug . # Debug current directory
169+
hud debug environments/browser # Debug specific directory
170+
hud debug . --build # Build then debug
171+
hud debug hud-text-2048:latest # Debug Docker image
172+
hud debug my-mcp-server:v1 -e API_KEY=xxx
165173
hud debug --config mcp-config.json
166174
hud debug --cursor text-2048-dev
167-
hud debug hud-browser:dev --max-phase 3
175+
hud debug . --max-phase 3 # Stop after phase 3
168176
"""
169-
177+
# Import here to avoid circular imports
178+
from .env_utils import get_image_name, is_environment_directory, build_environment, image_exists
179+
from hud.utils.design import HUDDesign
180+
181+
design = HUDDesign()
182+
170183
# Determine the command to run
171184
command = None
185+
docker_args = []
172186

173187
if config:
174188
# Load config from JSON file
@@ -186,13 +200,44 @@ def debug(
186200
console.print(f"[red]❌ {error or 'Failed to parse cursor config'}[/red]")
187201
raise typer.Exit(1)
188202
elif params:
189-
image, *docker_args = params
190-
# Build Docker command
191-
command = ["docker", "run", "--rm", "-i", *docker_args, image]
203+
first_param = params[0]
204+
docker_args = params[1:] if len(params) > 1 else []
205+
206+
# Check if it's a directory
207+
if Path(first_param).exists() and is_environment_directory(first_param):
208+
# Directory mode - like hud dev
209+
directory = first_param
210+
211+
# Get or generate image name
212+
image_name, source = get_image_name(directory)
213+
214+
if source == "auto":
215+
design.info(f"Auto-generated image name: {image_name}")
216+
217+
# Build if requested or if image doesn't exist
218+
if build or not image_exists(image_name):
219+
if not build and not image_exists(image_name):
220+
if typer.confirm(f"Image {image_name} not found. Build it now?"):
221+
build = True
222+
else:
223+
raise typer.Exit(1)
224+
225+
if build:
226+
if not build_environment(directory, image_name):
227+
raise typer.Exit(1)
228+
229+
# Build Docker command
230+
command = ["docker", "run", "--rm", "-i", *docker_args, image_name]
231+
else:
232+
# Assume it's an image name
233+
image = first_param
234+
command = ["docker", "run", "--rm", "-i", *docker_args, image]
192235
else:
193-
console.print("[red]Error: Must specify either a Docker image, --config, or --cursor[/red]")
236+
console.print("[red]Error: Must specify a directory, Docker image, --config, or --cursor[/red]")
194237
console.print("\nExamples:")
195-
console.print(" hud debug hud-text-2048:latest")
238+
console.print(" hud debug . # Debug current directory")
239+
console.print(" hud debug environments/browser # Debug specific directory")
240+
console.print(" hud debug hud-text-2048:latest # Debug Docker image")
196241
console.print(" hud debug --config mcp-config.json")
197242
console.print(" hud debug --cursor my-server")
198243
raise typer.Exit(1)
@@ -699,7 +744,19 @@ def eval(
699744
design.error(f"Invalid agent: {agent}. Must be one of: {', '.join(valid_agents)}")
700745
raise typer.Exit(1)
701746

702-
# Import and run the command
747+
# Import eval_command lazily to avoid importing agent dependencies
748+
try:
749+
from .eval import eval_command
750+
except ImportError as e:
751+
from hud.utils.design import HUDDesign
752+
design = HUDDesign()
753+
design.error(
754+
"Evaluation dependencies are not installed. "
755+
"Please install with: pip install 'hud-python[agent]'"
756+
)
757+
raise typer.Exit(1) from e
758+
759+
# Run the command
703760
eval_command(
704761
source=source,
705762
full=full,

hud/cli/analyze_metadata.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from __future__ import annotations
44

55
from pathlib import Path
6+
from urllib.parse import quote
67

78
import requests
89
import yaml
@@ -26,7 +27,9 @@ def fetch_lock_from_registry(reference: str) -> dict | None:
2627
if "/" in reference and ":" not in reference:
2728
reference = f"{reference}:latest"
2829

29-
registry_url = f"{settings.hud_telemetry_url.rstrip('/')}/registry/envs/{reference}"
30+
# URL-encode the path segments to handle special characters in tags
31+
url_safe_path = "/".join(quote(part, safe="") for part in reference.split("/"))
32+
registry_url = f"{settings.hud_telemetry_url.rstrip('/')}/registry/envs/{url_safe_path}"
3033

3134
headers = {}
3235
if settings.api_key:

hud/cli/env_utils.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
"""Shared utilities for environment directory handling."""
2+
3+
from __future__ import annotations
4+
5+
import subprocess
6+
from pathlib import Path
7+
from typing import Any
8+
9+
import toml
10+
11+
from hud.utils.design import HUDDesign
12+
13+
design = HUDDesign()
14+
15+
16+
def get_image_name(directory: str | Path, image_override: str | None = None) -> tuple[str, str]:
17+
"""
18+
Resolve image name with source tracking.
19+
20+
Returns:
21+
Tuple of (image_name, source) where source is "override", "cache", or "auto"
22+
"""
23+
if image_override:
24+
return image_override, "override"
25+
26+
# Check pyproject.toml
27+
pyproject_path = Path(directory) / "pyproject.toml"
28+
if pyproject_path.exists():
29+
try:
30+
with open(pyproject_path) as f:
31+
config = toml.load(f)
32+
if config.get("tool", {}).get("hud", {}).get("image"):
33+
return config["tool"]["hud"]["image"], "cache"
34+
except Exception:
35+
pass # Silent failure, will use auto-generated name
36+
37+
# Auto-generate with :dev tag
38+
dir_path = Path(directory).resolve() # Get absolute path first
39+
dir_name = dir_path.name
40+
if not dir_name or dir_name == ".":
41+
# If we're in root or have empty name, use parent directory
42+
dir_name = dir_path.parent.name
43+
clean_name = dir_name.replace("_", "-")
44+
return f"hud-{clean_name}:dev", "auto"
45+
46+
47+
def update_pyproject_toml(directory: str | Path, image_name: str, silent: bool = False) -> None:
    """Record ``image_name`` under ``[tool.hud].image`` in the directory's pyproject.toml.

    Does nothing when ``<directory>/pyproject.toml`` does not exist. Any error
    while reading or writing is reported as a warning (or ignored entirely
    when ``silent`` is true) rather than raised.

    Args:
        directory: Environment directory that contains pyproject.toml.
        image_name: Image name to store.
        silent: Suppress both the success and the warning message.
    """
    target = Path(directory) / "pyproject.toml"
    if not target.exists():
        return

    try:
        with open(target) as handle:
            document = toml.load(handle)

        # Ensure [tool.hud] exists
        tool_table = document.setdefault("tool", {})
        if "hud" not in tool_table:
            tool_table["hud"] = {}

        # Update image name
        tool_table["hud"]["image"] = image_name

        # Write back
        with open(target, "w") as handle:
            toml.dump(document, handle)
    except Exception as e:
        if not silent:
            design.warning(f"Could not update pyproject.toml: {e}")
        return

    if not silent:
        design.success(f"Updated pyproject.toml with image: {image_name}")
73+
74+
75+
def build_environment(directory: str | Path, image_name: str, no_cache: bool = False) -> bool:
    """Build Docker image for an environment.

    Runs ``docker build -t <image_name> [--no-cache] <directory>`` with Docker's
    own output streamed straight to the terminal. On success the image name is
    written to the directory's pyproject.toml via ``update_pyproject_toml``.

    Args:
        directory: Build context (the environment directory).
        image_name: Tag to give the built image.
        no_cache: Pass ``--no-cache`` to ``docker build``.

    Returns:
        True if build succeeded, False otherwise (including when the
        ``docker`` executable cannot be launched at all).
    """
    build_cmd = ["docker", "build", "-t", image_name]
    if no_cache:
        build_cmd.append("--no-cache")
    build_cmd.append(str(directory))

    design.info(f"🔨 Building image: {image_name}{' (no cache)' if no_cache else ''}")
    design.info("")  # Empty line before Docker output

    # Just run Docker build directly - it has its own nice live display
    try:
        result = subprocess.run(build_cmd, check=False)  # noqa: S603
    except OSError as e:
        # docker missing from PATH or not executable: honour the documented
        # "False on failure" contract instead of surfacing a traceback.
        design.error(f"Build failed! Could not run docker: {e}")
        return False

    if result.returncode != 0:
        design.error("Build failed!")
        return False

    design.info("")  # Empty line after Docker output
    design.success(f"Build successful! Image: {image_name}")
    # Update pyproject.toml (silently since we already showed success)
    update_pyproject_toml(directory, image_name, silent=True)
    return True
101+
102+
103+
def image_exists(image_name: str) -> bool:
    """Check if a Docker image exists locally.

    Runs ``docker image inspect <image_name>`` with its output discarded and
    reports whether the command exited with status 0.

    Args:
        image_name: Image reference to look up.

    Returns:
        True when the inspect command succeeds; False when it fails or when
        the ``docker`` executable cannot be launched.
    """
    try:
        result = subprocess.run(  # noqa: S603
            ["docker", "image", "inspect", image_name],  # noqa: S607
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=False,
        )
    except OSError:
        # docker is not installed / not executable, so no image can be found.
        return False
    return result.returncode == 0
111+
112+
113+
def is_environment_directory(path: str | Path) -> bool:
    """Tell whether ``path`` looks like an environment directory.

    The path qualifies when it is an existing directory that contains both:
        - A Dockerfile
        - A pyproject.toml file

    A ``src`` directory is optional and is not checked.
    """
    candidate = Path(path)
    required_entries = ("Dockerfile", "pyproject.toml")
    return candidate.is_dir() and all(
        (candidate / entry).exists() for entry in required_entries
    )

0 commit comments

Comments
 (0)