Skip to content

Commit d24c7b9

Browse files
authored
feat: hud console (#115)
* chore: move design system to hud console * fix: better hud_console coloring for dark mode * misc: bump version number * nit: ruff * nit: ruff pt 2 * nit: ruff check with dev
1 parent 9b01eb2 commit d24c7b9

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

41 files changed

+1080
-1024
lines changed

examples/run_evaluation.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@
4747

4848
logger = logging.getLogger(__name__)
4949

50+
# Uncomment to enable logging
51+
# logging.basicConfig(
52+
# level=logging.INFO, format="%(asctime)s - %(name)s - %(message)s", datefmt="%H:%M:%S"
53+
# )
54+
5055
# ---------------------------------------------------------------------------
5156
# Agent factory helpers
5257
# ---------------------------------------------------------------------------
@@ -94,6 +99,10 @@ async def run_single_task(
9499
) -> None:
95100
"""Load *one* task from *dataset_name* and execute it."""
96101

102+
# Enable agent step logging for single task mode
103+
logging.getLogger("hud.agents").setLevel(logging.INFO)
104+
logging.getLogger("hud.agents.base").setLevel(logging.INFO)
105+
97106
print("📊 Loading dataset…")
98107
dataset = load_dataset(dataset_name, split="train")
99108

hud/agents/base.py

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import mcp.types as types
1212

1313
from hud.types import AgentResponse, MCPToolCall, MCPToolResult, Trace
14-
from hud.utils.design import HUDDesign
14+
from hud.utils.hud_console import HUDConsole
1515
from hud.utils.mcp import MCPConfigPatch, patch_mcp_config, setup_hud_telemetry
1616

1717
if TYPE_CHECKING:
@@ -37,7 +37,7 @@ class MCPAgent(ABC):
3737
and automatic marking of lifecycle tools (setup/evaluate) from a `Task`.
3838
- Messaging: system prompt handling, optional inclusion of setup output on
3939
the first turn, and control over initial screenshots.
40-
- Telemetry & UX: standardized logging/printing via `HUDDesign` and optional
40+
- Telemetry & UX: standardized logging/printing via `HUDConsole` and optional
4141
automatic tracing (`auto_trace`).
4242
4343
Subclasses implement provider-specific formatting and response fetching
@@ -92,11 +92,11 @@ def __init__(
9292
self._auto_created_client = False # Track if we created the client
9393

9494
self.model_name = model_name
95-
self.design = HUDDesign(logger=logger)
95+
self.console = HUDConsole(logger=logger)
9696

9797
# Set verbose mode if requested
9898
if verbose:
99-
self.design.set_verbose(True)
99+
self.console.set_verbose(True)
100100

101101
# Filtering
102102
self.allowed_tools = allowed_tools
@@ -131,7 +131,7 @@ async def initialize(self, task: str | Task | None = None) -> None:
131131

132132
self.mcp_client = MCPClient(mcp_config=task.mcp_config)
133133
self._auto_created_client = True
134-
self.design.info_log("Auto-created MCPClient from task.mcp_config")
134+
self.console.info_log("Auto-created MCPClient from task.mcp_config")
135135

136136
# Ensure we have a client
137137
if self.mcp_client is None:
@@ -168,7 +168,7 @@ async def initialize(self, task: str | Task | None = None) -> None:
168168
await self._filter_tools()
169169

170170
num_tools = len(self._available_tools)
171-
self.design.success_log(
171+
self.console.success_log(
172172
f"Agent initialized with {num_tools} available tools (after filtering)"
173173
)
174174

@@ -243,7 +243,7 @@ async def run_task(self, task: Task, max_steps: int = 10) -> Trace:
243243

244244
# Execute the setup tool and append the initial observation to the context
245245
if task.setup_tool is not None:
246-
self.design.progress_log(f"Setting up tool phase: {task.setup_tool}")
246+
self.console.progress_log(f"Setting up tool phase: {task.setup_tool}")
247247
results = await self.call_tools(task.setup_tool)
248248
if any(result.isError for result in results):
249249
raise RuntimeError(f"{results}")
@@ -257,15 +257,15 @@ async def run_task(self, task: Task, max_steps: int = 10) -> Trace:
257257
prompt_result = await self._run_context(start_context, max_steps=max_steps)
258258

259259
except Exception as e:
260-
self.design.error_log(f"Task execution failed: {e}")
260+
self.console.error_log(f"Task execution failed: {e}")
261261
# Create an error result but don't return yet - we still want to evaluate
262262
prompt_result = Trace(reward=0.0, done=True, content=str(e), isError=True)
263263
prompt_result.populate_from_context()
264264

265265
# Always evaluate if we have a prompt result and evaluate tool
266266
if prompt_result is not None and task.evaluate_tool is not None:
267267
try:
268-
self.design.progress_log(f"Evaluating tool phase: {task.evaluate_tool}")
268+
self.console.progress_log(f"Evaluating tool phase: {task.evaluate_tool}")
269269
results = await self.call_tools(task.evaluate_tool)
270270

271271
if any(result.isError for result in results):
@@ -288,7 +288,7 @@ async def run_task(self, task: Task, max_steps: int = 10) -> Trace:
288288
prompt_result.content = eval_content
289289

290290
except Exception as e:
291-
self.design.error_log(f"Evaluation phase failed: {e}")
291+
self.console.error_log(f"Evaluation phase failed: {e}")
292292
# Continue with the prompt result even if evaluation failed
293293

294294
return (
@@ -319,21 +319,21 @@ async def _run_context(
319319

320320
# Add initial context
321321
messages.extend(await self.format_message(context))
322-
self.design.debug(f"Messages: {messages}")
322+
self.console.debug(f"Messages: {messages}")
323323

324324
step_count = 0
325325
while max_steps == -1 or step_count < max_steps:
326326
step_count += 1
327327
if max_steps == -1:
328-
self.design.debug(f"Step {step_count} (unlimited)")
328+
self.console.debug(f"Step {step_count} (unlimited)")
329329
else:
330-
self.design.debug(f"Step {step_count}/{max_steps}")
330+
self.console.debug(f"Step {step_count}/{max_steps}")
331331

332332
try:
333333
# 1. Get model response
334334
response = await self.get_response(messages)
335335

336-
self.design.debug(f"Agent:\n{response}")
336+
self.console.debug(f"Agent:\n{response}")
337337

338338
# Check if we should stop
339339
if response.done or not response.tool_calls:
@@ -345,16 +345,16 @@ async def _run_context(
345345
response.content
346346
)
347347
except Exception as e:
348-
self.design.warning_log(f"ResponseAgent failed: {e}")
348+
self.console.warning_log(f"ResponseAgent failed: {e}")
349349
if decision == "STOP":
350350
# Try to submit response through lifecycle tool
351351
await self._maybe_submit_response(response, messages)
352352

353-
self.design.debug("Stopping execution")
353+
self.console.debug("Stopping execution")
354354
final_response = response
355355
break
356356
else:
357-
self.design.debug("Continuing execution")
357+
self.console.debug("Continuing execution")
358358
messages.extend(await self.format_message(decision))
359359
continue
360360

@@ -376,21 +376,21 @@ async def _run_context(
376376
for call, result in zip(tool_calls, tool_results, strict=False):
377377
step_info += f"\n{call}\n{result}"
378378

379-
self.design.info_log(step_info)
379+
self.console.info_log(step_info)
380380

381381
except Exception as e:
382-
self.design.error_log(f"Step failed: {e}")
382+
self.console.error_log(f"Step failed: {e}")
383383
error = str(e)
384384
break
385385

386386
except KeyboardInterrupt:
387-
self.design.warning_log("Agent execution interrupted by user")
387+
self.console.warning_log("Agent execution interrupted by user")
388388
error = "Interrupted by user"
389389
except asyncio.CancelledError:
390-
self.design.warning_log("Agent execution cancelled")
390+
self.console.warning_log("Agent execution cancelled")
391391
error = "Cancelled"
392392
except Exception as e:
393-
self.design.error_log(f"Unexpected error: {e}")
393+
self.console.error_log(f"Unexpected error: {e}")
394394
error = str(e)
395395

396396
# Build result
@@ -431,17 +431,17 @@ async def call_tools(
431431
results: list[MCPToolResult] = []
432432
for tc in tool_call:
433433
try:
434-
self.design.debug(f"Calling tool: {tc}")
434+
self.console.debug(f"Calling tool: {tc}")
435435
results.append(await self.mcp_client.call_tool(tc))
436436
except TimeoutError as e:
437-
self.design.error_log(f"Tool execution timed out: {e}")
437+
self.console.error_log(f"Tool execution timed out: {e}")
438438
try:
439439
await self.mcp_client.shutdown()
440440
except Exception as close_err:
441-
self.design.debug(f"Failed to close MCP client cleanly: {close_err}")
441+
self.console.debug(f"Failed to close MCP client cleanly: {close_err}")
442442
raise
443443
except Exception as e:
444-
self.design.error_log(f"Tool execution failed: {e}")
444+
self.console.error_log(f"Tool execution failed: {e}")
445445
results.append(_format_error_result(str(e)))
446446
return results
447447

@@ -573,7 +573,7 @@ async def _filter_tools(self) -> None:
573573

574574
# Add to lifecycle tools if found
575575
if response_tool_name and response_tool_name not in self.lifecycle_tools:
576-
self.design.debug(f"Auto-detected '{response_tool_name}' tool as a lifecycle tool")
576+
self.console.debug(f"Auto-detected '{response_tool_name}' tool as a lifecycle tool")
577577
self.response_tool_name = response_tool_name
578578
self.lifecycle_tools.append(response_tool_name)
579579

@@ -597,7 +597,7 @@ async def _maybe_submit_response(self, response: AgentResponse, messages: list[A
597597
messages: The current message history (will be modified in-place)
598598
"""
599599
if self.response_tool_name:
600-
self.design.debug(f"Calling response lifecycle tool: {self.response_tool_name}")
600+
self.console.debug(f"Calling response lifecycle tool: {self.response_tool_name}")
601601
try:
602602
# Call the response tool with the agent's response
603603
response_tool_call = MCPToolCall(
@@ -612,9 +612,9 @@ async def _maybe_submit_response(self, response: AgentResponse, messages: list[A
612612
messages.extend(response_messages)
613613

614614
# Mark the task as done
615-
self.design.debug("Response lifecycle tool executed, marking task as done")
615+
self.console.debug("Response lifecycle tool executed, marking task as done")
616616
except Exception as e:
617-
self.design.error_log(f"Response lifecycle tool failed: {e}")
617+
self.console.error_log(f"Response lifecycle tool failed: {e}")
618618

619619
async def _setup_config(self, mcp_config: dict[str, dict[str, Any]]) -> None:
620620
"""Inject metadata into the metadata of the initialize request."""
@@ -668,19 +668,19 @@ async def _cleanup(self) -> None:
668668
if self._auto_trace_cm:
669669
try:
670670
self._auto_trace_cm.__exit__(None, None, None)
671-
self.design.debug("Closed auto-created trace")
671+
self.console.debug("Closed auto-created trace")
672672
except Exception as e:
673-
self.design.warning_log(f"Failed to close auto-created trace: {e}")
673+
self.console.warning_log(f"Failed to close auto-created trace: {e}")
674674
finally:
675675
self._auto_trace_cm = None
676676

677677
# Clean up auto-created client
678678
if self._auto_created_client and self.mcp_client:
679679
try:
680680
await self.mcp_client.shutdown()
681-
self.design.debug("Closed auto-created MCPClient")
681+
self.console.debug("Closed auto-created MCPClient")
682682
except Exception as e:
683-
self.design.warning_log(f"Failed to close auto-created client: {e}")
683+
self.console.warning_log(f"Failed to close auto-created client: {e}")
684684
finally:
685685
self.mcp_client = None
686686
self._auto_created_client = False
@@ -713,13 +713,13 @@ def _handle_connection_error(self, e: Exception) -> None:
713713
if self._is_connection_error(e):
714714
msg = self._get_connection_error_message(e)
715715
# Always show connection errors, not just when logging is enabled
716-
self.design.error(f"❌ {msg}")
717-
self.design.info("💡 Make sure the MCP server is started before running the agent.")
716+
self.console.error(f"❌ {msg}")
717+
self.console.info("💡 Make sure the MCP server is started before running the agent.")
718718

719719
# For localhost, provide specific instructions
720720
error_str = str(e).lower()
721721
if "localhost" in error_str or "127.0.0.1" in error_str:
722-
self.design.info(" Run 'hud dev' in another terminal to start the MCP server")
722+
self.console.info(" Run 'hud dev' in another terminal to start the MCP server")
723723

724724
raise RuntimeError(msg) from e
725725
raise

hud/agents/openai.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ async def get_response(self, messages: ResponseInputMessageContentListParam) ->
204204
break
205205

206206
if not latest_screenshot:
207-
self.design.warning_log("No screenshot provided for response to action")
207+
self.console.warning_log("No screenshot provided for response to action")
208208
return AgentResponse(
209209
content="No screenshot available for next action",
210210
tool_calls=[],
@@ -327,7 +327,7 @@ async def format_tool_results(
327327
for content in result.content:
328328
if isinstance(content, types.TextContent):
329329
# Don't add error text as input_text, just track it
330-
self.design.error_log(f"Tool error: {content.text}")
330+
self.console.error_log(f"Tool error: {content.text}")
331331
elif isinstance(content, types.ImageContent):
332332
# Even error results might have images
333333
latest_screenshot = content.data

0 commit comments

Comments (0)