Skip to content

Commit 5b5e5de

Browse files
committed
misc: better tool logging and evaluation results parsing
1 parent b68d4ab commit 5b5e5de

File tree

1 file changed

+38
-15
lines changed

1 file changed

+38
-15
lines changed

hud/mcp/base.py

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ def get_system_prompt(self) -> str:
199199

200200
return base_prompt
201201

202-
async def call_tool(self, tool_call: MCPToolCall) -> MCPToolResult:
202+
async def call_tool(self, tool_call: MCPToolCall | None = None) -> MCPToolResult:
203203
"""
204204
Call a tool through the MCP client.
205205
@@ -209,6 +209,9 @@ async def call_tool(self, tool_call: MCPToolCall) -> MCPToolResult:
209209
Returns:
210210
The raw MCPToolResult
211211
"""
212+
if tool_call is None:
213+
raise ValueError("tool_call must be an MCPToolCall object")
214+
212215
tool_name = tool_call.name
213216
if not tool_name:
214217
raise ValueError("Tool call must have a 'name' field")
@@ -356,9 +359,9 @@ async def _run_task(self, task: TaskConfig, max_steps: int = 10) -> AgentResult:
356359
and eval_result.structuredContent is not None
357360
):
358361
return AgentResult(
359-
reward=self._find_reward(eval_result),
362+
reward=_find_reward(eval_result),
360363
done=True,
361-
content=eval_result.structuredContent["content"],
364+
content=_find_content(eval_result),
362365
messages=prompt_result.messages,
363366
)
364367
else:
@@ -382,18 +385,6 @@ async def _run_task(self, task: TaskConfig, max_steps: int = 10) -> AgentResult:
382385
except Exception as e:
383386
return AgentResult(reward=0.0, done=True, error=str(e))
384387

385-
def _find_reward(self, result: MCPToolResult) -> float:
386-
"""Find the reward in the result.
387-
388-
Agent accepts "reward", "grade", "score"
389-
390-
If not found, return 0.0
391-
"""
392-
accept_keys = ["reward", "grade", "score"]
393-
for key in accept_keys:
394-
if isinstance(result.structuredContent, dict) and key in result.structuredContent:
395-
return result.structuredContent[key]
396-
return 0.0
397388

398389
def _format_error_result(self, error_message: str) -> MCPToolResult:
399390
return MCPToolResult(
@@ -460,6 +451,7 @@ async def run_conversation(self, prompt: str, max_steps: int = 10) -> AgentResul
460451
tool_results = []
461452
for tool_call in tool_calls:
462453
try:
454+
logger.info("Calling tool: %s with args %s", tool_call.name, tool_call.arguments)
463455
result = await self.call_tool(tool_call)
464456
tool_results.append(result)
465457
except Exception as e:
@@ -523,6 +515,9 @@ async def _run_prompt(self, prompt: str, max_steps: int = 10) -> AgentResult:
523515
"Model response - Tool calls: %s",
524516
[tc.name for tc in response.tool_calls],
525517
)
518+
for tool_call in response.tool_calls:
519+
logger.info("Called tool: %s with args %s", tool_call.name, tool_call.arguments)
520+
526521
logger.info("Model response - Done: %s", response.done)
527522

528523
# Check if we should stop
@@ -629,3 +624,31 @@ async def create_user_message(self, text: str) -> Any:
629624
Formatted user message
630625
"""
631626
return {"role": "user", "content": text}
627+
628+
629+
630+
def _find_reward(result: MCPToolResult) -> float:
    """Extract the numeric reward from an evaluation tool result.

    The keys "reward", "grade" and "score" are looked up, in that order, in
    ``result.structuredContent``. The first key whose value can be converted
    to a float wins.

    Args:
        result: Tool result whose ``structuredContent`` may hold a dict.

    Returns:
        The reward as a float, or 0.0 when ``structuredContent`` is not a
        dict or none of the accepted keys holds a usable number.
    """
    payload = result.structuredContent
    if not isinstance(payload, dict):
        return 0.0

    for key in ("reward", "grade", "score"):
        if key not in payload:
            continue
        try:
            # Coerce so the declared ``float`` return type always holds
            # (ints, bools and numeric strings are accepted).
            return float(payload[key])
        except (TypeError, ValueError):
            # None or non-numeric value: try the next accepted key instead
            # of handing a non-float back to the caller.
            continue
    return 0.0
642+
643+
def _find_content(result: MCPToolResult) -> str | None:
    """Extract the textual content from an evaluation tool result.

    The keys "content" and "logs" are looked up, in that order, in
    ``result.structuredContent``. The first key holding a non-None value
    wins, and the value is returned exactly as stored (no conversion).

    Args:
        result: Tool result whose ``structuredContent`` may hold a dict.

    Returns:
        The stored value, or None when ``structuredContent`` is not a dict
        or neither accepted key holds a value.
    """
    payload = result.structuredContent
    if not isinstance(payload, dict):
        return None

    for key in ("content", "logs"):
        value = payload.get(key)
        # A None under "content" must not hide usable "logs".
        if value is not None:
            return value
    return None

0 commit comments

Comments
 (0)