Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/agents_tools/simple_task_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import asyncio
import logging
from hud.mcp import ClaudeMCPAgent
from hud.task import TaskConfig
from hud.datasets import TaskConfig
from mcp.types import CallToolRequestParams as MCPToolCall
from mcp_use import MCPClient

Expand Down
396 changes: 396 additions & 0 deletions examples/environments/gmail_local.ipynb

Large diffs are not rendered by default.

56 changes: 32 additions & 24 deletions examples/environments/gmail_local.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
import asyncio
import hud
from hud.mcp import ClaudeMCPAgent, OpenAIMCPAgent
from hud.task import TaskConfig
from mcp_use import MCPClient
from hud.datasets import TaskConfig
from hud.mcp import ClaudeMCPAgent, OpenAIMCPAgent, MCPClient
from hud.mcp.base import AgentResult

import logging

logging.basicConfig(level=logging.INFO)


async def main():
with hud.trace("gmail_local"):
with hud.trace("Yutori Gmail Task Trace"):
task_dict = {
"prompt": "Open Sent mail, search for the Series B pitch deck, forward it to [email protected], and mark the original message as important.",
"prompt": "Open Sent mail, find the Series B pitch deck email, forward it to [email protected], and mark the original message as important.",
"mcp_config": {
"gmail": {
"command": "docker",
"command": "/bin/sh",
"args": [
"run",
"-i", # interactive mode for stdio MCP server
"--rm", # remove the container after it exits
"-p",
"6080:6080", # map port 6080 to the host for noVNC
"hudpython/gmail-clone:latest", # use hud gmail image
],
"-c",
"docker run -i --rm -p 6080:6080 gmail 2>/dev/null"
]
}
},
"setup_tool": {
Expand All @@ -37,36 +37,44 @@ async def main():
"metadata": {"id": "forward-series-b-deck-to-billgates"},
}

# Update the prompt to give a disclaimer that this is a testing environment (anthropic doesn't like doing some actions)
# Feel free to modify this
task_dict["prompt"] = (
task_dict["prompt"]
+ " This is a mock sandbox environment for evaluations, so feel free to execute any actions you want and do not ask me for confirmation before executing actions."
)

# Create TaskConfig from dict
task = TaskConfig(**task_dict)

print("📡 Defining the environment...")
print("🔴 See the agent live at http://localhost:6080/vnc.html")
client = MCPClient.from_dict({"mcp_config": task.mcp_config})
client = MCPClient(mcp_config=task.mcp_config)


agent = ClaudeMCPAgent( # or OpenAIMCPAgent
mcp_client=client,
model="claude-3-7-sonnet-20250219",
allowed_tools=["computer"],
model="claude-sonnet-4-20250514",
# Allowing anthropic_computer tool to be used because we're using ClaudeMCPAgent
allowed_tools=["anthropic_computer"], # Check our hud/tools/computer/anthropic.py
initial_screenshot=True,
)

print(f"📋 Task: {task.prompt}")
print(f"⚙️ Setup: {task.setup_tool}")
print(f"📊 Evaluate: {task.evaluate_tool}")

print(f"📊 Evaluate: {task.evaluate_tool}")
# Run the task
print("🚀 Running the task...")
eval_result = await agent.run(task, max_steps=10)
print(f"🎉 Task Result: {eval_result}")
print("🔴 See the agent live at http://localhost:6080/vnc.html")
eval_result: AgentResult = await agent.run(task, max_steps=30)

# Show formatted results
reward = eval_result.get("reward", 0.0)
print(f" 🏆 Reward: {reward}")
print(f"🎉 Task Result:")
print(f" 🏆 Reward: {eval_result.reward}")
print(f" 🔍 Content: {eval_result.content[:1000] if eval_result.content else 'No content'}...")

# Clean up
print("\n🧹 Cleaning up...")
await client.close_all_sessions()
await client.close()
print("✅ Done!")


Expand Down
Loading
Loading