Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 122 additions & 0 deletions examples/openai_2048.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#!/usr/bin/env python3
"""
OpenAI Chat Agent playing Text 2048

This example demonstrates using the OpenAIChatAgent with the text-2048 environment.
It shows how to:
- Initialize an OpenAI client with the openai_chat agent
- Configure the text-2048 environment
- Run the agent to play the game

Requirements:
- pip install openai
- export OPENAI_API_KEY="your-api-key" # Or set OPENAI_BASE_URL for custom endpoints

Environment Variables:
- OPENAI_BASE_URL: Custom OpenAI-compatible API endpoint
- OPENAI_API_KEY: API key for authentication
"""

import asyncio
import os
from openai import AsyncOpenAI
import hud
from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
from hud.clients import MCPClient
from hud.datasets import Task


async def main():
    """Run an OpenAI chat agent against the text-2048 MCP environment.

    Reads OPENAI_BASE_URL / OPENAI_API_KEY from the environment, launches the
    text-2048 game in a local Docker container, and lets the agent play until
    the game ends, printing the result and conversation history.
    """
    # Initialize OpenAI client with environment variables.
    base_url = os.getenv("OPENAI_BASE_URL")
    api_key = os.getenv("OPENAI_API_KEY")

    openai_client = AsyncOpenAI(
        base_url=base_url if base_url else None,  # None falls back to the default OpenAI endpoint
        api_key=api_key,
    )

    # Run the text-2048 environment as a local Docker container speaking MCP over stdio.
    mcp_config = {
        "local": {
            "command": "docker",
            "args": ["run", "--rm", "-i", "hudevals/hud-text-2048:latest"],
        }
    }

    system_prompt = """You are an expert 2048 game player. Your goal is to reach the tile specified by the user.

HOW 2048 WORKS:
- 4x4 grid with numbered tiles (2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048...)
- When you move, all tiles slide in that direction
- When two tiles with SAME number touch, they merge into one (2+2=4, 4+4=8, etc.)
- After each move, a new tile (2 or 4) appears randomly
- Game ends when grid is full and no merges possible

CRITICAL RULES:
- ALWAYS analyze the board before moving
- ALWAYS make a tool call for your move
- Use the 'move' tool with these choices: "up", "down", "left", or "right"
- Remember: ALL strings in JSON must have quotes!
- Make exactly ONE move per turn
- NEVER ask for permission - just keep playing until the game ends
- Don't ask "Should I continue?" - just make your next move

Example tool call: {"name": "move", "arguments": {"direction": "right"}}"""

    # Define the task with game setup and evaluation.
    task = Task(
        prompt="""Aim for the 128 tile (at least 800 points!)""",
        mcp_config=mcp_config,
        setup_tool={
            "name": "setup",
            "arguments": {"name": "board", "arguments": {"board_size": 4}},
        },  # type: ignore
        evaluate_tool={"name": "evaluate", "arguments": {"name": "max_number", "arguments": {}}},  # type: ignore
    )

    # Initialize MCP client from the task's config.
    client = MCPClient(mcp_config=task.mcp_config)

    model_name = "gpt-5-mini"  # Replace with your model name

    # Create OpenAI agent restricted to the text-2048 'move' tool.
    agent = GenericOpenAIChatAgent(
        mcp_client=client,
        openai_client=openai_client,
        model_name=model_name,
        allowed_tools=["move"],
        parallel_tool_calls=False,
        system_prompt=system_prompt,
    )

    agent.metadata = {}

    with hud.trace("OpenAI 2048 Game"):
        try:
            print("🎮 Starting 2048 game with OpenAI agent...")
            print(f"🤖 Model: {agent.model_name}")
            print("=" * 50)

            # max_steps=-1: presumably no step cap — play until the game ends.
            # TODO(review): confirm -1 means "unlimited" in agent.run().
            result = await agent.run(task, max_steps=-1)

            # Display results.
            print("=" * 50)
            print("✅ Game completed!")
            print(f"🏆 Final Score/Max Tile: {result.reward}")
            if result.info:
                print(f"📊 Game Stats: {result.info}")

            # Display conversation history.
            print("🗣️ Conversation History:")
            for i, msg in enumerate(agent.conversation_history):
                print(f"  {i + 1} : {msg}")
                print("-" * 30)

        except Exception as e:
            print(f"❌ Error during game: {e}")
        finally:
            # Always tear down the MCP client (and its Docker container).
            await client.shutdown()


if __name__ == "__main__":
    # Script entry point: drive the async main() to completion.
    asyncio.run(main())
134 changes: 134 additions & 0 deletions examples/openai_browser_2048.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#!/usr/bin/env python3
"""
OpenAI Chat Agent playing Browser 2048

This example demonstrates using the OpenAIChatAgent with the browser-based 2048 game.
It shows how to:
- Initialize an OpenAI client with browser automation capabilities
- Configure the browser-2048 environment with Docker
- Use computer vision and interaction tools to play the game

Requirements:
- pip install openai
- export OPENAI_API_KEY="your-api-key" # Or set OPENAI_BASE_URL for custom endpoints
- Docker installed and running

Environment Variables:
- OPENAI_BASE_URL: Custom OpenAI-compatible API endpoint (optional)
- OPENAI_API_KEY: API key for authentication
"""

import asyncio
import os
from openai import AsyncOpenAI
import hud
from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
from hud.clients import MCPClient
from hud.datasets import Task


async def main():
    """Run an OpenAI chat agent against the browser-based 2048 environment.

    Reads OPENAI_BASE_URL / OPENAI_API_KEY from the environment, launches the
    hud-browser Docker image on port 8080, and lets the agent play 2048 via
    screenshots and arrow-key presses through the 'computer' tool.
    """
    # Initialize OpenAI client with environment variables.
    base_url = os.getenv("OPENAI_BASE_URL")
    api_key = os.getenv("OPENAI_API_KEY")

    openai_client = AsyncOpenAI(
        base_url=base_url if base_url else None,  # None falls back to the default OpenAI endpoint
        api_key=api_key,
    )

    # Configure the browser-2048 environment: Docker container speaking MCP
    # over stdio, with the browser UI exposed on localhost:8080.
    mcp_config = {
        "local": {
            "command": "docker",
            "args": ["run", "--rm", "-i", "-p", "8080:8080", "hudevals/hud-browser:0.1.3"],
        }
    }

    system_prompt = """You are an expert 2048 game player using a browser interface. Your goal is to reach the tile specified by the user.

HOW 2048 WORKS:
- 4x4 grid with numbered tiles (2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048...)
- When you move, all tiles slide in that direction
- When two tiles with SAME number touch, they merge into one (2+2=4, 4+4=8, etc.)
- After each move, a new tile (2 or 4) appears randomly
- Game ends when grid is full and no merges possible

BROWSER INTERACTION USING THE COMPUTER TOOL:
1. TAKE SCREENSHOTS:
   Use: computer(action="screenshot")
   This captures the current game state

2. MAKE MOVES - Use arrow keys by calling the computer tool with action="press":
   - Move UP: computer(action="press", keys=["up"])
   - Move DOWN: computer(action="press", keys=["down"])
   - Move LEFT: computer(action="press", keys=["left"])
   - Move RIGHT: computer(action="press", keys=["right"])

CRITICAL RULES:
- Take a screenshot first to see the board state at the start of the game
- Make exactly ONE move per turn using the press action with arrow keys
- Continue playing until you reach the target or the game ends

Strategy tips:
- Keep your highest tiles in a corner
- Build tiles in descending order from the corner
- Avoid random moves - be strategic
- Try to keep the board organized"""

    # Define the task with browser game setup and evaluation.
    task = Task(
        prompt="""Play the browser-based 2048 game and try to reach the 128 tile.

Take screenshots to see the game board, then make strategic moves using the browser interface.
You can use arrow keys or mouse gestures to move tiles.""",
        mcp_config=mcp_config,
        setup_tool={"name": "launch_app", "arguments": {"app_name": "2048"}},  # type: ignore
        evaluate_tool={
            "name": "evaluate",
            "arguments": {"name": "game_2048_max_number", "arguments": {"target": 128}},
        },  # type: ignore
    )

    # Initialize MCP client from the task's config.
    client = MCPClient(mcp_config=task.mcp_config)

    model_name = "gpt-5-mini"  # "z-ai/glm-4.5v", "Qwen/Qwen2.5-VL-7B-Instruct" etc...

    # Create OpenAI agent restricted to the browser-automation 'computer' tool.
    agent = GenericOpenAIChatAgent(
        mcp_client=client,
        openai_client=openai_client,
        model_name=model_name,
        allowed_tools=["computer"],
        parallel_tool_calls=False,
        system_prompt=system_prompt,
    )

    agent.metadata = {}

    # Run the game with tracing.
    with hud.trace("OpenAI Browser 2048 Game"):
        try:
            print("🎮 Starting browser-based 2048 game with OpenAI agent...")
            print(f"🤖 Model: {agent.model_name}")
            print("🌐 Browser environment running on localhost:8080")
            print("=" * 50)

            result = await agent.run(task, max_steps=10)

            # Display results.
            print("=" * 50)
            print("✅ Game completed!")
            print(f"🏆 Final Score/Max Tile: {result.reward}")
            if result.info:
                print(f"📊 Game Stats: {result.info}")

        except Exception as e:
            print(f"❌ Error during game: {e}")
        finally:
            # Always tear down the MCP client (and its Docker container).
            await client.shutdown()


if __name__ == "__main__":
    # Script entry point: drive the async main() to completion.
    asyncio.run(main())
2 changes: 1 addition & 1 deletion hud/agents/misc/response_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ async def determine_response(self, agent_message: str) -> ResponseType:
"""
try:
response = await self.client.chat.completions.create(
model="gpt-4o",
model="gpt-5-nano",
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Invalid Model Request Causes API Failures

The ResponseAgent now attempts to use "gpt-5-nano" for chat completions. This model is not a valid OpenAI model, likely a placeholder, causing API requests to fail.

Fix in Cursor Fix in Web

messages=[
{"role": "system", "content": self.system_prompt},
{
Expand Down
40 changes: 29 additions & 11 deletions hud/agents/openai_chat_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import mcp.types as types

from hud import instrument
from hud.types import AgentResponse, MCPToolCall, MCPToolResult

from .base import MCPAgent
Expand Down Expand Up @@ -52,6 +53,7 @@ def __init__(
self.model_name = model_name
self.parallel_tool_calls = parallel_tool_calls
self.logprobs = logprobs
self.conversation_history = []

@staticmethod
def _oai_to_mcp(tool_call: Any) -> MCPToolCall: # type: ignore[valid-type]
Expand All @@ -64,9 +66,7 @@ def _oai_to_mcp(tool_call: Any) -> MCPToolCall: # type: ignore[valid-type]

async def get_system_messages(self) -> list[Any]:
"""Get system messages for OpenAI."""
return [
{"role": "system", "content": self.system_prompt},
]
return [{"role": "system", "content": self.system_prompt}]

async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[Any]:
"""Format blocks for OpenAI."""
Expand Down Expand Up @@ -96,8 +96,14 @@ def get_tool_schemas(self) -> list[dict]:
openai_tools.append(openai_tool)
return openai_tools

@instrument(
span_type="agent",
record_args=False,
record_result=True,
)
async def get_response(self, messages: list[Any]) -> AgentResponse:
"""Send chat request to OpenAI and convert the response."""

# Convert MCP tool schemas to OpenAI format
mcp_schemas = self.get_tool_schemas()

Expand All @@ -112,6 +118,19 @@ async def get_response(self, messages: list[Any]) -> AgentResponse:
choice = response.choices[0]
msg = choice.message

assistant_msg: dict[str, Any] = {"role": "assistant"}

if msg.content:
assistant_msg["content"] = msg.content

if msg.tool_calls:
assistant_msg["tool_calls"] = msg.tool_calls

messages.append(assistant_msg)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: get_response Method Mutates Input List

The get_response method unexpectedly mutates the input messages list by appending the assistant's response. This can cause issues like duplicate messages or incorrect conversation history for callers reusing the list.

Fix in Cursor Fix in Web


# Store the complete conversation history
self.conversation_history = messages.copy()

tool_calls = []
if msg.tool_calls:
for tc in msg.tool_calls:
Expand All @@ -123,7 +142,7 @@ async def get_response(self, messages: list[Any]) -> AgentResponse:
return AgentResponse(
content=msg.content or "",
tool_calls=tool_calls,
done=choice.finish_reason == "stop",
done=choice.finish_reason in ("stop", "length"),
raw=response, # Include raw response for access to Choice objects
)

Expand All @@ -144,11 +163,10 @@ async def format_tool_results(
for c in res.content
if hasattr(c, "text")
)
rendered.append(
{
"role": "tool",
"tool_call_id": call.id,
"content": content or "", # Ensure content is never None
}
)
tool_msg = {
"role": "tool",
"tool_call_id": call.id,
"content": content or "", # Ensure content is never None
}
rendered.append(tool_msg)
return rendered
Loading