Skip to content

Commit e7f9330

Browse files
authored
Merge pull request #107 from hud-evals/j/openai_api_agent
fixes/improvements for generic openai api agent
2 parents 40d2607 + 585d0db commit e7f9330

File tree

4 files changed

+422
-27
lines changed

4 files changed

+422
-27
lines changed

examples/openai_2048.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
#!/usr/bin/env python3
2+
"""
3+
OpenAI Chat Agent playing Text 2048
4+
5+
This example demonstrates using the GenericOpenAIChatAgent with the text-2048 environment.
6+
It shows how to:
7+
- Initialize an OpenAI client and pass it to the GenericOpenAIChatAgent
8+
- Configure the text-2048 environment
9+
- Run the agent to play the game
10+
11+
Requirements:
12+
- pip install openai
13+
- export OPENAI_API_KEY="your-api-key" # Or set OPENAI_BASE_URL for custom endpoints
14+
15+
Environment Variables:
16+
- OPENAI_BASE_URL: Custom OpenAI-compatible API endpoint (optional)
17+
- OPENAI_API_KEY: API key for authentication
18+
"""
19+
20+
import asyncio
21+
import os
22+
from openai import AsyncOpenAI
23+
import hud
24+
from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
25+
from hud.clients import MCPClient
26+
from hud.datasets import Task
27+
28+
29+
async def main():
    """Play one text-2048 episode with a GenericOpenAIChatAgent and print the outcome.

    Reads ``OPENAI_BASE_URL`` and ``OPENAI_API_KEY`` from the environment to
    build the OpenAI client, builds an MCP client from a config that runs the
    ``hudevals/hud-text-2048`` Docker image, and runs the agent for at most
    100 steps inside a ``hud.trace`` context.

    The reward, any extra info and the agent's conversation history are
    printed. An exception raised while playing or reporting is printed rather
    than propagated, and the MCP client is shut down in either case.
    """
    # An unset or empty OPENAI_BASE_URL becomes None, which selects the
    # default OpenAI endpoint.
    llm_client = AsyncOpenAI(
        base_url=os.getenv("OPENAI_BASE_URL") or None,
        api_key=os.getenv("OPENAI_API_KEY"),
    )

    docker_args = ["run", "--rm", "-i", "hudevals/hud-text-2048:latest"]
    environment = {"local": {"command": "docker", "args": docker_args}}

    system_prompt = """You are an expert 2048 game player. Your goal is to reach the tile specified by the user.

HOW 2048 WORKS:
- 4x4 grid with numbered tiles (2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048...)
- When you move, all tiles slide in that direction
- When two tiles with SAME number touch, they merge into one (2+2=4, 4+4=8, etc.)
- After each move, a new tile (2 or 4) appears randomly
- Game ends when grid is full and no merges possible

CRITICAL RULES:
- ALWAYS analyze the board before moving
- ALWAYS make a tool call for your move
- Use the 'move' tool with these choices: "up", "down", "left", or "right"
- Remember: ALL strings in JSON must have quotes!
- Make exactly ONE move per turn
- NEVER ask for permission - just keep playing until the game ends
- Don't ask "Should I continue?" - just make your next move

Example tool call: {"name": "move", "arguments": {"direction": "right"}}"""

    # Game setup and evaluation calls attached to the task.
    board_setup = {
        "name": "setup",
        "arguments": {"name": "board", "arguments": {"board_size": 4}},
    }
    tile_check = {
        "name": "evaluate",
        "arguments": {"name": "max_number", "arguments": {"target": 128}},
    }
    task = Task(
        prompt="""Aim for the 128 tile (atleast a score of 800!)""",
        mcp_config=environment,
        setup_tool=board_setup,  # type: ignore
        evaluate_tool=tile_check,  # type: ignore
    )

    mcp_client = MCPClient(mcp_config=task.mcp_config)

    # The agent is restricted to the text-2048 "move" tool.
    player = GenericOpenAIChatAgent(
        mcp_client=mcp_client,
        openai_client=llm_client,
        model_name="gpt-5-mini",  # Replace with your model name
        allowed_tools=["move"],
        parallel_tool_calls=False,
        system_prompt=system_prompt,
    )
    player.metadata = {}

    banner = "=" * 50
    with hud.trace("OpenAI 2048 Game"):
        try:
            print("🎮 Starting 2048 game with OpenAI agent...")
            print(f"🤖 Model: {player.model_name}")
            print(banner)

            outcome = await player.run(task, max_steps=100)

            # Report the result of the run.
            print(banner)
            print("✅ Game completed!")
            print(f"🏆 Final Score/Max Tile: {outcome.reward}")
            if outcome.info:
                print(f"📊 Game Stats: {outcome.info}")

            # Dump every message the agent exchanged, numbered from 1.
            print("🗣️ Conversation History:")
            for turn, message in enumerate(player.conversation_history, start=1):
                print(f" {turn} : {message}")
                print("-" * 30)

        except Exception as err:
            print(f"❌ Error during game: {err}")
        finally:
            await mcp_client.shutdown()
122+
123+
124+
if __name__ == "__main__":
    # Script entry point: run the async example to completion.
    asyncio.run(main())

examples/openai_browser_2048.py

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
#!/usr/bin/env python3
2+
"""
3+
OpenAI Chat Agent playing Browser 2048
4+
5+
This example demonstrates using the GenericOpenAIChatAgent with the browser-based 2048 game.
6+
It shows how to:
7+
- Initialize an OpenAI client with browser automation capabilities
8+
- Configure the browser-2048 environment with Docker
9+
- Use computer vision and interaction tools to play the game
10+
11+
Requirements:
12+
- pip install openai
13+
- export OPENAI_API_KEY="your-api-key" # Or set OPENAI_BASE_URL for custom endpoints
14+
- Docker installed and running
15+
16+
Environment Variables:
17+
- OPENAI_BASE_URL: Custom OpenAI-compatible API endpoint (optional)
18+
- OPENAI_API_KEY: API key for authentication
19+
"""
20+
21+
import asyncio
22+
import os
23+
from openai import AsyncOpenAI
24+
import hud
25+
from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
26+
from hud.clients import MCPClient
27+
from hud.datasets import Task
28+
29+
30+
async def main():
    """Play one browser-based 2048 episode with a GenericOpenAIChatAgent and print the outcome.

    Reads ``OPENAI_BASE_URL`` and ``OPENAI_API_KEY`` from the environment to
    build the OpenAI client, builds an MCP client from a config that runs the
    ``hudevals/hud-browser`` Docker image with port 8080 published, and runs
    the agent for at most 100 steps inside a ``hud.trace`` context.

    The reward, any extra info and the agent's conversation history are
    printed. An exception raised while playing or reporting is printed rather
    than propagated, and the MCP client is shut down in either case.
    """
    # An unset or empty OPENAI_BASE_URL becomes None.
    llm_client = AsyncOpenAI(
        base_url=os.getenv("OPENAI_BASE_URL") or None,
        api_key=os.getenv("OPENAI_API_KEY"),
    )

    # Browser-2048 environment, launched through Docker.
    docker_args = ["run", "--rm", "-i", "-p", "8080:8080", "hudevals/hud-browser:0.1.3"]
    environment = {"local": {"command": "docker", "args": docker_args}}

    system_prompt = """You are an expert 2048 game player using a browser interface. Your goal is to reach the tile specified by the user.

HOW 2048 WORKS:
- 4x4 grid with numbered tiles (2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048...)
- When you move, all tiles slide in that direction
- When two tiles with SAME number touch, they merge into one (2+2=4, 4+4=8, etc.)
- After each move, a new tile (2 or 4) appears randomly
- Game ends when grid is full and no merges possible

BROWSER INTERACTION USING THE COMPUTER TOOL:
1. FIRST TURN ONLY - TAKE SCREENSHOT:
Use: computer(action="screenshot")
This captures the initial game state. Only needed for your first turn.
After that, the environment will automatically return an image with each successful move.

2. MAKE MOVES - Use arrow keys by calling the computer tool with action="press":
- Move UP: computer(action="press", keys=["up"])
- Move DOWN: computer(action="press", keys=["down"])
- Move LEFT: computer(action="press", keys=["left"])
- Move RIGHT: computer(action="press", keys=["right"])

CRITICAL RULES:
- Make exactly ONE move per turn using the press action with arrow keys
- Continue playing until you reach the target or the game ends, no need to ask the user for confirmation.

Strategy tips:
- Keep your highest tiles in a corner
- Build tiles in descending order from the corner
- Avoid random moves - be strategic
- Try to keep the board organized"""

    # App launch and evaluation calls attached to the task.
    app_launch = {"name": "launch_app", "arguments": {"app_name": "2048"}}
    tile_check = {
        "name": "evaluate",
        "arguments": {"name": "game_2048_max_number", "arguments": {"target": 128}},
    }
    task = Task(
        prompt="""Play the browser-based 2048 game and try to reach the 128 tile.

Start by taking a screenshot to see the initial game board, then make strategic moves using arrow keys.
After your first screenshot, the game board will be automatically shown after each successful move.""",
        mcp_config=environment,
        setup_tool=app_launch,  # type: ignore
        evaluate_tool=tile_check,  # type: ignore
    )

    mcp_client = MCPClient(mcp_config=task.mcp_config)

    # The agent is restricted to the "computer" tool.
    player = GenericOpenAIChatAgent(
        mcp_client=mcp_client,
        openai_client=llm_client,
        model_name="z-ai/glm-4.5v",  # "z-ai/glm-4.5v", "Qwen/Qwen2.5-VL-7B-Instruct" etc...
        allowed_tools=["computer"],
        parallel_tool_calls=False,
        system_prompt=system_prompt,
    )
    player.metadata = {}

    banner = "=" * 50
    # Run the game with tracing.
    with hud.trace("OpenAI Browser 2048 Game"):
        try:
            print("🎮 Starting browser-based 2048 game with OpenAI agent...")
            print(f"🤖 Model: {player.model_name}")
            print("🌐 Browser environment running on localhost:8080")
            print(banner)

            outcome = await player.run(task, max_steps=100)

            # Report the result of the run.
            print(banner)
            print("✅ Game completed!")
            print(f"🏆 Final Score/Max Tile: {outcome.reward}")
            if outcome.info:
                print(f"📊 Game Stats: {outcome.info}")

            # Dump every message the agent exchanged, numbered from 1.
            print("\n📝 Full interaction trace:")
            for turn, message in enumerate(player.conversation_history, start=1):
                print(f" {turn} : {message}")
                print("-" * 30)

        except Exception as err:
            print(f"❌ Error during game: {err}")
        finally:
            await mcp_client.shutdown()
136+
137+
138+
if __name__ == "__main__":
    # Script entry point: run the async example to completion.
    asyncio.run(main())

hud/agents/misc/response_agent.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ async def determine_response(self, agent_message: str) -> ResponseType:
5454
"""
5555
try:
5656
response = await self.client.chat.completions.create(
57-
model="gpt-4o",
57+
model="gpt-5-nano",
5858
messages=[
5959
{"role": "system", "content": self.system_prompt},
6060
{

0 commit comments

Comments
 (0)