|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +OpenAI Chat Agent playing Browser 2048 |
| 4 | + |
| 5 | +This example demonstrates using the OpenAIChatAgent with the browser-based 2048 game. |
| 6 | +It shows how to: |
| 7 | +- Initialize an OpenAI client with browser automation capabilities |
| 8 | +- Configure the browser-2048 environment with Docker |
| 9 | +- Use computer vision and interaction tools to play the game |
| 10 | + |
| 11 | +Requirements: |
| 12 | +- pip install openai |
| 13 | +- export OPENAI_API_KEY="your-api-key" # Or set OPENAI_BASE_URL for custom endpoints |
| 14 | +- Docker installed and running |
| 15 | + |
| 16 | +Environment Variables: |
| 17 | +- OPENAI_BASE_URL: Custom OpenAI-compatible API endpoint (optional) |
| 18 | +- OPENAI_API_KEY: API key for authentication |
| 19 | +""" |
| 20 | + |
| 21 | +import asyncio |
| 22 | +import os |
| 23 | +from openai import AsyncOpenAI |
| 24 | +import hud |
| 25 | +from hud.agents.openai_chat_generic import GenericOpenAIChatAgent |
| 26 | +from hud.clients import MCPClient |
| 27 | +from hud.datasets import Task |
| 28 | + |
| 29 | + |
async def main() -> None:
    """Play one episode of browser-based 2048 with an OpenAI-compatible chat agent.

    Reads ``OPENAI_BASE_URL`` and ``OPENAI_API_KEY`` from the environment,
    describes a Docker-launched ``hudevals/hud-browser`` MCP server, builds a
    task whose evaluation target is the 128 tile, and runs the agent for at
    most 100 steps inside a ``hud.trace`` context. The reward, any extra info,
    and the agent's conversation history are printed. Errors raised during the
    run are printed rather than propagated, and the MCP client is shut down
    whether or not the run succeeds.
    """
    # Pass None rather than an empty string when OPENAI_BASE_URL is unset or blank.
    base_url = os.getenv("OPENAI_BASE_URL") or None
    api_key = os.getenv("OPENAI_API_KEY")

    openai_client = AsyncOpenAI(base_url=base_url, api_key=api_key)

    # Configure the browser-2048 environment: the MCP server is started via
    # `docker run`, with container port 8080 published on the host.
    mcp_config = {
        "local": {
            "command": "docker",
            "args": ["run", "--rm", "-i", "-p", "8080:8080", "hudevals/hud-browser:0.1.3"],
        }
    }

    system_prompt = """You are an expert 2048 game player using a browser interface. Your goal is to reach the tile specified by the user.

HOW 2048 WORKS:
- 4x4 grid with numbered tiles (2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048...)
- When you move, all tiles slide in that direction
- When two tiles with SAME number touch, they merge into one (2+2=4, 4+4=8, etc.)
- After each move, a new tile (2 or 4) appears randomly
- Game ends when grid is full and no merges possible

BROWSER INTERACTION USING THE COMPUTER TOOL:
1. FIRST TURN ONLY - TAKE SCREENSHOT:
   Use: computer(action="screenshot")
   This captures the initial game state. Only needed for your first turn.
   After that, the environment will automatically return an image with each successful move.

2. MAKE MOVES - Use arrow keys by calling the computer tool with action="press":
   - Move UP: computer(action="press", keys=["up"])
   - Move DOWN: computer(action="press", keys=["down"])
   - Move LEFT: computer(action="press", keys=["left"])
   - Move RIGHT: computer(action="press", keys=["right"])

CRITICAL RULES:
- Make exactly ONE move per turn using the press action with arrow keys
- Continue playing until you reach the target or the game ends, no need to ask the user for confirmation.

Strategy tips:
- Keep your highest tiles in a corner
- Build tiles in descending order from the corner
- Avoid random moves - be strategic
- Try to keep the board organized"""

    # Define the task with browser game setup and evaluation
    task = Task(
        prompt="""Play the browser-based 2048 game and try to reach the 128 tile.

        Start by taking a screenshot to see the initial game board, then make strategic moves using arrow keys.
        After your first screenshot, the game board will be automatically shown after each successful move.""",
        mcp_config=mcp_config,
        setup_tool={"name": "launch_app", "arguments": {"app_name": "2048"}},  # type: ignore
        evaluate_tool={
            "name": "evaluate",
            "arguments": {"name": "game_2048_max_number", "arguments": {"target": 128}},
        },  # type: ignore
    )

    # Initialize MCP client
    client = MCPClient(mcp_config=task.mcp_config)

    model_name = "z-ai/glm-4.5v"  # "z-ai/glm-4.5v", "Qwen/Qwen2.5-VL-7B-Instruct" etc...

    # Create the agent; only the "computer" tool is exposed and tool calls are
    # issued one at a time.
    agent = GenericOpenAIChatAgent(
        mcp_client=client,
        openai_client=openai_client,
        model_name=model_name,
        allowed_tools=["computer"],
        parallel_tool_calls=False,
        system_prompt=system_prompt,
    )

    # NOTE(review): metadata is reset to an empty dict before running; the
    # reason is not visible in this file - confirm it is still required.
    agent.metadata = {}

    separator = "=" * 50

    # Run the game with tracing
    with hud.trace("OpenAI Browser 2048 Game"):
        try:
            print("🎮 Starting browser-based 2048 game with OpenAI agent...")
            print(f"🤖 Model: {agent.model_name}")
            print("🌐 Browser environment running on localhost:8080")
            print(separator)

            result = await agent.run(task, max_steps=100)

            # Display results
            print(separator)
            print("✅ Game completed!")
            print(f"🏆 Final Score/Max Tile: {result.reward}")
            if result.info:
                print(f"📊 Game Stats: {result.info}")

            print("\n📝 Full interaction trace:")
            for step, msg in enumerate(agent.conversation_history, start=1):
                print(f" {step} : {msg}")
                print("-" * 30)

        except Exception as e:
            # Top-level boundary of an example script: report the failure and
            # fall through to cleanup instead of propagating.
            print(f"❌ Error during game: {e}")
        finally:
            # Always release the MCP client, even when the run fails.
            await client.shutdown()
| 136 | + |
| 137 | + |
# Script entry point: run the async main() coroutine only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())
0 commit comments