-
Notifications
You must be signed in to change notification settings - Fork 52
fixes/improvements for generic openai api agent #107
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
4c2fd93
c2488b5
38b90fa
2f974ef
3d98335
5e4d745
ceb4f8f
585d0db
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,122 @@ | ||
| #!/usr/bin/env python3 | ||
| """ | ||
| OpenAI Chat Agent playing Text 2048 | ||
|
|
||
| This example demonstrates using the OpenAIChatAgent with the text-2048 environment. | ||
| It shows how to: | ||
| - Initialize an OpenAI client with the openai_chat agent | ||
| - Configure the text-2048 environment | ||
| - Run the agent to play the game | ||
|
|
||
| Requirements: | ||
| - pip install openai | ||
| - export OPENAI_API_KEY="your-api-key" # Or set OPENAI_BASE_URL for custom endpoints | ||
|
|
||
| Environment Variables: | ||
| - OPENAI_BASE_URL: Custom OpenAI-compatible API endpoint | ||
| - OPENAI_API_KEY: API key for authentication | ||
| """ | ||
|
|
||
| import asyncio | ||
| import os | ||
| from openai import AsyncOpenAI | ||
| import hud | ||
| from hud.agents.openai_chat_generic import GenericOpenAIChatAgent | ||
| from hud.clients import MCPClient | ||
| from hud.datasets import Task | ||
|
|
||
|
|
||
| async def main(): | ||
| # Initialize OpenAI client with environment variables | ||
| base_url = os.getenv("OPENAI_BASE_URL") | ||
| api_key = os.getenv("OPENAI_API_KEY") | ||
|
|
||
| openai_client = AsyncOpenAI( | ||
| base_url=base_url if base_url else None, # None will use default OpenAI endpoint | ||
| api_key=api_key, | ||
| ) | ||
|
|
||
| mcp_config = { | ||
| "local": { | ||
| "command": "docker", | ||
| "args": ["run", "--rm", "-i", "hudevals/hud-text-2048:latest"], | ||
| } | ||
| } | ||
|
|
||
| system_prompt = """You are an expert 2048 game player. Your goal is to reach the tile specified by the user. | ||
|
|
||
| HOW 2048 WORKS: | ||
| - 4x4 grid with numbered tiles (2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048...) | ||
| - When you move, all tiles slide in that direction | ||
| - When two tiles with SAME number touch, they merge into one (2+2=4, 4+4=8, etc.) | ||
| - After each move, a new tile (2 or 4) appears randomly | ||
| - Game ends when grid is full and no merges possible | ||
|
|
||
| CRITICAL RULES: | ||
| - ALWAYS analyze the board before moving | ||
| - ALWAYS make a tool call for your move | ||
| - Use the 'move' tool with these choices: "up", "down", "left", or "right" | ||
| - Remember: ALL strings in JSON must have quotes! | ||
| - Make exactly ONE move per turn | ||
| - NEVER ask for permission - just keep playing until the game ends | ||
| - Don't ask "Should I continue?" - just make your next move | ||
|
|
||
| Example tool call: {"name": "move", "arguments": {"direction": "right"}}""" | ||
|
|
||
| # Define the task with game setup and evaluation | ||
| task = Task( | ||
| prompt="""Aim for the 128 tile (at least 800 points!)""", | ||
| mcp_config=mcp_config, | ||
| setup_tool={ | ||
| "name": "setup", | ||
| "arguments": {"name": "board", "arguments": {"board_size": 4}}, | ||
| }, # type: ignore | ||
| evaluate_tool={"name": "evaluate", "arguments": {"name": "max_number", "arguments": {}}}, # type: ignore | ||
| ) | ||
|
|
||
| # Initialize MCP client | ||
| client = MCPClient(mcp_config=task.mcp_config) | ||
|
|
||
| model_name = "gpt-5-mini" # Replace with your model name | ||
|
|
||
| # Create OpenAI agent with the text-2048 game tools | ||
| agent = GenericOpenAIChatAgent( | ||
| mcp_client=client, | ||
| openai_client=openai_client, | ||
| model_name=model_name, | ||
| allowed_tools=["move"], | ||
| parallel_tool_calls=False, | ||
| system_prompt=system_prompt, | ||
| ) | ||
|
|
||
| agent.metadata = {} | ||
|
|
||
| with hud.trace("OpenAI 2048 Game"): | ||
| try: | ||
| print("🎮 Starting 2048 game with OpenAI agent...") | ||
| print(f"🤖 Model: {agent.model_name}") | ||
| print("=" * 50) | ||
|
|
||
| result = await agent.run(task, max_steps=-1) | ||
|
|
||
| # Display results | ||
| print("=" * 50) | ||
| print(f"✅ Game completed!") | ||
| print(f"🏆 Final Score/Max Tile: {result.reward}") | ||
| if result.info: | ||
| print(f"📊 Game Stats: {result.info}") | ||
|
|
||
| # Display conversation history | ||
| print("🗣️ Conversation History:") | ||
| for i, msg in enumerate(agent.conversation_history): | ||
| print(f" {i + 1} : {msg}") | ||
| print("-" * 30) | ||
|
|
||
| except Exception as e: | ||
| print(f"❌ Error during game: {e}") | ||
| finally: | ||
| await client.shutdown() | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| asyncio.run(main()) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,134 @@ | ||
| #!/usr/bin/env python3 | ||
| """ | ||
| OpenAI Chat Agent playing Browser 2048 | ||
|
|
||
| This example demonstrates using the OpenAIChatAgent with the browser-based 2048 game. | ||
| It shows how to: | ||
| - Initialize an OpenAI client with browser automation capabilities | ||
| - Configure the browser-2048 environment with Docker | ||
| - Use computer vision and interaction tools to play the game | ||
|
|
||
| Requirements: | ||
| - pip install openai | ||
| - export OPENAI_API_KEY="your-api-key" # Or set OPENAI_BASE_URL for custom endpoints | ||
| - Docker installed and running | ||
|
|
||
| Environment Variables: | ||
| - OPENAI_BASE_URL: Custom OpenAI-compatible API endpoint (optional) | ||
| - OPENAI_API_KEY: API key for authentication | ||
| """ | ||
|
|
||
| import asyncio | ||
| import os | ||
| from openai import AsyncOpenAI | ||
| import hud | ||
| from hud.agents.openai_chat_generic import GenericOpenAIChatAgent | ||
| from hud.clients import MCPClient | ||
| from hud.datasets import Task | ||
|
|
||
|
|
||
| async def main(): | ||
| # Initialize OpenAI client with environment variables | ||
| base_url = os.getenv("OPENAI_BASE_URL") | ||
| api_key = os.getenv("OPENAI_API_KEY") | ||
|
|
||
| openai_client = AsyncOpenAI( | ||
| base_url=base_url if base_url else None, | ||
| api_key=api_key, | ||
| ) | ||
|
|
||
| # Configure the browser-2048 environment | ||
| mcp_config = { | ||
| "local": { | ||
| "command": "docker", | ||
| "args": ["run", "--rm", "-i", "-p", "8080:8080", "hudevals/hud-browser:0.1.3"], | ||
| } | ||
| } | ||
|
|
||
| system_prompt = """You are an expert 2048 game player using a browser interface. Your goal is to reach the tile specified by the user. | ||
|
|
||
| HOW 2048 WORKS: | ||
| - 4x4 grid with numbered tiles (2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048...) | ||
| - When you move, all tiles slide in that direction | ||
| - When two tiles with SAME number touch, they merge into one (2+2=4, 4+4=8, etc.) | ||
| - After each move, a new tile (2 or 4) appears randomly | ||
| - Game ends when grid is full and no merges possible | ||
|
|
||
| BROWSER INTERACTION USING THE COMPUTER TOOL: | ||
| 1. TAKE SCREENSHOTS: | ||
| Use: computer(action="screenshot") | ||
| This captures the current game state | ||
|
|
||
| 2. MAKE MOVES - Use arrow keys by calling the computer tool with action="press": | ||
| - Move UP: computer(action="press", keys=["up"]) | ||
| - Move DOWN: computer(action="press", keys=["down"]) | ||
| - Move LEFT: computer(action="press", keys=["left"]) | ||
| - Move RIGHT: computer(action="press", keys=["right"]) | ||
|
|
||
| CRITICAL RULES: | ||
| - Take a screenshot first to see the board state at the start of the game | ||
| - Make exactly ONE move per turn using the press action with arrow keys | ||
| - Continue playing until you reach the target or the game ends | ||
|
|
||
| Strategy tips: | ||
| - Keep your highest tiles in a corner | ||
| - Build tiles in descending order from the corner | ||
| - Avoid random moves - be strategic | ||
| - Try to keep the board organized""" | ||
|
|
||
| # Define the task with browser game setup and evaluation | ||
| task = Task( | ||
| prompt="""Play the browser-based 2048 game and try to reach the 128 tile. | ||
|
|
||
| Take screenshots to see the game board, then make strategic moves using the browser interface. | ||
| You can use arrow keys or mouse gestures to move tiles.""", | ||
| mcp_config=mcp_config, | ||
| setup_tool={"name": "launch_app", "arguments": {"app_name": "2048"}}, # type: ignore | ||
| evaluate_tool={ | ||
| "name": "evaluate", | ||
| "arguments": {"name": "game_2048_max_number", "arguments": {"target": 128}}, | ||
| }, # type: ignore | ||
| ) | ||
|
|
||
| # Initialize MCP client | ||
| client = MCPClient(mcp_config=task.mcp_config) | ||
|
|
||
| model_name = "gpt-5-mini" # "z-ai/glm-4.5v", "Qwen/Qwen2.5-VL-7B-Instruct" etc... | ||
|
||
|
|
||
| # Create OpenAI agent with browser automation tools | ||
| agent = GenericOpenAIChatAgent( | ||
| mcp_client=client, | ||
| openai_client=openai_client, | ||
| model_name=model_name, | ||
| allowed_tools=["computer"], | ||
| parallel_tool_calls=False, | ||
| system_prompt=system_prompt, | ||
| ) | ||
|
|
||
| agent.metadata = {} | ||
|
|
||
| # Run the game with tracing | ||
| with hud.trace("OpenAI Browser 2048 Game"): | ||
| try: | ||
| print("🎮 Starting browser-based 2048 game with OpenAI agent...") | ||
| print(f"🤖 Model: {agent.model_name}") | ||
| print(f"🌐 Browser environment running on localhost:8080") | ||
| print("=" * 50) | ||
|
|
||
| result = await agent.run(task, max_steps=10) | ||
|
|
||
| # Display results | ||
| print("=" * 50) | ||
| print(f"✅ Game completed!") | ||
| print(f"🏆 Final Score/Max Tile: {result.reward}") | ||
| if result.info: | ||
| print(f"📊 Game Stats: {result.info}") | ||
|
|
||
| except Exception as e: | ||
| print(f"❌ Error during game: {e}") | ||
| finally: | ||
| await client.shutdown() | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| asyncio.run(main()) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -54,7 +54,7 @@ async def determine_response(self, agent_message: str) -> ResponseType: | |
| """ | ||
| try: | ||
| response = await self.client.chat.completions.create( | ||
| model="gpt-4o", | ||
| model="gpt-5-nano", | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
| messages=[ | ||
| {"role": "system", "content": self.system_prompt}, | ||
| { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,6 +21,7 @@ | |
|
|
||
| import mcp.types as types | ||
|
|
||
| from hud import instrument | ||
| from hud.types import AgentResponse, MCPToolCall, MCPToolResult | ||
|
|
||
| from .base import MCPAgent | ||
|
|
@@ -52,6 +53,7 @@ def __init__( | |
| self.model_name = model_name | ||
| self.parallel_tool_calls = parallel_tool_calls | ||
| self.logprobs = logprobs | ||
| self.conversation_history = [] | ||
|
|
||
| @staticmethod | ||
| def _oai_to_mcp(tool_call: Any) -> MCPToolCall: # type: ignore[valid-type] | ||
|
|
@@ -64,9 +66,7 @@ def _oai_to_mcp(tool_call: Any) -> MCPToolCall: # type: ignore[valid-type] | |
|
|
||
| async def get_system_messages(self) -> list[Any]: | ||
| """Get system messages for OpenAI.""" | ||
| return [ | ||
| {"role": "system", "content": self.system_prompt}, | ||
| ] | ||
| return [{"role": "system", "content": self.system_prompt}] | ||
|
|
||
| async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[Any]: | ||
| """Format blocks for OpenAI.""" | ||
|
|
@@ -96,8 +96,14 @@ def get_tool_schemas(self) -> list[dict]: | |
| openai_tools.append(openai_tool) | ||
| return openai_tools | ||
|
|
||
| @instrument( | ||
| span_type="agent", | ||
| record_args=False, | ||
| record_result=True, | ||
| ) | ||
| async def get_response(self, messages: list[Any]) -> AgentResponse: | ||
| """Send chat request to OpenAI and convert the response.""" | ||
|
|
||
| # Convert MCP tool schemas to OpenAI format | ||
| mcp_schemas = self.get_tool_schemas() | ||
|
|
||
|
|
@@ -112,6 +118,19 @@ async def get_response(self, messages: list[Any]) -> AgentResponse: | |
| choice = response.choices[0] | ||
| msg = choice.message | ||
|
|
||
| assistant_msg: dict[str, Any] = {"role": "assistant"} | ||
|
|
||
| if msg.content: | ||
| assistant_msg["content"] = msg.content | ||
|
|
||
| if msg.tool_calls: | ||
| assistant_msg["tool_calls"] = msg.tool_calls | ||
|
|
||
| messages.append(assistant_msg) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
|
|
||
| # Store the complete conversation history | ||
| self.conversation_history = messages.copy() | ||
|
|
||
| tool_calls = [] | ||
| if msg.tool_calls: | ||
| for tc in msg.tool_calls: | ||
|
|
@@ -123,7 +142,7 @@ async def get_response(self, messages: list[Any]) -> AgentResponse: | |
| return AgentResponse( | ||
| content=msg.content or "", | ||
| tool_calls=tool_calls, | ||
| done=choice.finish_reason == "stop", | ||
| done=choice.finish_reason in ("stop", "length"), | ||
| raw=response, # Include raw response for access to Choice objects | ||
| ) | ||
|
|
||
|
|
@@ -144,11 +163,10 @@ async def format_tool_results( | |
| for c in res.content | ||
| if hasattr(c, "text") | ||
| ) | ||
| rendered.append( | ||
| { | ||
| "role": "tool", | ||
| "tool_call_id": call.id, | ||
| "content": content or "", # Ensure content is never None | ||
| } | ||
| ) | ||
| tool_msg = { | ||
| "role": "tool", | ||
| "tool_call_id": call.id, | ||
| "content": content or "", # Ensure content is never None | ||
| } | ||
| rendered.append(tool_msg) | ||
| return rendered | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
Bug: Nonexistent Model Reference Causes API Errors
The examples specify "gpt-5-mini" as the OpenAI model. This model doesn't exist, which will cause a runtime error when the agent attempts to make API calls. This looks like placeholder code that was committed.
Additional Locations (1)
examples/openai_browser_2048.py#L95-L96