-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlocal_test.py
More file actions
87 lines (60 loc) · 2.42 KB
/
local_test.py
File metadata and controls
87 lines (60 loc) · 2.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
"""Local test script for the blank environment.
Run the backend first: uvicorn backend.app:app --port 8005
Then run this script: python local_test.py
"""
import asyncio
import hud
from hud.agents import OpenAIChatAgent
from openai import AsyncOpenAI
from env import env
# Use HUD inference gateway - see all models at https://hud.ai/models
# Module-level async client, built once at import time and used by the manual
# agent loop in test_scenario_manual().
# NOTE(review): no api_key is passed here, so the client presumably falls back
# to its default credential lookup - confirm the gateway accepts that key.
client = AsyncOpenAI(base_url="https://inference.hud.ai")
async def test_tools_standalone():
    """Exercise the environment's tools directly, without a scenario.

    Enters the ``env`` context, prints the names of the tools it exposes,
    then calls the ``"act"`` tool three times, printing each result.
    """
    print("=== Test 1: Standalone Tools ===")
    async with env:
        tool_names = [tool.name for tool in env.as_tools()]
        print(f"Tools: {tool_names}")

        calls_remaining = 3
        while calls_remaining > 0:
            outcome = await env.call_tool("act")
            print(outcome)
            calls_remaining -= 1
async def test_scenario_manual(max_steps: int = 10):
    """Test scenario with manual OpenAI calls.

    Builds the ``"count-to"`` task with ``target=3``, opens it with
    ``hud.eval`` and drives it with a hand-written chat-completions loop:
    each round sends the running message list plus the context's tools to
    the model, and every tool call in the reply is executed through
    ``ctx.call_tool`` with its result appended to the conversation. The loop
    ends as soon as the model replies without tool calls.

    Args:
        max_steps: Upper bound on model round-trips. Without a cap, a model
            that keeps emitting tool calls would loop (and make gateway
            requests) forever. The default matches the ``max_steps=10``
            used by ``test_distribution``.
    """
    print("\n=== Test 2: Scenario (Manual Agent Loop) ===")
    task = env("count-to", target=3)
    async with hud.eval(task) as ctx:
        messages = [{"role": "user", "content": ctx.prompt}]
        for _ in range(max_steps):
            response = await client.chat.completions.create(
                model="gpt-4o",  # https://hud.ai/models
                messages=messages,
                tools=ctx.as_openai_chat_tools(),
            )
            msg = response.choices[0].message
            if not msg.tool_calls:
                # No tool calls requested: the model is done.
                break
            # The assistant message must precede its tool results.
            messages.append(msg)
            for tc in msg.tool_calls:
                # presumably returns a tool-role message for this call - verify
                result = await ctx.call_tool(tc)
                messages.append(result)
        else:
            # Step budget exhausted while the model was still calling tools.
            print(f"Stopped after {max_steps} steps without a final reply")
async def test_scenario_agent():
    """Run the ``"count-to"`` scenario (``target=5``) with ``OpenAIChatAgent``.

    The agent is created for the ``gpt-4o`` model and handed the evaluation
    context; the agent's ``run`` call does the rest.
    """
    print("\n=== Test 3: Scenario (Agent) ===")
    count_to_five = env("count-to", target=5)
    async with hud.eval(count_to_five) as ctx:
        # Model list: https://hud.ai/models
        await OpenAIChatAgent.create(model="gpt-4o").run(ctx)
async def test_distribution():
    """Run several tasks across model variants and groups (A/B style).

    Two ``"count-to"`` tasks (targets 3 and 5) are passed to ``hud.eval``
    together with a ``model`` variant list and ``group=2``. Inside the
    context, the agent is built for whichever model ``ctx.variants`` reports
    and run with a 10-step cap.
    """
    print("\n=== Test 4: Distribution (Variants + Groups) ===")
    tasks = [env("count-to", target=goal) for goal in (3, 5)]
    async with hud.eval(
        tasks,
        variants={"model": ["gpt-4o-mini", "gpt-4o"]},
        group=2,
    ) as ctx:
        chosen_model = ctx.variants["model"]
        runner = OpenAIChatAgent.create(model=chosen_model)
        await runner.run(ctx, max_steps=10)
async def main():
    """Run the four local checks sequentially, in a fixed order."""
    checks = (
        test_tools_standalone,
        test_scenario_manual,
        test_scenario_agent,
        test_distribution,
    )
    for check in checks:
        # Each check is awaited to completion before the next one starts.
        await check()
if __name__ == "__main__":
    # Script entry point: run the async main() to completion.
    asyncio.run(main())