Skip to content

Commit dfbc005

Browse files
committed
improve entrypoint
1 parent 20502a8 commit dfbc005

File tree

1 file changed

+44
-13
lines changed

1 file changed

+44
-13
lines changed

experiments/run_miniwob.py

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
import argparse
12
import logging
23
import os
4+
import sys
35

46
from bgym import DEFAULT_BENCHMARKS
57
from dotenv import load_dotenv
@@ -18,22 +20,51 @@
1820
logger = logging.getLogger(__name__)
1921
load_dotenv()
2022

21-
if __name__ == "__main__":
22-
config = load_config("miniwob")
2323

24-
# benchmark = DEFAULT_BENCHMARKS["miniwob"](n_repeats=1)
25-
# benchmark = MiniWobBenchmark(backend=MCPPlaywright())
26-
benchmark = MiniWobBenchmark(backend=AsyncPlaywright())
2724

28-
# agent_args = GenericAgentArgs(
29-
# chat_model_args=CHAT_MODEL_ARGS_DICT["azure/gpt-5-mini-2025-08-07"],
30-
# flags=GPT5_MINI_FLAGS,
31-
# )
32-
# agent_args.flags.obs.use_ax_tree = False
33-
# agent_args.flags.obs.use_html = True
34-
# agent_args.flags.obs.use_focused_element = False
35-
agent_args = TapeAgentArgs(agent_name=config.name, config=config)
25+
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the MiniWob experiment entrypoint.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. When ``None`` (the default) argparse
            reads ``sys.argv[1:]``, so a zero-argument call behaves as a
            plain ``parser.parse_args()`` would.

    Returns:
        An ``argparse.Namespace`` with three string attributes:
        ``backend`` ("playwright", "mcp" or "bgym"; default "playwright"),
        ``agent`` ("tape" or "generic"; default "tape") and
        ``config`` (config name to load; default "miniwob").

    Raises:
        SystemExit: raised by argparse on an unknown option or on a value
            outside the declared ``choices``.
    """
    parser = argparse.ArgumentParser(description="Run MiniWob benchmark experiments")
    parser.add_argument(
        "--backend",
        choices=["playwright", "mcp", "bgym"],
        default="playwright",
        help="Browser backend to use (default: playwright)",
    )
    parser.add_argument(
        "--agent",
        choices=["tape", "generic"],
        default="tape",
        help="Agent type to use (default: tape)",
    )
    parser.add_argument(
        "--config",
        type=str,
        default="miniwob",
        help="Hydra config name to load (default: miniwob)",
    )
    # Forwarding argv keeps the CLI behavior (None -> sys.argv[1:]) while
    # letting callers and tests supply an explicit argument list.
    return parser.parse_args(argv)
46+
47+
48+
if __name__ == "__main__":
49+
args = parse_args()
50+
config = load_config(args.config)
3651

52+
if args.backend == "bgym":
53+
benchmark = DEFAULT_BENCHMARKS["miniwob"](n_repeats=1)
54+
elif args.backend == "playwright":
55+
benchmark = MiniWobBenchmark(backend_cls=AsyncPlaywright)
56+
elif args.backend == "mcp":
57+
benchmark = MiniWobBenchmark(backend_cls=MCPPlaywright)
58+
else:
59+
raise ValueError(f"Unknown backend: {args.backend}")
60+
61+
if args.agent == "generic":
62+
agent_args = GenericAgentArgs(
63+
chat_model_args=CHAT_MODEL_ARGS_DICT["azure/gpt-5-mini-2025-08-07"],
64+
flags=GPT5_MINI_FLAGS,
65+
)
66+
else:
67+
agent_args = TapeAgentArgs(agent_name=config.name, config=config)
3768

3869
study = make_study(
3970
benchmark=benchmark,

0 commit comments

Comments
 (0)