11import argparse
22import logging
33import os
4- import sys
54
65from bgym import DEFAULT_BENCHMARKS
76from dotenv import load_dotenv
87
98from agentlab .agents .generic_agent .agent_configs import GPT5_MINI_FLAGS
109from agentlab .agents .generic_agent .generic_agent import GenericAgentArgs
1110from agentlab .agents .react_toolcall_agent import AgentConfig , LLMArgs , ReactToolCallAgentArgs
12- from agentlab .agents .tapeagent .agent import TapeAgentArgs , load_config
1311from agentlab .backends .browser .mcp_playwright import MCPPlaywright
1412from agentlab .backends .browser .playwright import SyncPlaywright
1513from agentlab .benchmarks .miniwob import MiniWobBenchmark
1614from agentlab .experiments .study import make_study
17- from agentlab .llm .chat_api import BaseModelArgs
1815from agentlab .llm .llm_configs import CHAT_MODEL_ARGS_DICT
1916
2017fmt = "%(asctime)s - %(levelname)s - %(name)s:%(lineno)d - %(funcName)s() - %(message)s"
2320load_dotenv ()
2421
2522
26-
2723def parse_args ():
2824 parser = argparse .ArgumentParser (description = "Run MiniWob benchmark experiments" )
2925 parser .add_argument (
@@ -49,7 +45,6 @@ def parse_args():
4945
5046if __name__ == "__main__" :
5147 args = parse_args ()
52- config = load_config (args .config )
5348
5449 if args .backend == "bgym" :
5550 benchmark = DEFAULT_BENCHMARKS ["miniwob" ](n_repeats = 1 )
@@ -65,23 +60,22 @@ def parse_args():
6560 chat_model_args = CHAT_MODEL_ARGS_DICT ["azure/gpt-5-mini-2025-08-07" ],
6661 flags = GPT5_MINI_FLAGS ,
6762 )
68- elif args . agent == " react" :
63+ else : # react
6964 agent_args = ReactToolCallAgentArgs (
70- llm_args = LLMArgs (model_name = "azure/gpt-5-mini" , temperature = 1.0 , max_total_tokens = 128000 ),
65+ llm_args = LLMArgs (
66+ model_name = "azure/gpt-5-mini" , temperature = 1.0 , max_total_tokens = 128000
67+ ),
7168 config = AgentConfig (),
7269 )
73- else :
74- agent_args = TapeAgentArgs (agent_name = config .name , config = config )
7570
7671 study = make_study (
7772 benchmark = benchmark ,
7873 agent_args = agent_args ,
79- comment = config .comment ,
8074 logging_level = logging .INFO ,
8175 logging_level_stdout = logging .INFO ,
8276 )
8377 if os .environ .get ("AGENTLAB_DEBUG" ):
8478 study .exp_args_list = study .exp_args_list [23 :27 ]
8579 study .run (n_jobs = 1 , n_relaunch = 1 , parallel_backend = "sequential" )
8680 else :
87- study .run (n_jobs = config . n_jobs , n_relaunch = 1 , parallel_backend = config . parallel_backend )
81+ study .run (n_jobs = 8 , n_relaunch = 1 , parallel_backend = "ray" )
0 commit comments