22import logging
33import os
44
5- from tapeagents import agent
6-
7- from agentlab .agents .tool_use_agent .tool_use_agent import OSWORLD_CLAUDE , OSWORLD_OAI
5+ from agentlab .agents .tool_use_agent .tool_use_agent import OSWORLD_CLAUDE
86from agentlab .benchmarks .osworld import OsworldBenchmark
9- from agentlab .experiments .study import make_study , Study
7+ from agentlab .experiments .study import Study , make_study
108
# Log line layout: timestamp, level, logger name:line number, function name, then the message.
119fmt = "%(asctime)s - %(levelname)s - %(name)s:%(lineno)d - %(funcName)s() - %(message)s"
# Configure the root logger at INFO with a single stream handler using the format above.
# force=True makes basicConfig remove any handlers already attached to the root logger first.
1210logging .basicConfig (level = logging .INFO , force = True , format = fmt , handlers = [logging .StreamHandler ()])
@@ -20,6 +18,7 @@ def get_most_recent_incomplete_study() -> Study:
2018 study .find_incomplete (include_errors = True )
2119 return study
2220
21+
2322def get_task_ids () -> set [str ]:
2423 with open ("experiments/osworld_debug_task_ids.json" , "r" ) as f :
2524 task_ids = json .load (f )
@@ -32,13 +31,15 @@ def main():
3231 relaunch = True
3332 agent_args = [
3433 OSWORLD_CLAUDE ,
35- # OSWORLD_OAI # performs poorly.
36- ] # type: ignore
34+ # OSWORLD_OAI # performs poorly.
35+ ] # type: ignore
3736 parallel_backend = "ray"
3837 os .environ ["AGENTLAB_DEBUG" ] = os .environ .get ("AGENTLAB_DEBUG" , "1" )
3938
4039 study = make_study (
41- benchmark = OsworldBenchmark (test_set_name = "test_small.json" ), # or test_all.json (Exper) # type: ignore
40+ benchmark = OsworldBenchmark (
41+ test_set_name = "test_small.json"
42+ ), # or test_all.json (Exper) # type: ignore
4243 agent_args = agent_args , # type: ignore
4344 comment = "osworld debug 2" ,
4445 logging_level = logging .INFO ,
0 commit comments