Merge pull request #263 from ServiceNow/osworld

amanjaiswal73892 · web-flow · commit 41c4ead4d727 · 2025-07-15T17:03:22.000-04:00
Update relaunch flag to False and adjust model_args for OSWORLD_OAI
diff --git a/experiments/run_osworld.py b/experiments/run_osworld.py
@@ -28,7 +28,7 @@ def get_task_ids() -> set[str]:
 def main():
     n_jobs = 4
     use_vmware = True
-    relaunch = True
+    relaunch = False
     agent_args = [
         OSWORLD_CLAUDE,
         #    OSWORLD_OAI # performs poorly.
diff --git a/src/agentlab/agents/tool_use_agent/tool_use_agent.py b/src/agentlab/agents/tool_use_agent/tool_use_agent.py
@@ -7,21 +7,18 @@
 from typing import Any
 
 import bgym
-import numpy as np
 import pandas as pd
+from bgym import Benchmark as BgymBenchmark
 from browsergym.core.observation import extract_screenshot
 from browsergym.utils.obs import (
     flatten_axtree_to_str,
     flatten_dom_to_str,
     overlay_som,
     prune_html,
 )
-from PIL import Image
 
-from agentlab.agents import agent_utils
-from agentlab.benchmarks.abstract_env import AbstractBenchmark as AgentLabBenchmark
-from bgym import Benchmark as BgymBenchmark
 from agentlab.agents.agent_args import AgentArgs
+from agentlab.benchmarks.abstract_env import AbstractBenchmark as AgentLabBenchmark
 from agentlab.benchmarks.osworld import OSWorldActionSet
 from agentlab.llm.base_api import BaseModelArgs
 from agentlab.llm.llm_utils import image_to_png_base64_url
@@ -629,7 +626,7 @@ def get_action(self, obs: Any) -> float:
 )
 
 OSWORLD_OAI = ToolUseAgentArgs(
-    model_args=OPENAI_MODEL_CONFIG,
+    model_args=GPT_4_1_MINI,
     config=PromptConfig(
         tag_screenshot=True,
         goal=Goal(goal_as_system_msg=True),