Skip to content

Commit bb38053

Browse files
Remove the '*action_set' pattern from index_black_list in load_result_df, and make OSWorldActionSet a dataclass so that it has a proper repr.
1 parent cf4b277 commit bb38053

File tree

2 files changed

+15
-14
lines changed

2 files changed

+15
-14
lines changed

src/agentlab/analyze/inspect_results.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def load_result_df(
109109
set_index=True,
110110
result_df=None,
111111
index_white_list=("agent.*",),
112-
index_black_list=("*model_url*", "*extra*", "*._*", "*action_set"),
112+
index_black_list=("*model_url*", "*extra*", "*._*"),
113113
remove_args_suffix=True,
114114
):
115115
"""Load the result dataframe.

src/agentlab/benchmarks/osworld.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -547,14 +547,15 @@ def close(self):
547547
return self.env.close()
548548

549549

550-
class OSWorldActionSet(AbstractActionSet):
550+
@dataclass
551+
class OSWorldActionSet(AbstractActionSet, DataClassJsonMixin):
551552
# TODO: Define and use agentlab AbstractActionSet
552553
# AbstractActionSet should define some standard format to represent actions.(list of dict with keys that are MCP compatible)
553554
# Should we have 'abstract function' here for action conversion for backend LLM with fixed action set like UI-Tars or Semi-fixed action set LLMs like OpenAI CUA?
554555
# TODO: We need to support both 'action space as tools' and 'action space as prompt' for agentlab agents
555556
# and have conversion functions to convert them to format acceptable by environment.
556-
def __init__(self, action_space: Literal["computer_13", "pyautogui"]):
557-
self.action_space = action_space
557+
action_space: Literal["computer_13", "pyautogui"] = "computer_13"
558+
multiaction: bool = False
558559

559560
def describe(self, with_long_description: bool = True, with_examples: bool = True) -> str:
560561
"""Describe the OSWorld action set for desktop interactions."""
@@ -598,22 +599,22 @@ def format_response_api_tools_to_anthropic(tools: list[dict]) -> list[dict]:
598599
return formatted_tools
599600

600601

601-
@dataclass
602-
class OSWorldActionSetArgs(DataClassJsonMixin):
603-
action_space: Literal["computer_13", "pyautogui"] = "computer_13"
602+
# @dataclass
603+
# class OSWorldActionSetArgs(DataClassJsonMixin):
604+
# action_space: Literal["computer_13", "pyautogui"] = "computer_13"
604605

605-
def make_action_set(self):
606-
logger.info(f"Creating OSWorld Action Set with action space: {self.action_space}")
607-
return OSWorldActionSet(action_space=self.action_space)
606+
# def make_action_set(self):
607+
# logger.info(f"Creating OSWorld Action Set with action space: {self.action_space}")
608+
# return OSWorldActionSet(action_space=self.action_space)
608609

609610

610611
@dataclass
611612
class OsworldEnvArgs(AbstractEnvArgs):
612613
task: dict[str, Any]
613614
task_seed: int = 0
614615
task_name: str | None = None
615-
path_to_vm: str | None = None # path to .vmx file
616-
provider_name: str = "docker"
616+
path_to_vm: str | None = "OSWorld/vmware_vm_data/Ubuntu0/Ubuntu0.vmx" # path to .vmx file
617+
provider_name: str = "vmware" # path to .vmx file
617618
region: str = "us-east-1" # AWS specific, does not apply to all providers
618619
snapshot_name: str = "init_state" # snapshot name to revert to
619620
action_space: Literal["computer_13", "pyautogui"] = "computer_13"
@@ -653,7 +654,7 @@ def make_env(
653654
class OsworldBenchmark(AbstractBenchmark):
654655
name: str = "osworld"
655656
is_multi_tab: bool = False
656-
high_level_action_set_args: OSWorldActionSetArgs = None # type: ignore
657+
high_level_action_set_args: OSWorldActionSet = None # type: ignore
657658
test_set_path: str = "OSWorld/evaluation_examples"
658659
test_set_name: str = "test_all.json"
659660
domain: str = "all"
@@ -664,7 +665,7 @@ def model_post_init(self, __context: Any) -> None:
664665
self.env_args_list = []
665666
if not self.env_args:
666667
self.env_args = OsworldEnvArgs(task={})
667-
self.high_level_action_set_args = OSWorldActionSetArgs(
668+
self.high_level_action_set_args = OSWorldActionSet(
668669
action_space=self.env_args.action_space
669670
)
670671
with open(os.path.join(self.test_set_path, self.test_set_name)) as f:

0 commit comments

Comments
 (0)