@@ -385,13 +385,19 @@ def step(self, action: str):
385385 env_action = self .agentlab_to_env_action (action )
386386 logger .info (f"AgentLab Action returned: { action } , converted to: { env_action } " )
387387 raw_obs , reward , done , info = self .env .step (env_action )
388+ logger .info (f"Task { self .task ['id' ]} Step { self ._step_count + 1 } /{ self .max_steps } done" )
388389 self ._step_count += 1
389- truncated = info .get (' fail' , False ) or self ._step_count >= self .max_steps
390+ truncated = info .get (" fail" , False ) or self ._step_count >= self .max_steps
390391 if done or truncated :
392+ if done :
393+ logger .info (f"Task { self .task ['id' ]} completed successfully." )
394+ else :
395+ logger .warning (f"Task { self .task ['id' ]} truncated after { self ._step_count } steps." )
391396 try :
392397 reward = self .env .evaluate ()
398+ logger .info (f"Evaluated reward: { reward } " )
393399 except Exception as e :
394- logger .warning (f"Failed to evaluate { self .task } task: { e } " )
400+ logger .error (f"Failed to evaluate { self .task } task: { e } " )
395401 obs = self .env_to_agentlab_observation (raw_obs )
396402 return obs , reward , done , truncated , info
397403
@@ -433,7 +439,8 @@ def _add_screenshot(self, converted_obs: dict[str, Any], obs: dict[str, Any]) ->
def _add_som_screenshot(self, converted_obs: dict[str, Any], obs: dict[str, Any]) -> None:
    """Attach the tagged (SOM) screenshot to the converted observation.

    Args:
        converted_obs: Observation dict being built; it is mutated in place
            and receives the ``"som_screenshot"`` entry.
        obs: Raw observation; its ``"screenshot"`` and
            ``"accessibility_tree"`` entries are passed to ``tag_screenshot``.
    """
    # tag_screenshot yields four values; only the tagged screenshot is used
    # here. The other three are unpacked solely to keep the 4-tuple contract.
    _masks, _drew_nodes, tagged_image, _linearized_tree = tag_screenshot(
        obs["screenshot"],
        obs["accessibility_tree"],
        platform="ubuntu",
    )
    som_array = self.convert_screenshot_to_numpy(tagged_image)
    converted_obs["som_screenshot"] = som_array
438445
439446 def _add_browser_context (self , converted_obs : dict [str , Any ]):
@@ -454,10 +461,10 @@ def _add_task_context(self, converted_obs: dict[str, Any], obs: dict[str, Any]):
454461
455462 def convert_agentlab_action_to_computer_13 (self , action : str ) -> dict [str , Any ] | str :
456463 """Convert action string to dictionary format.
457-
464+
458465 Examples:
459- >>> env = OsworldGym(task={}, provider_name="vmware", region=None, path_to_vm=None,
460- ... snapshot_name="init_state", action_space="computer_13",
466+ >>> env = OsworldGym(task={}, provider_name="vmware", region=None, path_to_vm=None,
467+ ... snapshot_name="init_state", action_space="computer_13",
461468 ... cache_dir="cache", screen_size=(1920, 1080), headless=True,
462469 ... require_a11y_tree=True, require_terminal=False, os_type="Ubuntu",
463470 ... enable_proxy=False, max_steps=50)
@@ -467,23 +474,25 @@ def convert_agentlab_action_to_computer_13(self, action: str) -> dict[str, Any]
467474 'WAIT'
468475 """
469476
470- action_type , action_args , action_kwargs = self .parse_agentlab_action_str_to_func_args (action )
477+ action_type , action_args , action_kwargs = self .parse_agentlab_action_str_to_func_args (
478+ action
479+ )
471480
472- if action_type in ["wait" ,"done" , "fail" ]:
473- return str (action_type ).upper ()
481+ if action_type in ["wait" , "done" , "fail" ]:
482+ return str (action_type ).upper ()
474483 if action_args :
475484 logger .warning (
476485 f"""Action '{ action_type } ' has unexpected positional arguments: { action_args } .
477486 OSWorld Computer 13 actions are processed as dictionaries."""
478487 )
479488 action_kwargs = action_kwargs if action_kwargs is not None else {}
480489
481- return { "action_type" : str (action_type ).upper (), "parameters" : action_kwargs }
490+ return {"action_type" : str (action_type ).upper (), "parameters" : action_kwargs }
482491
483492 @staticmethod
484493 def parse_agentlab_action_str_to_func_args (action : str ):
485494 """Parse the agentlab action string to extract function name, args, and kwargs.
486-
495+
487496 Examples:
488497 >>> parse_agentlab_action_str_to_func_args("move_to(x=100, y=200)")
489498 ('move_to', [], {'x': 100, 'y': 200})
@@ -515,9 +524,11 @@ class OSWorldActionSet(AbstractActionSet):
515524 # and have conversion functions to convert them to format acceptable by environment.
def __init__(self, action_space: Literal["computer_13", "pyautogui"]) -> None:
    """Record which action space this action set targets.

    Args:
        action_space: Either ``"computer_13"`` or ``"pyautogui"``; stored
            unchanged on ``self.action_space``.
    """
    self.action_space = action_space
527+
def describe(self, with_long_description: bool = True, with_examples: bool = True) -> str:
    """Describe the OSWorld action set for desktop interactions.

    Placeholder: neither argument is consulted yet, and the method returns
    ``None`` even though the signature advertises ``str``.

    Args:
        with_long_description: Currently unused.
        with_examples: Currently unused.
    """
    return None
531+
def example_action(self, abstract: bool) -> str:
    """Provide example actions for the action set.

    Placeholder: ``abstract`` is not consulted yet, and the method returns
    ``None`` even though the signature advertises ``str``.

    Args:
        abstract: Currently unused.
    """
    return None
@@ -582,7 +593,7 @@ class OsworldEnvArgs(AbstractEnvArgs):
582593 require_terminal : bool = False
583594 os_type : str = "Ubuntu"
584595 enable_proxy : bool = False
585- max_steps : int = 100
596+ max_steps : int = 50
586597
587598 def make_env (
588599 self , exp_dir : Path , action_mapping = None , use_raw_page_output : bool = False
0 commit comments