Skip to content

Commit 815893c

Browse files
committed
Record the task video; wait 60 sec after reset, just as OSWorld's own agent does
1 parent 2b79b50 commit 815893c

File tree

1 file changed

+18
-7
lines changed

1 file changed

+18
-7
lines changed

src/agentlab/benchmarks/osworld.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import json
33
import logging
44
import os
5+
import time
56
from copy import deepcopy
67
from dataclasses import dataclass
78
from io import BytesIO
@@ -340,7 +341,8 @@ def __init__(
340341
require_terminal: bool,
341342
os_type: str,
342343
enable_proxy: bool,
343-
max_steps: int = 50,
344+
max_steps: int,
345+
exp_dir: Path,
344346
):
345347
self.task = task
346348
self.env_info = {
@@ -372,10 +374,15 @@ def __init__(
372374
)
373375
self._step_count = 0
374376
self.max_steps = max_steps
377+
self.exp_dir = exp_dir
375378

376379
def reset(self, seed: int | None = None) -> tuple[dict[str, Any], dict[str, Any]]:
377-
raw_obs = self.env.reset(task_config=self.task, seed=seed)
378-
obs = self.env_to_agentlab_observation(raw_obs)
380+
self.env.reset(task_config=self.task, seed=seed)
381+
logging.info(f"Start solving task: {self.task['instruction']}")
382+
time.sleep(60) # Wait for the environment to be ready, as in https://github.com/xlang-ai/OSWorld/blob/main/lib_run_single.py#L15
383+
raw_obs = self.env._get_obs() # Get the initial observation
384+
self.env.controller.start_recording()
385+
obs = self.to_agentlab_observation(raw_obs)
379386
self._step_count = 0
380387
return obs, self.env_info
381388

@@ -385,7 +392,7 @@ def step(self, action: str):
385392
env_action = self.agentlab_to_env_action(action)
386393
logger.info(f"AgentLab Action returned: {action}, converted to: {env_action}")
387394
raw_obs, reward, done, info = self.env.step(env_action)
388-
logger.info(f"Task {self.task['id']} Step {self._step_count + 1}/{self.max_steps} done")
395+
logger.info(f"STEP {self.task['id']} {self._step_count + 1}/{self.max_steps}")
389396
self._step_count += 1
390397
truncated = info.get("fail", False) or self._step_count >= self.max_steps
391398
if done or truncated:
@@ -398,7 +405,7 @@ def step(self, action: str):
398405
logger.info(f"Evaluated reward: {reward}")
399406
except Exception as e:
400407
logger.error(f"Failed to evaluate {self.task} task: {e}")
401-
obs = self.env_to_agentlab_observation(raw_obs)
408+
obs = self.to_agentlab_observation(raw_obs)
402409
return obs, reward, done, truncated, info
403410

404411
def agentlab_to_env_action(self, action: str) -> Any:
@@ -410,7 +417,7 @@ def agentlab_to_env_action(self, action: str) -> Any:
410417
"PyAutoGUI action space is not supported yet. Please use 'computer_13' action space."
411418
)
412419

413-
def env_to_agentlab_observation(self, obs: dict[str, Any]) -> dict[str, Any]:
420+
def to_agentlab_observation(self, obs: dict[str, Any]) -> dict[str, Any]:
414421
"""Convert OSWorld observation to AgentLab format."""
415422
converted_obs = {}
416423

@@ -467,7 +474,7 @@ def convert_agentlab_action_to_computer_13(self, action: str) -> dict[str, Any]
467474
... snapshot_name="init_state", action_space="computer_13",
468475
... cache_dir="cache", screen_size=(1920, 1080), headless=True,
469476
... require_a11y_tree=True, require_terminal=False, os_type="Ubuntu",
470-
... enable_proxy=False, max_steps=50)
477+
... enable_proxy=False, max_steps=50, exp_dir=Path("."))
471478
>>> env.convert_agentlab_action_to_computer_13("move_to(x=100, y=200)")
472479
{'action_type': 'MOVE_TO', 'parameters': {'x': 100, 'y': 200}}
473480
>>> env.convert_agentlab_action_to_computer_13("wait()")
@@ -513,6 +520,9 @@ def parse_agentlab_action_str_to_func_args(action: str):
513520
return None, None, None
514521

515522
def close(self):
523+
video_name = str(self.exp_dir / "recording.mp4")
524+
self.env.controller.end_recording(video_name)
525+
logger.info(f"Recorded video saved to {video_name}")
516526
return self.env.close()
517527

518528

@@ -614,6 +624,7 @@ def make_env(
614624
os_type=self.os_type,
615625
enable_proxy=self.enable_proxy,
616626
max_steps=self.max_steps,
627+
exp_dir=exp_dir,
617628
)
618629
return gym
619630

0 commit comments

Comments
 (0)