Skip to content

Commit 805c717

Browse files
committed
fixes
1 parent cf68ef6 commit 805c717

File tree

4 files changed: 23 additions and 13 deletions

4 files changed: 23 additions and 13 deletions

src/agentlab/agents/react_toolcall_agent.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -145,7 +145,7 @@ def get_action(self, obs: dict) -> tuple[ToolCall, dict]:
145145
messages = self.history + [{"role": "user", "content": self.config.guidance}]
146146

147147
try:
148-
logger.info(colored(f"Prompt:\n{pprint.pformat(messages, width=120)}", "blue"))
148+
logger.info(colored(f"Prompt:\n{pprint.pformat([str(m)[:500] for m in messages], width=120)}", "blue"))
149149
response = self.llm(tools=self.tools, messages=messages)
150150
message = response.choices[0].message # type: ignore
151151
except Exception as e:

src/agentlab/backends/browser/playwright.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@ def initialize(self):
4343
_pw = sync_playwright().start()
4444
if _browser is None:
4545
_browser = _pw.chromium.launch(headless=True, chromium_sandbox=True)
46+
4647
self._page = _browser.new_page()
4748

4849
@property
@@ -93,8 +94,17 @@ def evaluate_js(self, js: str):
9394
return js_result
9495

9596
def goto(self, url: str):
97+
"""Navigate to a specified URL."""
9698
self._page.goto(url)
9799

100+
def browser_back(self):
101+
"""Navigate back in browser history."""
102+
self._page.go_back()
103+
104+
def browser_forward(self):
105+
"""Navigate forward in browser history."""
106+
self._page.go_forward()
107+
98108
def page_html(self) -> str:
99109
return self._page.content()
100110

@@ -157,7 +167,7 @@ async def initialize(self):
157167
if _apw is None:
158168
_apw = await async_playwright().start()
159169
if _abrowser is None:
160-
_abrowser = await _apw.chromium.launch(headless=True, chromium_sandbox=True)
170+
_abrowser = await _apw.chromium.launch(headless=False, chromium_sandbox=True)
161171
self._page = await _abrowser.new_page()
162172

163173
async def browser_press_key(self, key: str):

src/agentlab/benchmarks/workarena/benchmark.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
from browsergym.workarena import get_all_tasks_agents
55
from browsergym.workarena.instance import SNowInstance
66
from pydantic import ConfigDict
7-
from ray.cloudpickle import instance
87

98
from agentlab.actions import ToolsActionSet
109
from agentlab.backends.browser.base import BrowserBackend
@@ -22,6 +21,7 @@ class WorkArenaBenchmark(AbstractBenchmark):
2221
backend_cls: type[BrowserBackend]
2322
name: str = "workarena"
2423
level: str = "l1"
24+
n_seeds: int = 1
2525
env_args_list: list[BrowserEnvArgs] = None # type: ignore
2626
dataset: list[WorkarenaTask] = None # type: ignore
2727
is_multi_tab: bool = False
@@ -33,16 +33,15 @@ def model_post_init(self, __context: Any) -> None:
3333
self._snow_instance = SNowInstance()
3434
self.env_args_list = []
3535
if self.dataset is None:
36-
task_seed_tuples = get_all_tasks_agents(filter=self.level)
37-
self.dataset = self.load_tasks(task_seed_tuples, self.level)
36+
self.dataset = self.load_tasks(self.level)
3837
for task in self.dataset:
3938
env_args = BrowserEnvArgs(task=task, backend_cls=self.backend_cls)
4039
self.env_args_list.append(env_args)
4140
logger.info(f"Loaded {len(self.env_args_list)} workarena tasks")
4241

43-
def load_tasks(self, task_seed_tuples: list[tuple[type, int]], level: str) -> list[WorkarenaTask]:
42+
def load_tasks(self, level: str) -> list[WorkarenaTask]:
43+
task_seed_tuples = get_all_tasks_agents(filter=self.level, n_seed_l1=self.n_seeds)
4444
tasks = []
45-
4645
for task_cls, seed in task_seed_tuples:
4746
task = WorkarenaTask(
4847
url="",
@@ -53,4 +52,5 @@ def load_tasks(self, task_seed_tuples: list[tuple[type, int]], level: str) -> li
5352
seed=seed,
5453
)
5554
tasks.append(task)
55+
logger.info(f"Loaded {len(tasks)} tasks for level {level}")
5656
return tasks

src/agentlab/benchmarks/workarena/task.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -24,23 +24,23 @@ class WorkarenaTask(AbstractWebTask):
2424
actions_whitelist: ClassVar[list[str]] = [
2525
"browser_press_key",
2626
"browser_type",
27-
"browser_click",
28-
"browser_drag",
29-
"browser_hover",
3027
"browser_select_option",
3128
"browser_mouse_click_xy",
3229
"browser_wait",
30+
"browser_back",
31+
"browser_forward",
3332
]
3433

3534
def setup(self, backend: BrowserBackend) -> tuple[str, dict]:
3635
if not backend.has_pw_page:
3736
raise ValueError("Workarena task requires a backend with playwright page access.")
3837
self._backend = backend
39-
self._task_obj = self.task_cls(instance=self.instance, seed=self.seed) # type: ignore
38+
self._task_obj = self.task_cls(instance=self.instance, seed=self.seed) # type: ignore
4039
self.url = self._task_obj.start_url
4140
goal, info = self._task_obj.setup(backend.page)
41+
backend.goto(self.url)
4242
logger.info(f"Current backend page URL: {backend.page.url}")
43-
# backend.goto(self.url)
43+
4444
return goal, info
4545

4646
def teardown(self) -> None:
@@ -54,4 +54,4 @@ def validate(self) -> tuple[float, dict]:
5454
def obs_postprocess(self, obs: dict) -> dict:
5555
html = obs.pop("html", "")
5656
obs["pruned_html"] = prune_html(html)
57-
return obs
57+
return obs

0 commit comments

Comments
 (0)