Skip to content

Commit 02dee09

Browse files
committed
playwright backend
1 parent 645ee2d commit 02dee09

File tree

3 files changed

+103
-37
lines changed

3 files changed

+103
-37
lines changed

experiments/run_miniwob.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from agentlab.agents.generic_agent.generic_agent import GenericAgentArgs
99
from agentlab.agents.tapeagent.agent import TapeAgentArgs, load_config
1010
from agentlab.backends.browser.mcp_playwright import MCPPlaywright
11+
from agentlab.backends.browser.playwright import PlaywrightSyncBackend
1112
from agentlab.benchmarks.miniwob import MiniWobBenchmark
1213
from agentlab.experiments.study import make_study
1314
from agentlab.llm.llm_configs import CHAT_MODEL_ARGS_DICT
@@ -20,13 +21,16 @@
2021
if __name__ == "__main__":
2122
config = load_config("miniwob")
2223

23-
# benchmark = DEFAULT_BENCHMARKS["miniwob"]()
24+
# benchmark = DEFAULT_BENCHMARKS["miniwob"](n_repeats=1)
25+
benchmark = MiniWobBenchmark(backend=PlaywrightSyncBackend())
26+
2427
agent_args = GenericAgentArgs(
2528
chat_model_args=CHAT_MODEL_ARGS_DICT["anthropic/claude-sonnet-4-20250514"],
2629
flags=FLAGS_GPT_4o,
2730
)
28-
29-
benchmark = MiniWobBenchmark(backend=MCPPlaywright())
31+
# agent_args.flags.obs.use_ax_tree = False
32+
# agent_args.flags.obs.use_html = True
33+
# agent_args.flags.obs.use_focused_element = False
3034
# agent_args =TapeAgentArgs(agent_name=config.name, config=config)
3135

3236

@@ -38,7 +42,7 @@
3842
logging_level_stdout=logging.INFO,
3943
)
4044
if os.environ.get("AGENTLAB_DEBUG"):
41-
study.exp_args_list = study.exp_args_list[1:2]
45+
study.exp_args_list = study.exp_args_list[23:24]
4246
study.run(n_jobs=1, n_relaunch=1, parallel_backend="sequential")
4347
else:
4448
study.run(n_jobs=config.n_jobs, n_relaunch=1, parallel_backend=config.parallel_backend)
src/agentlab/backends/browser/playwright.py

Lines changed: 88 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,111 @@
1-
from playwright.sync_api import sync_playwright
1+
import logging
2+
from io import BytesIO
3+
from typing import Any, Callable
24

3-
from agentlab.backends.browser.base import BrowserBackend, ToolCallAction
5+
from PIL import Image
6+
from playwright.sync_api import Page, sync_playwright
7+
8+
from agentlab.backends.browser.base import BrowserBackend, ToolCallAction, ToolSpec
9+
10+
logger = logging.getLogger(__name__)
411

512

613
class PlaywrightSyncBackend(BrowserBackend):
7-
def __init__(self):
8-
self.actions = {
9-
"browser_press_key": lambda key: self.page.keyboard.press(key),
10-
"browser_type": lambda text: self.page.type(text),
11-
"browser_click": lambda selector: self.page.click(selector),
12-
"browser_drag": lambda from_selector, to_selector: self.drag_and_drop(
13-
from_selector, to_selector
14-
),
15-
"browser_hover": lambda selector: self.page.hover(selector),
16-
"browser_select_option": lambda selector: self.page.select_option(selector),
17-
"browser_mouse_click_xy": lambda x, y: self.page.mouse.click(x, y),
14+
_actions: dict[str, Callable]
15+
_browser: Any
16+
_page: Page
17+
18+
def model_post_init(self, __context: Any):
19+
self._actions = {
20+
"browser_press_key": self.browser_press_key,
21+
"browser_type": self.browser_type,
22+
"browser_click": self.browser_click,
23+
"browser_drag": self.browser_drag,
24+
"browser_hover": self.browser_hover,
25+
"browser_select_option": self.browser_select_option,
26+
"browser_mouse_click_xy": self.browser_mouse_click_xy,
1827
}
1928

20-
def drag_and_drop(self, from_selector: str, to_selector: str):
21-
from_elem = self.page.locator(from_selector)
29+
def browser_press_key(self, key: str):
30+
"""
31+
Press a key on the keyboard.
32+
"""
33+
self._page.keyboard.press(key)
34+
35+
def browser_type(self, text: str):
    """
    Type text into the focused element.

    Args:
        text: The text to type.
    """
    # Page.type() expects (selector, text); called with the text alone it
    # fails on the missing argument. Keyboard.type() sends the text to
    # whichever element currently has focus, matching this tool's contract.
    self._page.keyboard.type(text)
40+
41+
def browser_click(self, selector: str):
42+
"""
43+
Click on a selector.
44+
"""
45+
self._page.click(selector)
46+
47+
def browser_drag(self, from_selector: str, to_selector: str):
48+
"""
49+
Drag and drop from one selector to another.
50+
"""
51+
from_elem = self._page.locator(from_selector)
2252
from_elem.hover(timeout=500)
23-
self.page.mouse.down()
53+
self._page.mouse.down()
2454

25-
to_elem = self.page.locator(to_selector)
55+
to_elem = self._page.locator(to_selector)
2656
to_elem.hover(timeout=500)
27-
self.page.mouse.up()
57+
self._page.mouse.up()
58+
59+
def browser_hover(self, selector: str):
60+
"""
61+
Hover over a given element.
62+
"""
63+
self._page.hover(selector)
64+
65+
def browser_select_option(self, selector: str):
66+
"""
67+
Select an option from a given element.
68+
"""
69+
self._page.select_option(selector)
70+
71+
def browser_mouse_click_xy(self, x: int, y: int):
72+
"""
73+
Click at a given x, y coordinate using the mouse.
74+
"""
75+
self._page.mouse.click(x, y)
2876

2977
def initialize(self):
30-
self.browser = sync_playwright().start().chromium.launch(headless=True)
31-
self.page = self.browser.new_page()
78+
self._browser = sync_playwright().start().chromium.launch(headless=True, chromium_sandbox=True)
79+
self._page = self._browser.new_page()
3280

3381
def run_js(self, js: str):
34-
return self.page.evaluate(js)
82+
js_result = self._page.evaluate(js)
83+
logger.info(f"JS result: {js_result}")
84+
return js_result
3585

3686
def goto(self, url: str):
37-
self.page.goto(url)
87+
self._page.goto(url)
3888

3989
def page_snapshot(self):
40-
return self.page.content()
90+
return self._page.content()
4191

4292
def page_screenshot(self):
43-
return self.page.screenshot()
93+
scr_bytes = self._page.screenshot()
94+
return Image.open(BytesIO(scr_bytes))
4495

4596
def step(self, action: ToolCallAction):
46-
fn = self.actions[action.function.name]
47-
return fn(**action.function.arguments)
48-
49-
def actions(self):
50-
return self.page.actions()
97+
fn = self._actions[action.function.name]
98+
action_result = fn(**action.function.arguments)
99+
snapshot = self.page_snapshot()
100+
screenshot = self.page_screenshot()
101+
return {
102+
"pruned_html": f"{action_result or ''}\n{snapshot}",
103+
"axtree_txt": snapshot,
104+
"screenshot": screenshot,
105+
}
106+
def actions(self) -> tuple[ToolSpec, ...]:
    """
    Describe the available browser actions as tool specs.

    Returns:
        One ToolSpec per callable registered in self._actions, each built
        with ToolSpec.from_function, in the mapping's iteration order.
    """
    # tuple[ToolSpec] would declare a tuple of exactly one element; the
    # ellipsis form declares a homogeneous tuple of any length.
    return tuple(ToolSpec.from_function(fn) for fn in self._actions.values())
51109

52110
def close(self):
53-
self.browser.close()
111+
self._browser.close()

src/agentlab/benchmarks/miniwob/task.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,14 @@ def get_task_validate_js(self) -> str:
120120
return [WOB_REWARD_GLOBAL, WOB_RAW_REWARD_GLOBAL, WOB_REWARD_REASON, WOB_DONE_GLOBAL, WOB_EPISODE_ID, WOB_TASK_READY];
121121
}"""
122122

123-
def parse_validation_result(self, validation_result: str) -> tuple[float, dict]:
124-
chunks = [c.strip() for c in validation_result.split(",")]
123+
def parse_validation_result(self, validation_result: str | list) -> tuple[float, dict]:
124+
if isinstance(validation_result, list):
125+
chunks = validation_result
126+
done = chunks[3]
127+
else:
128+
chunks = [c.strip() for c in validation_result.split(",")]
129+
done = chunks[3].strip().lower() == "true"
125130
raw_reward = float(chunks[1])
126-
done = chunks[3].strip().lower() == "true"
127131
reward = float(raw_reward > 0)
128132
return reward, {
129133
"raw_reward": raw_reward,

0 commit comments

Comments
 (0)