Skip to content

Commit ecf59d5

Browse files
committed
async playwright backend
1 parent dba5978 commit ecf59d5

File tree

4 files changed: 70 additions and 48 deletions.

src/agentlab/backends/browser/base.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,15 @@ def run_js(self, js: str):
8080
def goto(self, url: str) -> str:
    """Navigate the browser to ``url``.

    Abstract hook: this base implementation only raises, so every concrete
    backend has to override it.

    Raises:
        NotImplementedError: always, in the base class.
    """
    raise NotImplementedError
8282

83-
def page_html(self) -> str:
    """Return the current page's HTML as a string.

    Abstract hook: this base implementation only raises, so every concrete
    backend has to override it.

    Raises:
        NotImplementedError: always, in the base class.
    """
    raise NotImplementedError
8585

8686
def page_screenshot(self) -> Image:
    """Return an image of the current page.

    Abstract hook: this base implementation only raises, so every concrete
    backend has to override it.

    Raises:
        NotImplementedError: always, in the base class.
    """
    raise NotImplementedError
8888

89+
def page_axtree(self) -> str:
    """Return a textual tree snapshot of the current page.

    NOTE(review): presumably the accessibility tree, judging by the name and
    by the ``axtree_txt`` observation key it feeds -- confirm.

    Abstract hook: this base implementation only raises, so every concrete
    backend has to override it.

    Raises:
        NotImplementedError: always, in the base class.
    """
    raise NotImplementedError
91+
8992
def step(self, action: ToolCallAction) -> dict:
    """Execute ``action`` in the browser and return the resulting observation.

    The return annotation is ``dict`` because the environment consumes the
    result as a mapping of observation fields (e.g. ``pruned_html``,
    ``axtree_txt``, ``screenshot``), not as a plain string.

    Abstract hook: this base implementation only raises, so every concrete
    backend has to override it.

    Raises:
        NotImplementedError: always, in the base class.
    """
    raise NotImplementedError
9194

src/agentlab/backends/browser/env.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,13 @@ def reset(self, seed: int):
3131
if setup_js:
3232
self.goal = self.backend.run_js(setup_js)
3333
logger.info(f"Task goal: {self.goal}")
34-
page_content = self.backend.page_snapshot()
34+
html = self.backend.page_html()
3535
screenshot = self.backend.page_screenshot()
36+
axtree = self.backend.page_axtree()
3637
obs = {
3738
"goal_object": [{"type": "text", "text": self.goal}],
38-
"pruned_html": "",
39-
"axtree_txt": page_content,
39+
"pruned_html": html,
40+
"axtree_txt": axtree,
4041
"screenshot": screenshot,
4142
"last_action_error": "",
4243
"focused_element_bid": "none",
@@ -89,12 +90,13 @@ def step(self, action: ToolCallAction | str) -> tuple[dict, float, bool, bool, d
8990

9091
def _step(self, action: ToolCallAction) -> dict:
    """Run ``action`` on the backend and return its observation with defaults filled in.

    The dict returned by the backend is updated in place: ``goal_object``,
    ``last_action_error`` and ``focused_element_bid`` are added only when the
    backend did not already provide them.
    """
    observation = self.backend.step(action)
    fallbacks = {
        "goal_object": [{"type": "text", "text": self.goal}],
        "last_action_error": "",
        "focused_element_bid": "none",
    }
    # setdefault keeps any value the backend supplied and appends missing keys
    # in the same order as the fallbacks above.
    for key, value in fallbacks.items():
        observation.setdefault(key, value)
    return observation
98100

99101
def validate_task(self, action: ToolCallAction, observation: dict) -> tuple[float, dict]:
100102
validate_js = self.task.get_step_validate_js()

src/agentlab/backends/browser/mcp_playwright.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,20 @@ def step(self, action: ToolCallAction) -> dict:
3232
tool_result = "\n".join(
3333
[c.text for c in contents if c.type == "text" and "# Ran Playwright code" not in c.text]
3434
)
35-
snapshot = self.page_snapshot()
35+
html = self.page_html()
3636
screenshot = self.page_screenshot()
37+
axtree = self.page_axtree()
3738
return {
3839
"tool_result": tool_result,
39-
"pruned_html": "",
40-
"axtree_txt": snapshot,
40+
"pruned_html": html,
41+
"axtree_txt": axtree,
4142
"screenshot": screenshot,
4243
}
4344

44-
def page_html(self) -> str:
    """Return the page HTML; this backend does not supply any, so the result is always empty."""
    return ""
47+
48+
def page_axtree(self) -> str:
    """Call the ``browser_snapshot`` tool and return its text parts joined by newlines."""
    snapshot_parts = self.call_tool("browser_snapshot", {})
    # Only text-typed content items contribute to the result.
    text_chunks = [part.text for part in snapshot_parts if part.type == "text"]
    return "\n".join(text_chunks)
4751

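(File path not captured in this extract; this is the fourth changed file, which defines the AsyncPlaywright backend class.)

Lines changed: 47 additions & 34 deletions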
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,20 @@
1+
import asyncio
12
import logging
23
from io import BytesIO
34
from typing import Any, Callable
45

56
from PIL import Image
6-
from playwright.sync_api import Page, sync_playwright
7+
from playwright.async_api import Browser, Page, async_playwright
78

89
from agentlab.backends.browser.base import BrowserBackend, ToolCallAction, ToolSpec
910

1011
logger = logging.getLogger(__name__)
1112

1213

13-
class PlaywrightSyncBackend(BrowserBackend):
14+
class AsyncPlaywright(BrowserBackend):
1415
_actions: dict[str, Callable]
15-
_browser: Any
16+
_loop: asyncio.AbstractEventLoop
17+
_browser: Browser
1618
_page: Page
1719

1820
def model_post_init(self, __context: Any):
@@ -26,86 +28,97 @@ def model_post_init(self, __context: Any):
2628
"browser_mouse_click_xy": self.browser_mouse_click_xy,
2729
}
2830

29-
def browser_press_key(self, key: str):
31+
def initialize(self):
    """Create the event loop used by this backend and start the browser on it.

    Every synchronous method of this class drives its coroutine through
    ``self._loop.run_until_complete``, so the loop is created once here and
    then ``ainitialize`` is run on it to launch the browser and open the page.
    """
    # asyncio.get_event_loop() is deprecated when no loop is current and raises
    # RuntimeError in non-main threads; a dedicated loop works everywhere.
    self._loop = asyncio.new_event_loop()
    self._loop.run_until_complete(self.ainitialize())
34+
35+
async def ainitialize(self):
    """Start Playwright, launch headless Chromium, and open the page all actions use."""
    # NOTE(review): the Playwright handle is local to this coroutine, so nothing
    # can call stop() on it later -- consider retaining it for shutdown.
    playwright = await async_playwright().start()
    chromium = playwright.chromium
    self._browser = await chromium.launch(headless=True, chromium_sandbox=True)
    self._page = await self._browser.new_page()
39+
40+
async def browser_press_key(self, key: str):
    """
    Press a key on the keyboard.
    """
    keyboard = self._page.keyboard
    await keyboard.press(key)
3445

35-
async def browser_type(self, text: str):
    """
    Type text into the focused element.
    """
    # Page.type() takes a selector as its first argument, so calling it with
    # only the text fails. Typing into whatever currently has focus is what
    # the keyboard API does.
    await self._page.keyboard.type(text)
4051

41-
async def browser_click(self, selector: str):
    """
    Click on a selector.
    """
    page = self._page
    await page.click(selector)
4657

47-
async def browser_drag(self, from_selector: str, to_selector: str):
    """
    Drag and drop from one selector to another.
    """
    page = self._page

    # Move over the source element and press the mouse button there...
    source = page.locator(from_selector)
    await source.hover(timeout=500)
    await page.mouse.down()

    # ...then move over the destination element and release the button.
    destination = page.locator(to_selector)
    await destination.hover(timeout=500)
    await page.mouse.up()
5869

59-
async def browser_hover(self, selector: str):
    """
    Hover over a given element.
    """
    page = self._page
    await page.hover(selector)
6475

65-
async def browser_select_option(self, selector: str):
    """
    Select an option from a given element.
    """
    # NOTE(review): no option value/label/index is passed on, so which option
    # (if any) ends up selected is left to Playwright -- confirm this is intended.
    page = self._page
    await page.select_option(selector)
7081

71-
async def browser_mouse_click_xy(self, x: int, y: int):
    """
    Click at a given x, y coordinate using the mouse.
    """
    mouse = self._page.mouse
    await mouse.click(x, y)
8087

8188
def run_js(self, js: str):
    """Evaluate ``js`` in the page, log the outcome, and return it.

    Blocks on the backend's event loop until the evaluation coroutine finishes.
    """
    evaluation = self._page.evaluate(js)
    outcome = self._loop.run_until_complete(evaluation)
    logger.info(f"JS result: {outcome}")
    return outcome
8592

8693
def goto(self, url: str):
    """Navigate the page to ``url``, blocking until the navigation coroutine finishes.

    Nothing is returned; the navigation result is discarded.
    """
    navigation = self._page.goto(url)
    self._loop.run_until_complete(navigation)
8895

89-
def page_html(self):
    """Return the page content reported by Playwright, blocking until it is available."""
    content_request = self._page.content()
    return self._loop.run_until_complete(content_request)
9198

9299
def page_screenshot(self):
    """Capture a screenshot of the page and return it opened as a PIL image."""
    capture = self._page.screenshot()
    raw_bytes = self._loop.run_until_complete(capture)
    # Image.open needs a file-like object, so wrap the raw bytes in a buffer.
    buffer = BytesIO(raw_bytes)
    return Image.open(buffer)
95102

103+
def page_axtree(self):
    """Return the tree text for the page; this backend produces none, so it is always empty."""
    return ""
105+
96106
def step(self, action: ToolCallAction):
    """Run the requested action and return the fresh page observation.

    The action's function name selects a coroutine from ``self._actions``; it
    is awaited on the backend loop with the action's arguments as keyword
    arguments. A name missing from ``self._actions`` raises ``KeyError``.

    Returns:
        dict with ``tool_result`` (whatever the action returned),
        ``pruned_html``, ``axtree_txt`` and ``screenshot``.
    """
    call = action.function
    handler = self._actions[call.name]
    action_result = self._loop.run_until_complete(handler(**call.arguments))

    # Observe the page only after the action has completed.
    html = self.page_html()
    screenshot = self.page_screenshot()
    axtree = self.page_axtree()

    observation = {
        "tool_result": action_result,
        "pruned_html": html,
        "axtree_txt": axtree,
        "screenshot": screenshot,
    }
    return observation
118+
106119
def actions(self) -> tuple[ToolSpec, ...]:
    """Return one ``ToolSpec`` per registered action, in registration order.

    The annotation is variable-length: ``tuple[ToolSpec]`` would describe a
    tuple of exactly one element, while this returns one spec per entry of
    ``self._actions``.
    """
    return tuple(ToolSpec.from_function(fn) for fn in self._actions.values())
109122

110123
def close(self):
    """Close the browser, blocking until the close coroutine completes."""
    # NOTE(review): only the browser is closed here; the Playwright driver started
    # in ainitialize() is not stopped and the event loop is left open -- confirm
    # whether that is intended.
    shutdown = self._browser.close()
    self._loop.run_until_complete(shutdown)

0 commit comments

Comments
 (0)