|
1 | | -from playwright.sync_api import sync_playwright |
| 1 | +import logging |
| 2 | +from io import BytesIO |
| 3 | +from typing import Any, Callable |
2 | 4 |
|
3 | | -from agentlab.backends.browser.base import BrowserBackend, ToolCallAction |
| 5 | +from PIL import Image |
| 6 | +from playwright.sync_api import Page, sync_playwright |
| 7 | + |
| 8 | +from agentlab.backends.browser.base import BrowserBackend, ToolCallAction, ToolSpec |
| 9 | + |
| 10 | +logger = logging.getLogger(__name__) |
4 | 11 |
|
5 | 12 |
|
class PlaywrightSyncBackend(BrowserBackend):
    """Browser backend driving a headless Chromium page via Playwright's sync API.

    Lifecycle: call ``initialize()`` before any other method and ``close()``
    when done. The tools exposed to callers are the ``browser_*`` methods,
    registered by name in ``model_post_init``.
    """

    # NOTE(review): the `browser_*` docstrings below are kept verbatim on
    # purpose -- `ToolSpec.from_function` presumably surfaces them as tool
    # descriptions, which would make them runtime text. Confirm before editing.

    # Tool name -> bound method implementing it (filled in by model_post_init).
    _actions: dict[str, Callable]
    # Handle returned by `sync_playwright().start()`; kept so close() can stop it.
    _playwright: Any
    # Chromium browser launched by initialize().
    _browser: Any
    # The single page every action operates on.
    _page: Page

    def model_post_init(self, __context: Any):
        """Register every ``browser_*`` tool under the name used in tool calls."""
        self._actions = {
            "browser_press_key": self.browser_press_key,
            "browser_type": self.browser_type,
            "browser_click": self.browser_click,
            "browser_drag": self.browser_drag,
            "browser_hover": self.browser_hover,
            "browser_select_option": self.browser_select_option,
            "browser_mouse_click_xy": self.browser_mouse_click_xy,
        }

    def browser_press_key(self, key: str):
        """
        Press a key on the keyboard.
        """
        self._page.keyboard.press(key)

    def browser_type(self, text: str):
        """
        Type text into the focused element.
        """
        # `Page.type` requires a selector as its first argument, so calling it
        # with only the text raised TypeError. Typing through the keyboard
        # targets whatever element currently has focus, as documented above.
        self._page.keyboard.type(text)

    def browser_click(self, selector: str):
        """
        Click on a selector.
        """
        self._page.click(selector)

    def browser_drag(self, from_selector: str, to_selector: str):
        """
        Drag and drop from one selector to another.
        """
        # Emulate the drag manually: hover the source, press the mouse button,
        # hover the destination, release. Each hover is capped at 500 ms.
        from_elem = self._page.locator(from_selector)
        from_elem.hover(timeout=500)
        self._page.mouse.down()

        to_elem = self._page.locator(to_selector)
        to_elem.hover(timeout=500)
        self._page.mouse.up()

    def browser_hover(self, selector: str):
        """
        Hover over a given element.
        """
        self._page.hover(selector)

    def browser_select_option(self, selector: str):
        """
        Select an option from a given element.
        """
        # NOTE(review): no option value/label/index is passed, so it is unclear
        # which option ends up selected. The signature is left unchanged because
        # the tool spec is derived from it -- confirm the intended behavior.
        self._page.select_option(selector)

    def browser_mouse_click_xy(self, x: int, y: int):
        """
        Click at a given x, y coordinate using the mouse.
        """
        self._page.mouse.click(x, y)

    def initialize(self):
        """Start Playwright, launch headless sandboxed Chromium and open a page.

        The Playwright handle is stored so ``close()`` can stop it; if the
        launch or page creation fails, Playwright is stopped before the error
        propagates so nothing is left running.
        """
        self._playwright = sync_playwright().start()
        try:
            self._browser = self._playwright.chromium.launch(
                headless=True, chromium_sandbox=True
            )
            self._page = self._browser.new_page()
        except Exception:
            self._playwright.stop()
            raise

    def run_js(self, js: str):
        """Evaluate ``js`` in the page, log the result and return it."""
        js_result = self._page.evaluate(js)
        # Lazy %-formatting: the message is only built if INFO is enabled.
        logger.info("JS result: %s", js_result)
        return js_result

    def goto(self, url: str):
        """Navigate the page to ``url``."""
        self._page.goto(url)

    def page_snapshot(self):
        """Return the page's current content as reported by ``Page.content()``."""
        return self._page.content()

    def page_screenshot(self):
        """Take a screenshot of the page and return it as a PIL ``Image``."""
        scr_bytes = self._page.screenshot()
        return Image.open(BytesIO(scr_bytes))

    def step(self, action: ToolCallAction):
        """Run one tool call and return the resulting page observation.

        Returns:
            A dict with keys ``pruned_html`` (the action's result, if any,
            followed by the page snapshot), ``axtree_txt`` (the page snapshot)
            and ``screenshot`` (a PIL image).

        Raises:
            KeyError: if ``action.function.name`` is not a registered tool.
        """
        name = action.function.name
        try:
            fn = self._actions[name]
        except KeyError:
            # Same exception type as a plain dict lookup, but with a message
            # that tells the caller which tool names are valid.
            raise KeyError(
                f"Unknown browser action {name!r}; available: {sorted(self._actions)}"
            ) from None
        action_result = fn(**action.function.arguments)
        snapshot = self.page_snapshot()
        screenshot = self.page_screenshot()
        return {
            "pruned_html": f"{action_result or ''}\n{snapshot}",
            "axtree_txt": snapshot,
            "screenshot": screenshot,
        }

    def actions(self) -> tuple[ToolSpec, ...]:
        """Return one ``ToolSpec`` per registered tool, in registration order."""
        return tuple(ToolSpec.from_function(fn) for fn in self._actions.values())

    def close(self):
        """Close the browser, then stop Playwright even if closing fails."""
        try:
            self._browser.close()
        finally:
            self._playwright.stop()
0 commit comments