1+ import asyncio
12import logging
23from io import BytesIO
34from typing import Any , Callable
45
56from PIL import Image
6- from playwright .sync_api import Page , sync_playwright
7+ from playwright .async_api import Browser , Page , async_playwright
78
89from agentlab .backends .browser .base import BrowserBackend , ToolCallAction , ToolSpec
910
1011logger = logging .getLogger (__name__ )
1112
1213
13- class PlaywrightSyncBackend (BrowserBackend ):
14+ class AsyncPlaywright (BrowserBackend ):
1415 _actions : dict [str , Callable ]
15- _browser : Any
16+ _loop : asyncio .AbstractEventLoop
17+ _browser : Browser
1618 _page : Page
1719
1820 def model_post_init (self , __context : Any ):
@@ -26,86 +28,97 @@ def model_post_init(self, __context: Any):
2628 "browser_mouse_click_xy" : self .browser_mouse_click_xy ,
2729 }
2830
29- def browser_press_key (self , key : str ):
def initialize(self):
    """
    Create the backend's event loop and launch the browser on it.

    A dedicated loop is created with ``asyncio.new_event_loop()`` instead of
    being fetched with ``asyncio.get_event_loop()``. The latter raises
    ``RuntimeError`` when called from a thread that has no current loop and
    is deprecated for creating loops implicitly, so the backend could not be
    initialized from worker threads.

    The loop is stored on ``self._loop``; the synchronous methods of this
    class drive their coroutines on it with ``run_until_complete``.
    """
    self._loop = asyncio.new_event_loop()
    self._loop.run_until_complete(self.ainitialize())
34+
async def ainitialize(self):
    """
    Start Playwright, launch headless Chromium and open a fresh page.

    On success the browser is stored on ``self._browser`` and the page on
    ``self._page``.

    Raises:
        Whatever Playwright raises while launching the browser or opening
        the page. In that case the already-started Playwright driver is
        stopped before the error is re-raised, so a failed initialization
        does not leave the driver running.
    """
    pw = await async_playwright().start()
    try:
        self._browser = await pw.chromium.launch(headless=True, chromium_sandbox=True)
        self._page = await self._browser.new_page()
    except BaseException:
        # The driver handle only lives in this local; if setup fails nobody
        # else could stop it later, so shut it down here.
        await pw.stop()
        raise
39+
async def browser_press_key(self, key: str):
    """
    Press a key on the keyboard.
    """
    # The key press is sent through the page-level keyboard, not to a
    # specific element.
    keyboard = self._page.keyboard
    await keyboard.press(key)
3445
async def browser_type(self, text: str):
    """
    Type text into the focused element.
    """
    # Page.type() takes a selector as its first argument and the text as its
    # second, so calling it with the text alone fails. Typing into whatever
    # element currently has focus goes through the page-level keyboard.
    await self._page.keyboard.type(text)
4051
async def browser_click(self, selector: str):
    """
    Click on a selector.
    """
    # Delegates to the page-level click for the given selector.
    page = self._page
    await page.click(selector)
4657
async def browser_drag(self, from_selector: str, to_selector: str):
    """
    Drag and drop from one selector to another.
    """
    # Gesture: hover the source, press the button, hover the target, release.
    # Each hover is given a 500 ms timeout.
    source = self._page.locator(from_selector)
    await source.hover(timeout=500)
    await self._page.mouse.down()
    try:
        target = self._page.locator(to_selector)
        await target.hover(timeout=500)
    finally:
        # Release the button even when the target cannot be hovered;
        # otherwise the mouse stays pressed and corrupts every later action.
        await self._page.mouse.up()
5869
async def browser_hover(self, selector: str):
    """
    Hover over a given element.
    """
    # Delegates to the page-level hover for the given selector.
    page = self._page
    await page.hover(selector)
6475
async def browser_select_option(self, selector: str):
    """
    Select an option from a given element.
    """
    # NOTE(review): only the selector is forwarded -- no value, label or
    # index -- so this call does not say which option to pick. Confirm that
    # this is intended.
    page = self._page
    await page.select_option(selector)
7081
async def browser_mouse_click_xy(self, x: int, y: int):
    """
    Click at a given x, y coordinate using the mouse.
    """
    # Uses the page-level mouse, so the click targets a position rather
    # than a selector.
    mouse = self._page.mouse
    await mouse.click(x, y)
8087
def run_js(self, js: str):
    """
    Evaluate a JavaScript snippet in the current page and return its result.

    The coroutine returned by the page's ``evaluate`` is driven to completion
    on ``self._loop``, so this call blocks until the result is available.
    The result is also logged at INFO level.
    """
    js_result = self._loop.run_until_complete(self._page.evaluate(js))
    # Lazy %-style arguments: the (possibly large) result is only rendered
    # to text when INFO logging is actually enabled.
    logger.info("JS result: %s", js_result)
    return js_result
8592
def goto(self, url: str):
    """
    Navigate the page to ``url``, blocking until the navigation call returns.

    The page's ``goto`` coroutine is run on ``self._loop``; its result is
    discarded.
    """
    navigation = self._page.goto(url)
    self._loop.run_until_complete(navigation)
8895
def page_html(self):
    """
    Return whatever the page's ``content()`` coroutine yields.

    The coroutine is run to completion on ``self._loop``.
    """
    content = self._page.content()
    return self._loop.run_until_complete(content)
9198
def page_screenshot(self):
    """
    Take a screenshot of the page and return it opened as a PIL image.

    The screenshot bytes are obtained by running the page's ``screenshot``
    coroutine on ``self._loop`` and are wrapped in an in-memory buffer
    before being handed to ``Image.open``.
    """
    raw = self._loop.run_until_complete(self._page.screenshot())
    buffer = BytesIO(raw)
    return Image.open(buffer)
95102
def page_axtree(self):
    """
    Return the accessibility-tree text for the page.

    Currently a stub: it always returns an empty string.
    """
    return ""
105+
def step(self, action: ToolCallAction):
    """
    Execute one tool call and return the resulting observation.

    The action's function name selects a coroutine function from
    ``self._actions``; it is called with the action's arguments as keyword
    arguments and run to completion on ``self._loop``. The page is then
    captured as HTML, screenshot and accessibility-tree text, in that order.

    Returns:
        A dict with the keys ``tool_result`` (value returned by the action),
        ``pruned_html``, ``axtree_txt`` and ``screenshot``.

    Raises:
        KeyError: if the action's function name is not in ``self._actions``.
    """
    call = action.function
    handler = self._actions[call.name]
    tool_result = self._loop.run_until_complete(handler(**call.arguments))

    # Capture order is kept as html -> screenshot -> axtree.
    html = self.page_html()
    screenshot = self.page_screenshot()
    axtree = self.page_axtree()

    return dict(
        tool_result=tool_result,
        pruned_html=html,
        axtree_txt=axtree,
        screenshot=screenshot,
    )
118+
def actions(self) -> tuple[ToolSpec, ...]:
    """
    Return one tool spec per registered action.

    Each callable in ``self._actions`` is converted with
    ``ToolSpec.from_function``; the specs follow the dict's insertion order.
    The return annotation is a variable-length tuple, since there is one
    spec per action rather than exactly one.
    """
    # NOTE(review): from_function presumably derives the spec from each
    # callable's signature and docstring -- confirm before rewording them.
    return tuple(ToolSpec.from_function(fn) for fn in self._actions.values())
109122
def close(self):
    """
    Close the browser, blocking until its ``close()`` coroutine finishes.

    The coroutine is run on ``self._loop``.
    """
    # NOTE(review): only the browser is closed here. The Playwright handle
    # started in ainitialize() is held in a local variable there, and the
    # event loop is left open -- confirm whether further cleanup is needed.
    shutdown = self._browser.close()
    self._loop.run_until_complete(shutdown)
0 commit comments