@@ -13,7 +13,130 @@ class TaskInput(TypedDict):
1313
1414# LLM API Keys are set in the environment during `kernel deploy <filename> -e OPENAI_API_KEY=XXX`
1515# See https://docs.onkernel.com/launch/deploy#environment-variables
16- llm = ChatOpenAI (model = "gpt-4o" )
16+ llm = ChatOpenAI (model = "gpt-4o-mini" )
17+
18+
19+ # Define a subclass of BrowserSession that overrides _setup_viewports (the stock implementation mishandles resizing when connecting via CDP)
class BrowserSessionCustomResize(BrowserSession):
    """BrowserSession variant that forces a fixed window/viewport/screen size.

    Only ``_setup_viewports`` is overridden. The stock implementation is
    reported to mishandle resizing when the session attaches to an already
    running browser over CDP, so this override pins the size on the browser
    profile before resizing the existing pages.
    """

    async def _setup_viewports(self) -> None:
        """Resize existing pages to the fixed size and apply profile settings.

        In order, this method:

        1. overwrites ``window_size``, ``viewport``, ``screen`` and
           ``device_scale_factor`` on ``self.browser_profile``;
        2. prints a one-line summary of the effective settings;
        3. applies permissions, timeouts, extra HTTP headers and geolocation
           to ``self.browser_context`` (each step is best-effort: a failure
           is logged as a warning and does not abort the setup);
        4. loads the storage state;
        5. sets the viewport size on every existing page, opening a new page
           if the context has none;
        6. if no viewport is configured, tries to resize the real browser
           window through CDP, falling back to ``window.resizeTo``.

        Raises:
            AssertionError: if ``self.browser_context`` has not been set up.
        """
        assert self.browser_context, 'BrowserSession.browser_context must already be set up before calling _setup_viewports()'

        profile = self.browser_profile

        # NOTE(review): 786 may be a typo for 768 (1024x768) - confirm before changing.
        fixed_size = {"width": 1024, "height": 786}
        # Assign independent copies so the three profile fields never alias one dict.
        profile.window_size = dict(fixed_size)
        profile.viewport = dict(fixed_size)
        profile.screen = dict(fixed_size)
        profile.device_scale_factor = 1.0

        # Read the values back from the profile so the summary shows what is actually stored.
        viewport = profile.viewport
        window_size = profile.window_size
        screen = profile.screen

        # log the viewport settings to terminal
        summary_parts = [
            '📐 Setting up viewport: ',
            f'headless={profile.headless} ',
            f'window={window_size["width"]}x{window_size["height"]}px ' if window_size else '(no window) ',
            f'screen={screen["width"]}x{screen["height"]}px ' if screen else '',
            f'viewport={viewport["width"]}x{viewport["height"]}px ' if viewport else '(no viewport) ',
            f'device_scale_factor={profile.device_scale_factor or 1.0} ',
            f'is_mobile={profile.is_mobile} ',
            f'color_scheme={profile.color_scheme.value} ' if profile.color_scheme else '',
            f'locale={profile.locale} ' if profile.locale else '',
            f'timezone_id={profile.timezone_id} ' if profile.timezone_id else '',
            f'geolocation={profile.geolocation} ' if profile.geolocation else '',
            f'permissions={",".join(profile.permissions or ["<none>"])} ',
        ]
        print(''.join(summary_parts))

        # Apply the profile's context-wide settings. Every step is best-effort.
        if profile.permissions:
            try:
                await self.browser_context.grant_permissions(profile.permissions)
            except Exception as e:
                self.logger.warning(
                    f'⚠️ Failed to grant browser permissions {profile.permissions}: {type(e).__name__}: {e}'
                )

        try:
            if profile.default_timeout:
                self.browser_context.set_default_timeout(profile.default_timeout)
            if profile.default_navigation_timeout:
                self.browser_context.set_default_navigation_timeout(profile.default_navigation_timeout)
        except Exception as e:
            self.logger.warning(
                f'⚠️ Failed to set playwright timeout settings '
                f'cdp_api={profile.default_timeout} '
                f'navigation={profile.default_navigation_timeout}: {type(e).__name__}: {e}'
            )

        try:
            if profile.extra_http_headers:
                # set_extra_http_headers is a coroutine in the async Playwright API;
                # without the await the headers are never sent to the browser.
                await self.browser_context.set_extra_http_headers(profile.extra_http_headers)
        except Exception as e:
            self.logger.warning(
                f'⚠️ Failed to setup playwright extra_http_headers: {type(e).__name__}: {e}'
            )  # dont print the secret header contents in the logs!

        try:
            if profile.geolocation:
                await self.browser_context.set_geolocation(profile.geolocation)
        except Exception as e:
            self.logger.warning(
                f'⚠️ Failed to update browser geolocation {profile.geolocation}: {type(e).__name__}: {e}'
            )

        await self.load_storage_state()

        page = None

        for page in self.browser_context.pages:
            # apply viewport size settings to any existing pages
            if viewport:
                await page.set_viewport_size(viewport)

            # show browser-use dvd screensaver-style bouncing loading animation on any about:blank pages
            if page.url == 'about:blank':
                await self._show_dvd_screensaver_loading_animation(page)

        # Make sure there is at least one page to work with below.
        page = page or (await self.browser_context.new_page())

        # Resizing the OS window only makes sense when no viewport is emulated and
        # the browser is headful. With the viewport forced above this branch is
        # normally skipped; it is kept for parity with the base implementation.
        if viewport or profile.window_size is None or profile.headless:
            return

        window_size = profile.window_size

        # cdp api: https://chromedevtools.github.io/devtools-protocol/tot/Browser/#method-setWindowBounds
        try:
            cdp_session = await page.context.new_cdp_session(page)
            try:
                window_id_result = await cdp_session.send('Browser.getWindowForTarget')
                await cdp_session.send(
                    'Browser.setWindowBounds',
                    {
                        'windowId': window_id_result['windowId'],
                        'bounds': {
                            **window_size,
                            'windowState': 'normal',  # Ensure window is not minimized/maximized
                        },
                    },
                )
            finally:
                # Always release the CDP session, even when a command failed.
                await cdp_session.detach()
        except Exception as cdp_error:
            # Distinct exception names are required here: reusing one name in the
            # nested handler would unbind it before the warning below is built.
            try:
                # fallback to javascript resize if cdp setWindowBounds fails;
                # page.evaluate accepts exactly one argument, so pass the size as an object
                await page.evaluate(
                    """({width, height}) => {window.resizeTo(width, height)}""",
                    {'width': window_size['width'], 'height': window_size['height']},
                )
                return
            except Exception as js_error:
                self.logger.warning(
                    f'⚠️ Failed to resize browser window to {window_size["width"]}x{window_size["height"]}px '
                    f'using CDP setWindowBounds: {type(cdp_error).__name__}: {cdp_error} '
                    f'(window.resizeTo fallback also failed: {type(js_error).__name__}: {js_error})'
                )
139+
17140
18141@app .action ("bu-task" )
19142async def bu_task (ctx : kernel .KernelContext , input_data : TaskInput ):
@@ -37,7 +160,7 @@ async def bu_task(ctx: kernel.KernelContext, input_data: TaskInput):
37160 #task="Compare the price of gpt-4o and DeepSeek-V3",
38161 task = input_data ["task" ],
39162 llm = llm ,
40- browser_session = BrowserSession (cdp_url = kernel_browser .cdp_ws_url )
163+ browser_session = BrowserSessionCustomResize (cdp_url = kernel_browser .cdp_ws_url )
41164 )
42165 result = await agent .run ()
43166 if result .final_result () is not None :
0 commit comments