@@ -53,6 +53,9 @@ async def perform_action(self, action: AgentAction) -> ActionExecutionResult:
53
53
}
54
54
55
55
try :
56
+ # Store initial URL to detect navigation
57
+ initial_url = self .page .url
58
+
56
59
if action_type == "click" :
57
60
# specific_action_model is already an instance of ClickAction
58
61
x , y = specific_action_model .x , specific_action_model .y
@@ -66,7 +69,9 @@ async def perform_action(self, action: AgentAction) -> ActionExecutionResult:
66
69
await self ._animate_click (x , y )
67
70
await asyncio .sleep (0.1 ) # Ensure animation is visible
68
71
await self .page .mouse .click (x , y , button = button )
69
- # Consider new tab/page handling logic here if needed
72
+
73
+ # Check for page navigation
74
+ await self .handle_page_navigation ("click" , initial_url )
70
75
return {"success" : True }
71
76
72
77
elif action_type == "double_click" :
@@ -78,6 +83,9 @@ async def perform_action(self, action: AgentAction) -> ActionExecutionResult:
78
83
await self ._animate_click (x , y )
79
84
await asyncio .sleep (0.1 )
80
85
await self .page .mouse .dblclick (x , y )
86
+
87
+ # Check for page navigation
88
+ await self .handle_page_navigation ("double_click" , initial_url )
81
89
return {"success" : True }
82
90
83
91
elif action_type == "type" :
@@ -92,6 +100,9 @@ async def perform_action(self, action: AgentAction) -> ActionExecutionResult:
92
100
for key_str in specific_action_model .keys :
93
101
playwright_key = self ._convert_key_name (key_str )
94
102
await self .page .keyboard .press (playwright_key ) # Press each key
103
+
104
+ # Check for page navigation - keys like Enter can cause navigation
105
+ await self .handle_page_navigation ("keypress" , initial_url )
95
106
return {"success" : True }
96
107
97
108
elif action_type == "scroll" :
@@ -110,6 +121,9 @@ async def perform_action(self, action: AgentAction) -> ActionExecutionResult:
110
121
if name == "goto" and args .url :
111
122
await self .page .goto (args .url )
112
123
return {"success" : True }
124
+ elif name == "navigate_back" :
125
+ await self .page .go_back ()
126
+ return {"success" : True }
113
127
# Add other function calls like back, forward, reload if needed, similar to TS version
114
128
self .logger .warning (
115
129
f"Unsupported function call: { name } " ,
@@ -131,6 +145,9 @@ async def perform_action(self, action: AgentAction) -> ActionExecutionResult:
131
145
else :
132
146
# Use _convert_key_name for consistency if possible, or press directly
133
147
await self .page .keyboard .press (self ._convert_key_name (text ))
148
+
149
+ # Check for page navigation - Enter and other keys may navigate
150
+ await self .handle_page_navigation ("key" , initial_url )
134
151
return {"success" : True }
135
152
136
153
elif action_type == "wait" :
@@ -248,3 +265,68 @@ def _convert_key_name(self, key: str) -> str:
248
265
# Convert to uppercase for case-insensitive matching then check map,
249
266
# default to original key if not found.
250
267
return key_map .get (key .upper (), key )
268
+
269
+ async def _handle_page_navigation (self ) -> None :
270
+ """Handle page navigation actions."""
271
+ pass
272
+
273
async def handle_page_navigation(
    self,
    action_description: str,
    initial_url: str,
    dom_settle_timeout_ms: int = 1000,
    new_tab_timeout_ms: int = 1500,
) -> None:
    """Handle possible page navigation after an action.

    The routine runs three steps in order:

    1. Listen on the browser context for a newly opened page. If one
       appears, close it and load its URL in ``self.page`` so the agent
       keeps working in a single tab.
    2. Wait (bounded by ``dom_settle_timeout_ms``) for the page to reach
       ``domcontentloaded`` and then ``networkidle``.
    3. Log when ``self.page.url`` differs from ``initial_url``.

    Args:
        action_description: Label of the action that just ran (for example
            ``"click"``); used only in log messages.
        initial_url: URL of ``self.page`` captured before the action.
        dom_settle_timeout_ms: Timeout, in milliseconds, applied to each of
            the two load-state waits in step 2.
        new_tab_timeout_ms: How long, in milliseconds, to listen for a new
            page in step 1. When no tab opens, the call waits for this
            whole duration, so callers that know no tab can open may pass a
            smaller value. Defaults to the previously hard-coded 1500.

    Returns:
        None. Failures while listening for a new tab or while waiting for
        the DOM to settle are swallowed (the latter is logged as a warning).
        Errors raised by closing the new tab or by ``page.goto`` propagate
        to the caller.
    """
    self.logger.debug(
        f"{action_description} - checking for page navigation",
        category=StagehandFunctionName.AGENT,
    )

    # Check for a new tab/window. The triggering action has already been
    # performed by the caller, so the ``async with`` body is intentionally
    # empty: this only observes a "page" event that fires within the window.
    new_opened_tab = None
    try:
        async with self.page.context.expect_page(
            timeout=new_tab_timeout_ms
        ) as new_page_info:
            pass
        new_opened_tab = await new_page_info.value
    except Exception:
        # Deliberately best-effort: timing out is the normal outcome when
        # the action did not open a tab, so it is not logged.
        new_opened_tab = None

    # Fold a newly opened tab back into the main page.
    if new_opened_tab:
        new_tab_url = new_opened_tab.url
        self.logger.info(
            f"New tab detected with URL: {new_tab_url}",
            category=StagehandFunctionName.AGENT,
        )
        await new_opened_tab.close()
        # ``goto`` resolves once the page has loaded, and the settle block
        # below waits for "domcontentloaded" again with a timeout, so no
        # extra untimed wait is needed here.
        await self.page.goto(new_tab_url)

    # Wait for the DOM to settle; both waits share one timeout budget each
    # and any failure (typically a timeout) is logged, not raised.
    try:
        await self.page.wait_for_load_state(
            "domcontentloaded", timeout=dom_settle_timeout_ms
        )
        # Additional optional wait for network idle
        await self.page.wait_for_load_state(
            "networkidle", timeout=dom_settle_timeout_ms
        )
    except Exception as e:
        self.logger.warning(
            f"Wait for DOM settle timed out: {e}",
            category=StagehandFunctionName.AGENT,
        )

    # Report a URL change relative to the pre-action URL.
    current_url = self.page.url
    if current_url != initial_url:
        self.logger.debug(
            f"Page navigation detected: {initial_url} -> {current_url}",
            category=StagehandFunctionName.AGENT,
        )

    self.logger.debug(
        "Finished checking for page navigation",
        category=StagehandFunctionName.AGENT,
    )
0 commit comments