1
+ """Element staleness detection and recovery watchdog."""
2
+
3
+ import asyncio
4
+ import logging
5
+ import time
6
+ from typing import TYPE_CHECKING , ClassVar , Any
7
+
8
+ from bubus import BaseEvent
9
+
10
+ from browser_use .browser .events import (
11
+ BrowserErrorEvent ,
12
+ ClickElementEvent ,
13
+ TypeTextEvent ,
14
+ UploadFileEvent ,
15
+ ScrollEvent ,
16
+ BrowserStateRequestEvent ,
17
+ )
18
+ from browser_use .browser .watchdog_base import BaseWatchdog
19
+ from browser_use .dom .views import EnhancedDOMTreeNode
20
+
21
+ if TYPE_CHECKING :
22
+ pass
23
+
24
+ logger = logging .getLogger (__name__ )
25
+
26
+
27
class ElementStalenessWatchdog(BaseWatchdog):
    """Detect stale element references on element actions and try to recover.

    For every click / type / upload / element-scroll event the watchdog probes
    the targeted node over CDP.  When the node looks stale it triggers a DOM
    rebuild, searches the rebuilt selector map for an equivalent node and, if
    one is found, swaps it into the event's ``node`` attribute.

    Every handler in this class returns ``None``; the watchdog never produces
    a result of its own.

    NOTE(review): this assumes the handler that actually performs the action
    runs after this watchdog and reads ``event.node`` at that point -- confirm
    handler ordering on the event bus.
    """

    # Event contracts
    LISTENS_TO: ClassVar[list[type[BaseEvent]]] = [
        ClickElementEvent,
        TypeTextEvent,
        UploadFileEvent,
        ScrollEvent,
    ]
    EMITS: ClassVar[list[type[BaseEvent]]] = [
        BrowserErrorEvent,
        BrowserStateRequestEvent,
    ]

    # Tunables (annotated as ClassVar, like the event contracts above).
    MAX_STALE_RETRIES: ClassVar[int] = 2  # recovery attempts per action/element key
    MIN_REBUILD_INTERVAL_SECONDS: ClassVar[float] = 1.0  # throttle for DOM rebuilds
    POSITION_TOLERANCE_PX: ClassVar[int] = 50  # max x/y drift for a position match
    MIN_MATCH_SCORE: ClassVar[int] = 15  # minimum score for a characteristic match
    NAME_MATCH_SCORE: ClassVar[int] = 10
    ATTRIBUTE_MATCH_SCORE: ClassVar[int] = 2
    POSITION_MATCH_SCORE: ClassVar[int] = 5

    # Lower-cased substrings of CDP error messages that are treated as
    # evidence of a stale node reference.
    STALE_ERROR_INDICATORS: ClassVar[tuple[str, ...]] = (
        'node not found',
        'could not find node',
        'invalid node id',
        'node is not attached',
        'disconnected frame',
        'execution context destroyed',
    )

    def __init__(self, *args, **kwargs):
        """Initialise the base watchdog and the per-instance bookkeeping."""
        super().__init__(*args, **kwargs)
        # Reserved for staleness detection results; not read anywhere in this class.
        self._element_staleness_cache = {}
        # Maps "<action>_<element key>" -> number of recovery attempts so far.
        self._retry_counts = {}
        # time.monotonic() stamp of the last completed DOM rebuild.  Starts at
        # -inf so the very first rebuild is never throttled, whatever the
        # monotonic clock's (unspecified) reference point is.
        self._last_dom_rebuild = float('-inf')

    # ------------------------------------------------------------------
    # Event handlers
    # ------------------------------------------------------------------

    async def on_ClickElementEvent(self, event: ClickElementEvent) -> dict[str, Any] | None:
        """Handle click element events with staleness detection."""
        return await self._handle_element_action_with_staleness_check('click', event.node, event)

    async def on_TypeTextEvent(self, event: TypeTextEvent) -> dict[str, Any] | None:
        """Handle type text events with staleness detection."""
        return await self._handle_element_action_with_staleness_check('type', event.node, event)

    async def on_UploadFileEvent(self, event: UploadFileEvent) -> None:
        """Handle upload file events with staleness detection."""
        await self._handle_element_action_with_staleness_check('upload', event.node, event)
        return None

    async def on_ScrollEvent(self, event: ScrollEvent) -> None:
        """Handle scroll events; only element-based scrolls are checked."""
        if event.node is not None:  # Page-level scrolls carry no node to validate
            await self._handle_element_action_with_staleness_check('scroll', event.node, event)
        return None

    # ------------------------------------------------------------------
    # Detection
    # ------------------------------------------------------------------

    async def _handle_element_action_with_staleness_check(
        self,
        action_type: str,
        node: EnhancedDOMTreeNode,
        original_event: BaseEvent,
    ) -> Any:
        """Run the staleness probe for one action and start recovery if needed.

        Args:
            action_type: Short label for the action ('click', 'type', ...);
                used in log messages and as part of the retry key.
            node: The node the action targets.
            original_event: The event being handled; its ``node`` may be
                replaced during recovery.

        Returns:
            Always ``None``.  Any exception is logged and swallowed so that
            this check never blocks the action itself.
        """
        element_key = self._get_element_key(node)
        retry_key = f'{action_type}_{element_key}'

        try:
            is_stale = await self._check_element_staleness(node)

            if is_stale:
                self.logger.warning(
                    f'🔄 Element staleness detected for {action_type} action on element {node.element_index}'
                )
                return await self._handle_stale_element(action_type, node, original_event, retry_key)

            # Element appears fresh: forget earlier failed attempts for it.
            self._reset_retry_count(retry_key)
            return None  # Let the original handler proceed

        except Exception as e:
            self.logger.error(f'❌ Element staleness check failed for {action_type}: {e}')
            # Don't block the action, let it proceed
            return None

    async def _check_element_staleness(self, node: EnhancedDOMTreeNode) -> bool:
        """Return True when ``node`` appears to be stale.

        The node is described via CDP ``DOM.describeNode``.  It is reported as
        stale when the call fails with a message containing one of
        ``STALE_ERROR_INDICATORS``, or when it succeeds but the reported node
        name differs (case-insensitively) from ``node.node_name``.

        Anything else -- a missing ``target_id``, failure to obtain a CDP
        session, an unrecognised CDP error -- yields False, deliberately
        preferring a missed detection over a false positive.
        """
        try:
            if not node.target_id:
                return False  # Can't check without target_id

            session = await self.browser_session.get_or_create_cdp_session(target_id=node.target_id)

            try:
                # NOTE(review): the lookup uses node.node_id as the CDP nodeId.
                # Confirm that this id is still valid for the session obtained
                # above; if the node also exposes a backend node id, that may
                # be the more stable handle.
                describe_result = await session.cdp_client.send.DOM.describeNode(
                    params={'nodeId': node.node_id},
                    session_id=session.session_id,
                )
            except Exception as e:
                error_str = str(e).lower()

                if any(indicator in error_str for indicator in self.STALE_ERROR_INDICATORS):
                    self.logger.debug(f'Staleness detected via CDP error: {error_str}')
                    return True

                # Unknown error - assume not stale to avoid false positives
                self.logger.debug(f'Unknown error in staleness check: {e}')
                return False

            # The call succeeded, so the node exists; check that it is still
            # the same kind of node.
            node_description = describe_result.get('node', {})
            original_name = node.node_name or ''
            current_name = node_description.get('nodeName', '').lower()

            if original_name and current_name and original_name.lower() != current_name:
                self.logger.debug(f'Node name changed: {original_name} -> {current_name}')
                return True

            return False  # Element appears to be valid

        except Exception as e:
            self.logger.debug(f'Staleness check failed: {e}')
            return False  # Assume not stale if we can't check

    # ------------------------------------------------------------------
    # Recovery
    # ------------------------------------------------------------------

    async def _handle_stale_element(
        self,
        action_type: str,
        stale_node: EnhancedDOMTreeNode,
        original_event: BaseEvent,
        retry_key: str,
    ) -> Any:
        """Try to replace a stale node on ``original_event`` with a fresh one.

        Once ``MAX_STALE_RETRIES`` attempts have been recorded for
        ``retry_key``, a ``BrowserErrorEvent`` is dispatched and no further
        recovery is attempted.  Otherwise the attempt is counted, a DOM
        rebuild is requested and the rebuilt selector map is searched for an
        equivalent node.  On success the event is updated and the retry
        counter cleared; on failure the counter keeps its incremented value.

        Returns:
            Always ``None``; recovery errors are logged, never raised.
        """
        retry_count = self._get_retry_count(retry_key)

        if retry_count >= self.MAX_STALE_RETRIES:
            error_msg = (
                f'Element staleness retry limit exceeded for {action_type} on element {stale_node.element_index}'
            )
            self.logger.error(f'❌ {error_msg}')

            # Emit browser error event to inform other components
            self.event_bus.dispatch(
                BrowserErrorEvent(
                    error_type='ElementStalenessRetryExceeded',
                    message=error_msg,
                    details={
                        'action_type': action_type,
                        'element_index': stale_node.element_index,
                        'retry_count': retry_count,
                    },
                )
            )
            return None  # Let original action fail normally

        self.logger.info(f'🔄 Attempting to recover from stale element (attempt #{retry_count + 1})')

        try:
            self._increment_retry_count(retry_key)

            # Force DOM rebuild to get fresh element references
            await self._force_dom_rebuild()

            fresh_element = await self._find_fresh_element_equivalent(stale_node)

            if fresh_element:
                self.logger.info(f'✅ Found fresh equivalent element for {action_type}')
                await self._update_event_with_fresh_element(original_event, fresh_element)
                # Reset retry count on successful recovery
                self._reset_retry_count(retry_key)
                return None  # Let the action proceed with fresh element

            self.logger.warning(f'❌ Could not find fresh equivalent element for {action_type}')
            return None  # Let the action fail normally

        except Exception as e:
            self.logger.error(f'❌ Stale element recovery failed: {e}')
            return None

    async def _force_dom_rebuild(self) -> None:
        """Clear the DOM watchdog cache and request a fresh browser state.

        Rebuilds are throttled to at most one per
        ``MIN_REBUILD_INTERVAL_SECONDS``.  The interval is measured with
        ``time.monotonic()`` so wall-clock adjustments cannot stretch or
        shrink the throttle window, and the timestamp is taken after the
        rebuild has completed so the window starts when fresh data exists.
        Failures are logged and swallowed; the timestamp is then left as is.
        """
        if time.monotonic() - self._last_dom_rebuild < self.MIN_REBUILD_INTERVAL_SECONDS:
            self.logger.debug('Skipping DOM rebuild - too recent')
            return

        self.logger.debug('🔧 Forcing DOM rebuild due to staleness')

        try:
            # Clear DOM cache in DOM watchdog
            dom_watchdog = self._get_dom_watchdog()
            if dom_watchdog:
                dom_watchdog.clear_cache()

            # Request fresh browser state to rebuild DOM
            state_request = self.event_bus.dispatch(
                BrowserStateRequestEvent(
                    include_dom=True,
                    include_screenshot=False,  # Skip screenshot for performance
                    cache_clickable_elements_hashes=True,
                )
            )

            await state_request
            await state_request.event_result(raise_if_any=False, raise_if_none=False)

            self._last_dom_rebuild = time.monotonic()
            self.logger.debug('✅ DOM rebuild completed')

        except Exception as e:
            self.logger.error(f'❌ DOM rebuild failed: {e}')

    async def _find_fresh_element_equivalent(self, stale_node: EnhancedDOMTreeNode) -> EnhancedDOMTreeNode | None:
        """Look up a replacement for ``stale_node`` in the DOM watchdog's selector map.

        The node stored under the same ``element_index`` is preferred, but
        only if its node name does not contradict the stale node's: after a
        rebuild the same index may belong to a different element, and acting
        on it would hit the wrong target.  Otherwise the search falls back to
        characteristic scoring.

        Returns:
            The replacement node, or ``None`` when the DOM watchdog or its
            selector map is unavailable, no candidate qualifies, or the
            lookup raises.
        """
        try:
            dom_watchdog = self._get_dom_watchdog()
            if not dom_watchdog or not dom_watchdog.selector_map:
                return None

            selector_map = dom_watchdog.selector_map

            same_index_node = selector_map.get(stale_node.element_index)
            if same_index_node and self._node_names_compatible(stale_node, same_index_node):
                return same_index_node

            # No trustworthy same-index node: match by element characteristics.
            return await self._find_by_element_characteristics(stale_node, selector_map)

        except Exception as e:
            self.logger.debug(f'Fresh element lookup failed: {e}')
            return None

    @staticmethod
    def _node_names_compatible(stale_node: EnhancedDOMTreeNode, candidate: EnhancedDOMTreeNode) -> bool:
        """Return False only when both nodes have a name and the names differ (case-insensitive)."""
        stale_name = stale_node.node_name
        candidate_name = candidate.node_name
        if not stale_name or not candidate_name:
            return True  # Nothing to compare; do not reject on missing data
        return stale_name.lower() == candidate_name.lower()

    async def _find_by_element_characteristics(
        self,
        stale_node: EnhancedDOMTreeNode,
        fresh_selector_map: dict[int, EnhancedDOMTreeNode],
    ) -> EnhancedDOMTreeNode | None:
        """Return the best-scoring node in ``fresh_selector_map``, if any qualifies.

        Scoring per candidate:
            * ``NAME_MATCH_SCORE`` when the node names match case-insensitively;
            * ``ATTRIBUTE_MATCH_SCORE`` for every attribute present on both
              nodes with an equal value;
            * ``POSITION_MATCH_SCORE`` when both nodes have an absolute
              position and x and y each differ by less than
              ``POSITION_TOLERANCE_PX``.

        A candidate must reach ``MIN_MATCH_SCORE``; ties keep the candidate
        seen first in the map's iteration order.
        """
        best_match = None
        best_match_score = 0

        stale_attrs = stale_node.attributes
        stale_pos = stale_node.absolute_position

        for fresh_node in fresh_selector_map.values():
            score = 0

            # Match node name
            if (
                stale_node.node_name
                and fresh_node.node_name
                and stale_node.node_name.lower() == fresh_node.node_name.lower()
            ):
                score += self.NAME_MATCH_SCORE

            # Match attributes shared by both nodes
            fresh_attrs = fresh_node.attributes
            if stale_attrs and fresh_attrs:
                shared_keys = stale_attrs.keys() & fresh_attrs.keys()
                score += self.ATTRIBUTE_MATCH_SCORE * sum(
                    1 for key in shared_keys if stale_attrs[key] == fresh_attrs[key]
                )

            # Match approximate position
            fresh_pos = fresh_node.absolute_position
            if stale_pos and fresh_pos:
                x_diff = abs(stale_pos.x - fresh_pos.x)
                y_diff = abs(stale_pos.y - fresh_pos.y)
                if x_diff < self.POSITION_TOLERANCE_PX and y_diff < self.POSITION_TOLERANCE_PX:
                    score += self.POSITION_MATCH_SCORE

            # Strictly-greater comparison keeps the first of equally good candidates.
            if score > best_match_score and score >= self.MIN_MATCH_SCORE:
                best_match_score = score
                best_match = fresh_node

        if best_match:
            self.logger.debug(f'Found element match with score {best_match_score}')

        return best_match

    async def _update_event_with_fresh_element(
        self,
        event: BaseEvent,
        fresh_element: EnhancedDOMTreeNode,
    ) -> None:
        """Point ``event.node`` at ``fresh_element`` if the event has a ``node`` attribute."""
        if hasattr(event, 'node'):
            event.node = fresh_element
            self.logger.debug(f'Updated event with fresh element index {fresh_element.element_index}')

    # ------------------------------------------------------------------
    # Small helpers
    # ------------------------------------------------------------------

    def _get_dom_watchdog(self) -> Any:
        """Return ``browser_session._dom_watchdog``, or ``None`` if it is missing or access fails."""
        try:
            return getattr(self.browser_session, '_dom_watchdog', None)
        except Exception:
            return None

    def _get_element_key(self, node: EnhancedDOMTreeNode) -> str:
        """Build an identification key from the node's target id, element index and node id."""
        return f'{node.target_id}_{node.element_index}_{node.node_id}'

    def _get_retry_count(self, retry_key: str) -> int:
        """Return the number of recorded recovery attempts for ``retry_key`` (0 if none)."""
        return self._retry_counts.get(retry_key, 0)

    def _increment_retry_count(self, retry_key: str) -> None:
        """Record one more recovery attempt for ``retry_key``."""
        self._retry_counts[retry_key] = self._get_retry_count(retry_key) + 1

    def _reset_retry_count(self, retry_key: str) -> None:
        """Forget all recorded recovery attempts for ``retry_key``."""
        self._retry_counts.pop(retry_key, None)
0 commit comments