1+ """Element staleness detection and recovery watchdog."""
2+
3+ import asyncio
4+ import logging
5+ import time
6+ from typing import TYPE_CHECKING , ClassVar , Any
7+
8+ from bubus import BaseEvent
9+
10+ from browser_use .browser .events import (
11+ BrowserErrorEvent ,
12+ ClickElementEvent ,
13+ TypeTextEvent ,
14+ UploadFileEvent ,
15+ ScrollEvent ,
16+ BrowserStateRequestEvent ,
17+ )
18+ from browser_use .browser .watchdog_base import BaseWatchdog
19+ from browser_use .dom .views import EnhancedDOMTreeNode
20+
21+ if TYPE_CHECKING :
22+ pass
23+
24+ logger = logging .getLogger (__name__ )
25+
26+
class ElementStalenessWatchdog(BaseWatchdog):
    """Watchdog that screens element-targeting events for stale DOM nodes.

    Click, type, upload and element-scroll events are checked for staleness.
    When the targeted node looks stale, the watchdog requests a fresh browser
    state and tries to swap an equivalent fresh node into the event.
    """

    # Event types for which this class defines an ``on_<EventName>`` handler.
    LISTENS_TO: ClassVar[list[type[BaseEvent]]] = [ClickElementEvent, TypeTextEvent, UploadFileEvent, ScrollEvent]

    # Event types this class dispatches on the event bus.
    EMITS: ClassVar[list[type[BaseEvent]]] = [BrowserErrorEvent, BrowserStateRequestEvent]
41+
def __init__(self, *args, **kwargs):
    """Initialise the base watchdog, then the staleness bookkeeping state.

    All positional and keyword arguments are forwarded unchanged to the
    parent constructor.
    """
    super().__init__(*args, **kwargs)
    # Timestamp of the last forced DOM rebuild; stays 0 until one has run.
    self._last_dom_rebuild = 0
    # Number of recovery attempts made so far, keyed by retry key.
    self._retry_counts = {}
    # Placeholder for cached staleness results; not read anywhere in the visible class.
    self._element_staleness_cache = {}
47+
async def on_ClickElementEvent(self, event: ClickElementEvent) -> dict[str, Any] | None:
    """Route a click event through the shared staleness check.

    Returns:
        Whatever ``_handle_element_action_with_staleness_check`` returns for
        the event's node.
    """
    target_node = event.node
    outcome = await self._handle_element_action_with_staleness_check('click', target_node, event)
    return outcome
53+
async def on_TypeTextEvent(self, event: TypeTextEvent) -> dict | None:
    """Route a type-text event through the shared staleness check.

    Returns:
        Whatever ``_handle_element_action_with_staleness_check`` returns for
        the event's node.
    """
    target_node = event.node
    outcome = await self._handle_element_action_with_staleness_check('type', target_node, event)
    return outcome
59+
async def on_UploadFileEvent(self, event: UploadFileEvent) -> None:
    """Route an upload event through the shared staleness check.

    The result of the check is discarded; this handler always returns None.
    """
    target_node = event.node
    await self._handle_element_action_with_staleness_check('upload', target_node, event)
66+
async def on_ScrollEvent(self, event: ScrollEvent) -> None:
    """Run the staleness check for scroll events that target an element.

    Scroll events whose ``node`` is None are left untouched. The result of the
    check is discarded; this handler always returns None.
    """
    target_node = event.node
    if target_node is None:
        # No element involved, so there is nothing that could be stale.
        return None

    await self._handle_element_action_with_staleness_check('scroll', target_node, event)
    return None
74+
75+ async def _handle_element_action_with_staleness_check (
76+ self ,
77+ action_type : str ,
78+ node : EnhancedDOMTreeNode ,
79+ original_event : BaseEvent ,
80+ ) -> Any :
81+ """Handle element actions with staleness detection and recovery."""
82+ element_key = self ._get_element_key (node )
83+ retry_key = f"{ action_type } _{ element_key } "
84+
85+ try :
86+ # Check if element appears to be stale
87+ is_stale = await self ._check_element_staleness (node )
88+
89+ if is_stale :
90+ self .logger .warning (f"🔄 Element staleness detected for { action_type } action on element { node .element_index } " )
91+ return await self ._handle_stale_element (action_type , node , original_event , retry_key )
92+
93+ # Element appears fresh, continue with normal processing
94+ self ._reset_retry_count (retry_key )
95+ return None # Let the original handler proceed
96+
97+ except Exception as e :
98+ self .logger .error (f"❌ Element staleness check failed for { action_type } : { e } " )
99+ # Don't block the action, let it proceed
100+ return None
101+
102+ async def _check_element_staleness (self , node : EnhancedDOMTreeNode ) -> bool :
103+ """Check if an element appears to be stale."""
104+ try :
105+ # Get current session for the element's target
106+ if not node .target_id :
107+ return False # Can't check without target_id
108+
109+ session = await self .browser_session .get_or_create_cdp_session (
110+ target_id = node .target_id
111+ )
112+
113+ # Check if the element still exists in the DOM
114+ try :
115+ # Try to describe the node to see if it's still valid
116+ describe_result = await session .cdp_client .send .DOM .describeNode (
117+ params = {'nodeId' : node .node_id },
118+ session_id = session .session_id
119+ )
120+
121+ # If we get here without exception, the element exists
122+ node_description = describe_result .get ('node' , {})
123+
124+ # Additional staleness indicators:
125+ # 1. Node name changed unexpectedly
126+ # 2. Node has no attributes when it should have some
127+ # 3. Node position appears invalid
128+
129+ original_name = node .node_name or ''
130+ current_name = node_description .get ('nodeName' , '' ).lower ()
131+
132+ if original_name and current_name and original_name .lower () != current_name :
133+ self .logger .debug (f"Node name changed: { original_name } -> { current_name } " )
134+ return True
135+
136+ return False # Element appears to be valid
137+
138+ except Exception as e :
139+ error_str = str (e ).lower ()
140+ # Common CDP errors indicating stale elements
141+ stale_indicators = [
142+ 'node not found' ,
143+ 'could not find node' ,
144+ 'invalid node id' ,
145+ 'node is not attached' ,
146+ 'disconnected frame' ,
147+ 'execution context destroyed'
148+ ]
149+
150+ if any (indicator in error_str for indicator in stale_indicators ):
151+ self .logger .debug (f"Staleness detected via CDP error: { error_str } " )
152+ return True
153+
154+ # Unknown error - assume not stale to avoid false positives
155+ self .logger .debug (f"Unknown error in staleness check: { e } " )
156+ return False
157+
158+ except Exception as e :
159+ self .logger .debug (f"Staleness check failed: { e } " )
160+ return False # Assume not stale if we can't check
161+
162+ async def _handle_stale_element (
163+ self ,
164+ action_type : str ,
165+ stale_node : EnhancedDOMTreeNode ,
166+ original_event : BaseEvent ,
167+ retry_key : str ,
168+ ) -> Any :
169+ """Handle stale element by rebuilding DOM and finding fresh element."""
170+ retry_count = self ._get_retry_count (retry_key )
171+ max_retries = 2 # Allow up to 2 retries for stale elements
172+
173+ if retry_count >= max_retries :
174+ error_msg = f"Element staleness retry limit exceeded for { action_type } on element { stale_node .element_index } "
175+ self .logger .error (f"❌ { error_msg } " )
176+
177+ # Emit browser error event to inform other components
178+ self .event_bus .dispatch (BrowserErrorEvent (
179+ error_type = 'ElementStalenessRetryExceeded' ,
180+ message = error_msg ,
181+ details = {
182+ 'action_type' : action_type ,
183+ 'element_index' : stale_node .element_index ,
184+ 'retry_count' : retry_count
185+ }
186+ ))
187+ return None # Let original action fail normally
188+
189+ self .logger .info (f"🔄 Attempting to recover from stale element (attempt #{ retry_count + 1 } )" )
190+
191+ try :
192+ # Increment retry count
193+ self ._increment_retry_count (retry_key )
194+
195+ # Force DOM rebuild to get fresh element references
196+ await self ._force_dom_rebuild ()
197+
198+ # Try to find the element again by its properties
199+ fresh_element = await self ._find_fresh_element_equivalent (stale_node )
200+
201+ if fresh_element :
202+ self .logger .info (f"✅ Found fresh equivalent element for { action_type } " )
203+
204+ # Update the original event with the fresh element
205+ await self ._update_event_with_fresh_element (original_event , fresh_element )
206+
207+ # Reset retry count on successful recovery
208+ self ._reset_retry_count (retry_key )
209+
210+ return None # Let the action proceed with fresh element
211+ else :
212+ self .logger .warning (f"❌ Could not find fresh equivalent element for { action_type } " )
213+ return None # Let the action fail normally
214+
215+ except Exception as e :
216+ self .logger .error (f"❌ Stale element recovery failed: { e } " )
217+ return None
218+
219+ async def _force_dom_rebuild (self ) -> None :
220+ """Force a DOM rebuild by dispatching a browser state request."""
221+ current_time = time .time ()
222+
223+ # Avoid rebuilding DOM too frequently (minimum 1 second between rebuilds)
224+ if current_time - self ._last_dom_rebuild < 1.0 :
225+ self .logger .debug ("Skipping DOM rebuild - too recent" )
226+ return
227+
228+ self .logger .debug ("🔧 Forcing DOM rebuild due to staleness" )
229+
230+ try :
231+ # Clear DOM cache in DOM watchdog
232+ dom_watchdog = self ._get_dom_watchdog ()
233+ if dom_watchdog :
234+ dom_watchdog .clear_cache ()
235+
236+ # Request fresh browser state to rebuild DOM
237+ state_request = self .event_bus .dispatch (
238+ BrowserStateRequestEvent (
239+ include_dom = True ,
240+ include_screenshot = False , # Skip screenshot for performance
241+ cache_clickable_elements_hashes = True
242+ )
243+ )
244+
245+ await state_request
246+ await state_request .event_result (raise_if_any = False , raise_if_none = False )
247+
248+ self ._last_dom_rebuild = current_time
249+ self .logger .debug ("✅ DOM rebuild completed" )
250+
251+ except Exception as e :
252+ self .logger .error (f"❌ DOM rebuild failed: { e } " )
253+
254+ async def _find_fresh_element_equivalent (self , stale_node : EnhancedDOMTreeNode ) -> EnhancedDOMTreeNode | None :
255+ """Find a fresh element equivalent to the stale one."""
256+ try :
257+ # Get the DOM watchdog to access fresh selector map
258+ dom_watchdog = self ._get_dom_watchdog ()
259+ if not dom_watchdog or not dom_watchdog .selector_map :
260+ return None
261+
262+ # Try to find by element index first (most direct match)
263+ fresh_element = dom_watchdog .selector_map .get (stale_node .element_index )
264+ if fresh_element :
265+ return fresh_element
266+
267+ # If exact index not found, try to find by element characteristics
268+ return await self ._find_by_element_characteristics (stale_node , dom_watchdog .selector_map )
269+
270+ except Exception as e :
271+ self .logger .debug (f"Fresh element lookup failed: { e } " )
272+ return None
273+
274+ async def _find_by_element_characteristics (
275+ self ,
276+ stale_node : EnhancedDOMTreeNode ,
277+ fresh_selector_map : dict [int , EnhancedDOMTreeNode ]
278+ ) -> EnhancedDOMTreeNode | None :
279+ """Find element by matching characteristics."""
280+ # Look for elements with matching node name, attributes, and position
281+ best_match = None
282+ best_match_score = 0
283+
284+ for fresh_node in fresh_selector_map .values ():
285+ score = 0
286+
287+ # Match node name
288+ if (stale_node .node_name and fresh_node .node_name and
289+ stale_node .node_name .lower () == fresh_node .node_name .lower ()):
290+ score += 10
291+
292+ # Match attributes
293+ if stale_node .attributes and fresh_node .attributes :
294+ common_attrs = set (stale_node .attributes .keys ()) & set (fresh_node .attributes .keys ())
295+ for attr in common_attrs :
296+ if stale_node .attributes [attr ] == fresh_node .attributes [attr ]:
297+ score += 2
298+
299+ # Match approximate position (within 50px tolerance)
300+ if (stale_node .absolute_position and fresh_node .absolute_position ):
301+ x_diff = abs (stale_node .absolute_position .x - fresh_node .absolute_position .x )
302+ y_diff = abs (stale_node .absolute_position .y - fresh_node .absolute_position .y )
303+ if x_diff < 50 and y_diff < 50 :
304+ score += 5
305+
306+ # Update best match if this is better
307+ if score > best_match_score and score >= 15 : # Minimum threshold
308+ best_match_score = score
309+ best_match = fresh_node
310+
311+ if best_match :
312+ self .logger .debug (f"Found element match with score { best_match_score } " )
313+
314+ return best_match
315+
316+ async def _update_event_with_fresh_element (
317+ self ,
318+ event : BaseEvent ,
319+ fresh_element : EnhancedDOMTreeNode
320+ ) -> None :
321+ """Update the event object with fresh element reference."""
322+ if hasattr (event , 'node' ):
323+ event .node = fresh_element
324+ self .logger .debug (f"Updated event with fresh element index { fresh_element .element_index } " )
325+
326+ def _get_dom_watchdog (self ):
327+ """Get the DOM watchdog instance from the browser session."""
328+ try :
329+ # Access the DOM watchdog instance directly from browser session
330+ return getattr (self .browser_session , '_dom_watchdog' , None )
331+ except Exception :
332+ return None
333+
334+ def _get_element_key (self , node : EnhancedDOMTreeNode ) -> str :
335+ """Generate a key for element identification."""
336+ return f"{ node .target_id } _{ node .element_index } _{ node .node_id } "
337+
338+ def _get_retry_count (self , retry_key : str ) -> int :
339+ """Get current retry count for a key."""
340+ return self ._retry_counts .get (retry_key , 0 )
341+
342+ def _increment_retry_count (self , retry_key : str ) -> None :
343+ """Increment retry count for a key."""
344+ self ._retry_counts [retry_key ] = self ._get_retry_count (retry_key ) + 1
345+
346+ def _reset_retry_count (self , retry_key : str ) -> None :
347+ """Reset retry count for a key."""
348+ self ._retry_counts .pop (retry_key , None )
0 commit comments