Skip to content

Commit dc36acd

Browse files
chrisschnabl and claude committed
fix(browser-use): Enhanced reliability improvements for browser automation
- Added ElementStalenessWatchdog for intelligent stale element detection and recovery
- Implemented enhanced error classification system with retry strategies
- Improved error handling in tools service with context-aware classification
- Added comprehensive pattern matching for network, timing, and resource errors

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 03c637e commit dc36acd

File tree

4 files changed

+680
-5
lines changed

4 files changed

+680
-5
lines changed

browser_use/browser/session.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -990,6 +990,7 @@ async def attach_all_watchdogs(self) -> None:
990990
from browser_use.browser.watchdogs.network_reliability_watchdog import NetworkReliabilityWatchdog
991991
from browser_use.browser.watchdogs.form_reliability_watchdog import FormReliabilityWatchdog
992992
from browser_use.browser.watchdogs.dynamic_content_watchdog import DynamicContentWatchdog
993+
from browser_use.browser.watchdogs.element_staleness_watchdog import ElementStalenessWatchdog
993994

994995
# Initialize CrashWatchdog
995996
# CrashWatchdog.model_rebuild()
@@ -1125,6 +1126,11 @@ async def attach_all_watchdogs(self) -> None:
11251126
self._dynamic_content_watchdog = DynamicContentWatchdog(event_bus=self.event_bus, browser_session=self)
11261127
self._dynamic_content_watchdog.attach_to_session()
11271128

1129+
# Initialize ElementStalenessWatchdog (handles stale element detection and recovery)
1130+
ElementStalenessWatchdog.model_rebuild()
1131+
self._element_staleness_watchdog = ElementStalenessWatchdog(event_bus=self.event_bus, browser_session=self)
1132+
self._element_staleness_watchdog.attach_to_session()
1133+
11281134
# Mark watchdogs as attached to prevent duplicate attachment
11291135
self._watchdogs_attached = True
11301136

browser_use/browser/watchdogs/element_staleness_watchdog.py

Lines changed: 348 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,348 @@
1+
"""Element staleness detection and recovery watchdog."""
2+
3+
import asyncio
4+
import logging
5+
import time
6+
from typing import TYPE_CHECKING, ClassVar, Any
7+
8+
from bubus import BaseEvent
9+
10+
from browser_use.browser.events import (
11+
BrowserErrorEvent,
12+
ClickElementEvent,
13+
TypeTextEvent,
14+
UploadFileEvent,
15+
ScrollEvent,
16+
BrowserStateRequestEvent,
17+
)
18+
from browser_use.browser.watchdog_base import BaseWatchdog
19+
from browser_use.dom.views import EnhancedDOMTreeNode
20+
21+
if TYPE_CHECKING:
22+
pass
23+
24+
logger = logging.getLogger(__name__)
25+
26+
27+
class ElementStalenessWatchdog(BaseWatchdog):
    """Detect stale element references before element actions and try to recover.

    For click, type, upload and element-scoped scroll events the watchdog
    probes the target node over CDP. When the node looks stale it clears the
    DOM watchdog cache, requests a fresh browser state, searches the rebuilt
    selector map for an equivalent node and rewrites ``event.node`` in place.

    Every handler returns ``None``: the watchdog never performs the action
    itself, it only tries to repair the event for whichever handler does.
    """

    # Event contracts
    LISTENS_TO: ClassVar[list[type[BaseEvent]]] = [
        ClickElementEvent,
        TypeTextEvent,
        UploadFileEvent,
        ScrollEvent,
    ]
    EMITS: ClassVar[list[type[BaseEvent]]] = [
        BrowserErrorEvent,
        BrowserStateRequestEvent,
    ]

    # Tunables (values unchanged from the previously inlined literals).
    MAX_STALE_RETRIES: ClassVar[int] = 2  # recovery attempts per (action, element) key
    MIN_REBUILD_INTERVAL_S: ClassVar[float] = 1.0  # minimum spacing between DOM rebuilds
    POSITION_TOLERANCE_PX: ClassVar[int] = 50  # per-axis tolerance when matching positions
    MIN_MATCH_SCORE: ClassVar[int] = 15  # e.g. node name (10) + position (5)

    # Lower-cased substrings of CDP error messages that are treated as staleness.
    STALE_ERROR_INDICATORS: ClassVar[tuple[str, ...]] = (
        'node not found',
        'could not find node',
        'invalid node id',
        'node is not attached',
        'disconnected frame',
        'execution context destroyed',
    )

    def __init__(self, *args, **kwargs):
        """Forward construction to ``BaseWatchdog`` and set up bookkeeping state."""
        super().__init__(*args, **kwargs)
        # Not read by any method of this class; kept so the attribute still exists.
        self._element_staleness_cache: dict[str, Any] = {}
        # Recovery attempts keyed by "<action>_<target_id>_<element_index>_<node_id>".
        # Entries are removed when the element checks out fresh or recovery
        # succeeds; keys for elements that never recover stay in the dict.
        self._retry_counts: dict[str, int] = {}
        # time.monotonic() stamp of the last completed rebuild. -inf means
        # "never", so the very first rebuild is not rate-limited.
        self._last_dom_rebuild: float = float('-inf')

    async def on_ClickElementEvent(self, event: ClickElementEvent) -> dict[str, Any] | None:
        """Run the staleness check for a click event."""
        return await self._handle_element_action_with_staleness_check('click', event.node, event)

    async def on_TypeTextEvent(self, event: TypeTextEvent) -> dict | None:
        """Run the staleness check for a type-text event."""
        return await self._handle_element_action_with_staleness_check('type', event.node, event)

    async def on_UploadFileEvent(self, event: UploadFileEvent) -> None:
        """Run the staleness check for a file-upload event."""
        await self._handle_element_action_with_staleness_check('upload', event.node, event)
        return None

    async def on_ScrollEvent(self, event: ScrollEvent) -> None:
        """Run the staleness check for a scroll event that targets an element."""
        # Scroll events without a node have nothing to check.
        if event.node is not None:
            await self._handle_element_action_with_staleness_check('scroll', event.node, event)
        return None

    async def _handle_element_action_with_staleness_check(
        self,
        action_type: str,
        node: EnhancedDOMTreeNode,
        original_event: BaseEvent,
    ) -> Any:
        """Probe ``node`` for staleness and start recovery when it is stale.

        Args:
            action_type: Short action label ('click', 'type', 'upload', 'scroll')
                used in log messages and in the retry key.
            node: The element the event targets.
            original_event: The event being handled; recovery may replace its ``node``.

        Returns:
            Always ``None``. Any exception is logged and swallowed so this
            watchdog never blocks the action.
        """
        try:
            # Built inside the try so a malformed node cannot raise out of the handler.
            retry_key = f'{action_type}_{self._get_element_key(node)}'

            if await self._check_element_staleness(node):
                self.logger.warning(
                    f'🔄 Element staleness detected for {action_type} action on element {node.element_index}'
                )
                return await self._handle_stale_element(action_type, node, original_event, retry_key)

            # Element appears fresh: forget earlier failed attempts for this key.
            self._reset_retry_count(retry_key)
            return None

        except Exception as e:
            self.logger.error(f'❌ Element staleness check failed for {action_type}: {e}')
            return None

    async def _check_element_staleness(self, node: EnhancedDOMTreeNode) -> bool:
        """Return True when ``node`` looks stale according to CDP.

        The node counts as stale when ``DOM.describeNode`` fails with one of
        ``STALE_ERROR_INDICATORS``, or succeeds but reports a different node
        name. Any other failure returns False to avoid false positives.
        """
        try:
            if not node.target_id:
                return False  # Can't check without target_id

            session = await self.browser_session.get_or_create_cdp_session(target_id=node.target_id)

            try:
                # NOTE(review): this queries by `nodeId`. DOM.describeNode also
                # accepts `backendNodeId`; confirm `node.node_id` is valid for this session.
                describe_result = await session.cdp_client.send.DOM.describeNode(
                    params={'nodeId': node.node_id},
                    session_id=session.session_id,
                )

                # The node resolved; a changed tag still marks it as a different element.
                original_name = node.node_name or ''
                current_name = describe_result.get('node', {}).get('nodeName', '').lower()

                if original_name and current_name and original_name.lower() != current_name:
                    self.logger.debug(f'Node name changed: {original_name} -> {current_name}')
                    return True

                return False

            except Exception as e:
                error_str = str(e).lower()
                if any(indicator in error_str for indicator in self.STALE_ERROR_INDICATORS):
                    self.logger.debug(f'Staleness detected via CDP error: {error_str}')
                    return True

                # Unknown error - assume not stale to avoid false positives
                self.logger.debug(f'Unknown error in staleness check: {e}')
                return False

        except Exception as e:
            self.logger.debug(f'Staleness check failed: {e}')
            return False  # Assume not stale if we can't check

    async def _handle_stale_element(
        self,
        action_type: str,
        stale_node: EnhancedDOMTreeNode,
        original_event: BaseEvent,
        retry_key: str,
    ) -> Any:
        """Rebuild the DOM and point ``original_event`` at a fresh equivalent node.

        Once ``MAX_STALE_RETRIES`` attempts have been recorded for ``retry_key``
        a ``BrowserErrorEvent`` is dispatched instead of retrying.

        Returns:
            Always ``None``; on failure the original action proceeds with the
            stale node and fails through its normal path.
        """
        retry_count = self._get_retry_count(retry_key)

        if retry_count >= self.MAX_STALE_RETRIES:
            error_msg = (
                f'Element staleness retry limit exceeded for {action_type} on element {stale_node.element_index}'
            )
            self.logger.error(f'❌ {error_msg}')

            # Emit browser error event to inform other components
            self.event_bus.dispatch(
                BrowserErrorEvent(
                    error_type='ElementStalenessRetryExceeded',
                    message=error_msg,
                    details={
                        'action_type': action_type,
                        'element_index': stale_node.element_index,
                        'retry_count': retry_count,
                    },
                )
            )
            return None

        self.logger.info(f'🔄 Attempting to recover from stale element (attempt #{retry_count + 1})')

        try:
            self._increment_retry_count(retry_key)

            await self._force_dom_rebuild()
            fresh_element = await self._find_fresh_element_equivalent(stale_node)

            if fresh_element is None:
                self.logger.warning(f'❌ Could not find fresh equivalent element for {action_type}')
                return None

            self.logger.info(f'✅ Found fresh equivalent element for {action_type}')
            await self._update_event_with_fresh_element(original_event, fresh_element)
            self._reset_retry_count(retry_key)
            return None

        except Exception as e:
            self.logger.error(f'❌ Stale element recovery failed: {e}')
            return None

    async def _force_dom_rebuild(self) -> None:
        """Clear the DOM watchdog cache and request a fresh browser state.

        Rebuilds are skipped when the previous one completed less than
        ``MIN_REBUILD_INTERVAL_S`` seconds ago. Failures are logged and swallowed.
        """
        # A monotonic clock keeps the rate limit immune to wall-clock adjustments.
        if time.monotonic() - self._last_dom_rebuild < self.MIN_REBUILD_INTERVAL_S:
            self.logger.debug('Skipping DOM rebuild - too recent')
            return

        self.logger.debug('🔧 Forcing DOM rebuild due to staleness')

        try:
            dom_watchdog = self._get_dom_watchdog()
            if dom_watchdog:
                dom_watchdog.clear_cache()

            state_request = self.event_bus.dispatch(
                BrowserStateRequestEvent(
                    include_dom=True,
                    include_screenshot=False,  # Skip screenshot for performance
                    cache_clickable_elements_hashes=True,
                )
            )

            await state_request
            await state_request.event_result(raise_if_any=False, raise_if_none=False)

            # Stamp on completion so the interval is measured from a finished rebuild.
            self._last_dom_rebuild = time.monotonic()
            self.logger.debug('✅ DOM rebuild completed')

        except Exception as e:
            self.logger.error(f'❌ DOM rebuild failed: {e}')

    async def _find_fresh_element_equivalent(self, stale_node: EnhancedDOMTreeNode) -> EnhancedDOMTreeNode | None:
        """Look up a replacement for ``stale_node`` in the DOM watchdog's selector map.

        The node at the same ``element_index`` is preferred, but only when it
        is a different object from ``stale_node`` and its node name does not
        contradict the stale node's. Otherwise the search falls back to
        characteristic scoring.

        Returns:
            The replacement node, or ``None`` when no selector map is
            available, nothing matches, or the lookup raises.
        """
        try:
            dom_watchdog = self._get_dom_watchdog()
            if not dom_watchdog or not dom_watchdog.selector_map:
                return None

            candidate = dom_watchdog.selector_map.get(stale_node.element_index)
            # If the rebuild was skipped or failed, the map can still hold the
            # stale node itself; returning it would fake a successful recovery.
            if candidate is not None and candidate is not stale_node:
                stale_name = (stale_node.node_name or '').lower()
                candidate_name = (candidate.node_name or '').lower()
                # Indices may be reassigned after a DOM change, so reject an
                # index hit whose tag differs. Missing names are accepted.
                if not stale_name or not candidate_name or stale_name == candidate_name:
                    return candidate

            return await self._find_by_element_characteristics(stale_node, dom_watchdog.selector_map)

        except Exception as e:
            self.logger.debug(f'Fresh element lookup failed: {e}')
            return None

    async def _find_by_element_characteristics(
        self,
        stale_node: EnhancedDOMTreeNode,
        fresh_selector_map: dict[int, EnhancedDOMTreeNode],
    ) -> EnhancedDOMTreeNode | None:
        """Return the best-scoring node in ``fresh_selector_map``, if any qualifies.

        Scoring: +10 for an equal node name (case-insensitive), +2 for each
        attribute present on both nodes with an equal value, +5 when both
        coordinates of ``absolute_position`` differ by less than
        ``POSITION_TOLERANCE_PX``. A node needs at least ``MIN_MATCH_SCORE``;
        on ties the first node encountered wins.
        """
        best_match = None
        best_match_score = 0

        for fresh_node in fresh_selector_map.values():
            if fresh_node is stale_node:
                continue  # the stale node cannot be its own replacement

            score = 0

            if (
                stale_node.node_name
                and fresh_node.node_name
                and stale_node.node_name.lower() == fresh_node.node_name.lower()
            ):
                score += 10

            if stale_node.attributes and fresh_node.attributes:
                shared_attrs = stale_node.attributes.keys() & fresh_node.attributes.keys()
                score += 2 * sum(
                    1 for attr in shared_attrs if stale_node.attributes[attr] == fresh_node.attributes[attr]
                )

            if stale_node.absolute_position and fresh_node.absolute_position:
                x_diff = abs(stale_node.absolute_position.x - fresh_node.absolute_position.x)
                y_diff = abs(stale_node.absolute_position.y - fresh_node.absolute_position.y)
                if x_diff < self.POSITION_TOLERANCE_PX and y_diff < self.POSITION_TOLERANCE_PX:
                    score += 5

            if score >= self.MIN_MATCH_SCORE and score > best_match_score:
                best_match_score = score
                best_match = fresh_node

        if best_match:
            self.logger.debug(f'Found element match with score {best_match_score}')

        return best_match

    async def _update_event_with_fresh_element(
        self,
        event: BaseEvent,
        fresh_element: EnhancedDOMTreeNode,
    ) -> None:
        """Replace ``event.node`` with ``fresh_element`` when the event has a ``node`` attribute."""
        if hasattr(event, 'node'):
            event.node = fresh_element
            self.logger.debug(f'Updated event with fresh element index {fresh_element.element_index}')

    def _get_dom_watchdog(self):
        """Return the browser session's ``_dom_watchdog`` attribute, or ``None`` if unavailable."""
        try:
            return getattr(self.browser_session, '_dom_watchdog', None)
        except Exception:
            return None

    def _get_element_key(self, node: EnhancedDOMTreeNode) -> str:
        """Build the "<target_id>_<element_index>_<node_id>" identification key for ``node``."""
        return f'{node.target_id}_{node.element_index}_{node.node_id}'

    def _get_retry_count(self, retry_key: str) -> int:
        """Return the recorded attempt count for ``retry_key`` (0 when absent)."""
        return self._retry_counts.get(retry_key, 0)

    def _increment_retry_count(self, retry_key: str) -> None:
        """Add one to the attempt count for ``retry_key``."""
        self._retry_counts[retry_key] = self._get_retry_count(retry_key) + 1

    def _reset_retry_count(self, retry_key: str) -> None:
        """Drop the attempt count for ``retry_key``; a missing key is ignored."""
        self._retry_counts.pop(retry_key, None)

0 commit comments

Comments
 (0)