Skip to content

Commit e41b156

Browse files
authored
Paint order filtering (#3007)
<!-- This is an auto-generated description by cubic. --> ## Summary by cubic Add paint-order-based filtering to DOM serialization to hide elements fully covered by later-painted layers. This reduces noisy nodes, improves relevance, and is enabled by default via BrowserProfile.paint_order_filtering. - New Features - Added PaintOrderRemover with rectangle-union occlusion by paint_order and bounds; marks nodes as ignored_by_paint_order. - Serializer runs paint-order pass before optimization; skips interactive indexing for ignored nodes. - Considers background-color and opacity; ignores transparent backgrounds and opacity < 0.8 when building occlusion. - Added paint_order_filtering flag to BrowserProfile, Session, and DomService; playground wired for testing. - Included background-color in computed styles; added guards to timing percent calculations. - Migration - Serialized output now includes ignored_by_paint_order (SimplifiedNode) and is_visible (EnhancedDOMTreeNode). - To disable, set paint_order_filtering=False on BrowserProfile, BrowserSession, or DomService. <!-- End of auto-generated description by cubic. -->
2 parents 0e4c3ef + 2768c4c commit e41b156

File tree

10 files changed

+256
-26
lines changed

10 files changed

+256
-26
lines changed

browser_use/browser/profile.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,7 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro
604604
filter_highlight_ids: bool = Field(
605605
default=True, description='Only show element IDs in highlights if llm_representation is less than 10 characters.'
606606
)
607+
paint_order_filtering: bool = Field(default=True, description='Enable paint order filtering. Slightly experimental.')
607608

608609
# --- Downloads ---
609610
auto_download_pdfs: bool = Field(default=True, description='Automatically download PDFs when navigating to PDF viewer pages.')

browser_use/browser/session.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -259,15 +259,17 @@ def __init__(
259259
enable_default_extensions: bool | None = None,
260260
window_size: dict | None = None,
261261
window_position: dict | None = None,
262-
cross_origin_iframes: bool | None = None,
263262
minimum_wait_page_load_time: float | None = None,
264263
wait_for_network_idle_page_load_time: float | None = None,
265264
wait_between_actions: float | None = None,
266-
highlight_elements: bool | None = None,
267265
filter_highlight_ids: bool | None = None,
268266
auto_download_pdfs: bool | None = None,
269267
profile_directory: str | None = None,
270268
cookie_whitelist_domains: list[str] | None = None,
269+
# DOM extraction layer configuration
270+
cross_origin_iframes: bool | None = None,
271+
highlight_elements: bool | None = None,
272+
paint_order_filtering: bool | None = None,
271273
):
272274
# Following the same pattern as AgentSettings in service.py
273275
# Only pass non-None values to avoid validation errors

browser_use/browser/watchdogs/dom_watchdog.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,7 @@ async def _build_dom_tree_without_highlights(self, previous_state: SerializedDOM
360360
browser_session=self.browser_session,
361361
logger=self.logger,
362362
cross_origin_iframes=self.browser_session.browser_profile.cross_origin_iframes,
363+
paint_order_filtering=self.browser_session.browser_profile.paint_order_filtering,
363364
)
364365

365366
# Get serialized DOM tree using the service

browser_use/dom/enhanced_snapshot.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
'cursor', # Used in enhanced_snapshot.py cursor extraction
2727
'pointer-events', # Used for clickability logic
2828
'position', # Used for visibility logic
29+
'background-color', # Used for visibility logic
2930
]
3031

3132

browser_use/dom/playground/extraction.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,14 @@ async def test_focus_vs_all_elements():
2727
wait_for_network_idle_page_load_time=1,
2828
headless=False,
2929
args=['--incognito'],
30+
paint_order_filtering=True,
3031
),
3132
)
3233

3334
# 10 Sample websites with various interactive elements
3435
sample_websites = [
36+
'https://www.google.com/travel/flights',
37+
'https://v0-simple-ui-test-site.vercel.app',
3538
'https://browser-use.github.io/stress-tests/challenges/iframe-inception-level1.html',
3639
'https://browser-use.github.io/stress-tests/challenges/angular-form.html',
3740
'https://www.google.com/travel/flights',
@@ -194,7 +197,7 @@ def get_website_list_for_prompt() -> str:
194197

195198
# Calculate percentages
196199
total_time = all_timing.get('get_state_summary_total', 0)
197-
if total_time > 0:
200+
if total_time > 0 and total_elements > 0:
198201
timing_text += '\n📈 PERCENTAGE BREAKDOWN:\n'
199202
timing_text += f'{"─" * 30}\n'
200203
for key, value in all_timing.items():
@@ -205,7 +208,7 @@ def get_website_list_for_prompt() -> str:
205208
timing_text += '\n🎯 CLICKABLE DETECTION ANALYSIS:\n'
206209
timing_text += f'{"─" * 35}\n'
207210
clickable_time = all_timing.get('clickable_detection_time', 0)
208-
if clickable_time > 0:
211+
if clickable_time > 0 and total_elements > 0:
209212
avg_per_element = (clickable_time / total_elements) * 1000000 # microseconds
210213
timing_text += f'Total clickable detection time: {clickable_time * 1000:.2f} ms\n'
211214
timing_text += f'Average per element: {avg_per_element:.2f} μs\n'
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
from collections import defaultdict
2+
from dataclasses import dataclass
3+
4+
from browser_use.dom.views import SimplifiedNode
5+
6+
"""
7+
Helper class for maintaining a union of rectangles (used for order of elements calculation)
8+
"""
9+
10+
11+
@dataclass(frozen=True, slots=True)
12+
class Rect:
13+
"""Closed axis-aligned rectangle with (x1,y1) bottom-left, (x2,y2) top-right."""
14+
15+
x1: float
16+
y1: float
17+
x2: float
18+
y2: float
19+
20+
def __post_init__(self):
21+
if not (self.x1 <= self.x2 and self.y1 <= self.y2):
22+
return False
23+
24+
# --- fast relations ----------------------------------------------------
25+
def area(self) -> float:
26+
return (self.x2 - self.x1) * (self.y2 - self.y1)
27+
28+
def intersects(self, other: 'Rect') -> bool:
29+
return not (self.x2 <= other.x1 or other.x2 <= self.x1 or self.y2 <= other.y1 or other.y2 <= self.y1)
30+
31+
def contains(self, other: 'Rect') -> bool:
32+
return self.x1 <= other.x1 and self.y1 <= other.y1 and self.x2 >= other.x2 and self.y2 >= other.y2
33+
34+
35+
class RectUnionPure:
    """
    Maintains a union of rectangles as a list of non-overlapping pieces.

    Every rectangle passed to `add` is first cut against the pieces already
    stored, and only the parts that do not overlap them are appended.
    No external dependencies - fine for a few thousand rectangles.
    """

    __slots__ = ('_rects',)

    def __init__(self):
        self._rects: list[Rect] = []

    # -----------------------------------------------------------------
    def _split_diff(self, a: Rect, b: Rect) -> list[Rect]:
        r"""
        Return list of up to 4 rectangles = a \ b.
        Assumes a intersects b.

        The bottom and top slices span the full width of `a`; the left and
        right slices are limited to the y-range shared by `a` and `b`, so the
        returned pieces never overlap each other.
        """
        parts = []

        # Bottom slice
        if a.y1 < b.y1:
            parts.append(Rect(a.x1, a.y1, a.x2, b.y1))
        # Top slice
        if b.y2 < a.y2:
            parts.append(Rect(a.x1, b.y2, a.x2, a.y2))

        # Middle (vertical) strip: y overlap is [max(a.y1,b.y1), min(a.y2,b.y2)]
        y_lo = max(a.y1, b.y1)
        y_hi = min(a.y2, b.y2)

        # Left slice
        if a.x1 < b.x1:
            parts.append(Rect(a.x1, y_lo, b.x1, y_hi))
        # Right slice
        if b.x2 < a.x2:
            parts.append(Rect(b.x2, y_lo, a.x2, y_hi))

        return parts

    # -----------------------------------------------------------------
    def contains(self, r: Rect) -> bool:
        """
        True iff r is fully covered by the current union.

        Works by repeatedly subtracting each stored rectangle from what is
        left of `r`; `r` is covered exactly when nothing is left.
        """
        if not self._rects:
            return False

        remaining = [r]
        for s in self._rects:
            survivors = []
            for piece in remaining:
                if s.contains(piece):
                    # piece completely gone
                    continue
                if piece.intersects(s):
                    survivors.extend(self._split_diff(piece, s))
                else:
                    survivors.append(piece)
            if not survivors:  # everything eaten - covered
                return True
            remaining = survivors
        return False  # something survived

    # -----------------------------------------------------------------
    def add(self, r: Rect) -> bool:
        """
        Insert r unless it is already covered.
        Returns True if the union grew.
        """
        if self.contains(r):
            return False

        # Cut r against every stored rectangle in turn; stored rectangles are
        # never modified, only the not-yet-covered pieces of r shrink.
        pending = [r]
        for s in self._rects:
            next_pending = []
            for piece in pending:
                if piece.intersects(s):
                    next_pending.extend(self._split_diff(piece, s))
                else:
                    next_pending.append(piece)
            pending = next_pending

        # Any left-over pieces are new, non-overlapping areas
        self._rects.extend(pending)
        return True
129+
130+
131+
class PaintOrderRemover:
    """
    Marks nodes that are fully covered by nodes with a higher paint order.

    Nodes are visited from the highest paint order to the lowest. A node whose
    bounds are entirely inside the union of rectangles collected from higher
    paint orders gets `ignored_by_paint_order = True`. Nodes with a transparent
    background or a low opacity are never added to that union, because content
    underneath them stays visible.
    """

    # Computed `background-color` value that means "nothing is painted".
    TRANSPARENT_BACKGROUND = 'rgba(0, 0, 0, 0)'
    # Nodes with an opacity below this value do not occlude what is beneath
    # them. The value is a heuristic, not derived from any specification.
    MIN_OCCLUDING_OPACITY = 0.8

    def __init__(self, root: SimplifiedNode):
        self.root = root

    @classmethod
    def _can_occlude(cls, computed_styles) -> bool:
        """Return True if a node with these computed styles hides what it covers."""
        # NOTE(review): a node without any computed styles is treated as an
        # occluder, matching the previous behaviour - confirm this is intended.
        if not computed_styles:
            return True

        # A missing background-color is treated the same as a transparent one.
        if computed_styles.get('background-color', cls.TRANSPARENT_BACKGROUND) == cls.TRANSPARENT_BACKGROUND:
            return False

        try:
            opacity = float(computed_styles.get('opacity', '1'))
        except (TypeError, ValueError):
            # An unparsable opacity must not abort the whole pass; fall back
            # to the CSS default of fully opaque.
            opacity = 1.0

        return not opacity < cls.MIN_OCCLUDING_OPACITY

    def calculate_paint_order(self) -> None:
        """Set `ignored_by_paint_order` on every node that is fully occluded.

        Only nodes that have a snapshot with both a paint order and bounds
        take part; all other nodes are left untouched.
        """
        grouped_by_paint_order: defaultdict[int, list[SimplifiedNode]] = defaultdict(list)

        # Iterative pre-order walk, so a very deep tree cannot exceed the
        # interpreter's recursion limit.
        stack = [self.root]
        while stack:
            node = stack.pop()
            snapshot = node.original_node.snapshot_node
            if snapshot and snapshot.paint_order is not None and snapshot.bounds is not None:
                grouped_by_paint_order[snapshot.paint_order].append(node)
            stack.extend(reversed(node.children))

        rect_union = RectUnionPure()

        for paint_order in sorted(grouped_by_paint_order, reverse=True):
            rects_to_add = []

            for node in grouped_by_paint_order[paint_order]:
                snapshot = node.original_node.snapshot_node
                bounds = snapshot.bounds

                # Negative extents cannot form a valid rectangle.
                if not bounds or bounds.width < 0 or bounds.height < 0:
                    continue

                rect = Rect(
                    x1=bounds.x,
                    y1=bounds.y,
                    x2=bounds.x + bounds.width,
                    y2=bounds.y + bounds.height,
                )

                if rect_union.contains(rect):
                    node.ignored_by_paint_order = True

                # see-through nodes are checked above but never hide others
                if self._can_occlude(snapshot.computed_styles):
                    rects_to_add.append(rect)

            # Added only after the whole group was checked, so nodes sharing a
            # paint order never occlude each other.
            for rect in rects_to_add:
                rect_union.add(rect)

browser_use/dom/serializer/serializer.py

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33

44
from browser_use.dom.serializer.clickable_elements import ClickableElementDetector
5+
from browser_use.dom.serializer.paint_order import PaintOrderRemover
56
from browser_use.dom.utils import cap_text_length
67
from browser_use.dom.views import (
78
DOMRect,
@@ -40,6 +41,7 @@ def __init__(
4041
previous_cached_state: SerializedDOMState | None = None,
4142
enable_bbox_filtering: bool = True,
4243
containment_threshold: float | None = None,
44+
paint_order_filtering: bool = True,
4345
):
4446
self.root_node = root_node
4547
self._interactive_counter = 1
@@ -52,6 +54,8 @@ def __init__(
5254
# Bounding box filtering configuration
5355
self.enable_bbox_filtering = enable_bbox_filtering
5456
self.containment_threshold = containment_threshold or self.DEFAULT_CONTAINMENT_THRESHOLD
57+
# Paint order filtering configuration
58+
self.paint_order_filtering = paint_order_filtering
5559

5660
def serialize_accessible_elements(self) -> tuple[SerializedDOMState, dict[str, float]]:
5761
import time
@@ -70,16 +74,19 @@ def serialize_accessible_elements(self) -> tuple[SerializedDOMState, dict[str, f
7074
end_step1 = time.time()
7175
self.timing_info['create_simplified_tree'] = end_step1 - start_step1
7276

73-
# Step 2: Optimize tree (remove unnecessary parents)
77+
# Step 2: Remove elements based on paint order
78+
start_step3 = time.time()
79+
if self.paint_order_filtering and simplified_tree:
80+
PaintOrderRemover(simplified_tree).calculate_paint_order()
81+
end_step3 = time.time()
82+
self.timing_info['calculate_paint_order'] = end_step3 - start_step3
83+
84+
# Step 3: Optimize tree (remove unnecessary parents)
7485
start_step2 = time.time()
7586
optimized_tree = self._optimize_tree(simplified_tree)
7687
end_step2 = time.time()
7788
self.timing_info['optimize_tree'] = end_step2 - start_step2
7889

79-
# # Step 3: Detect and group semantic elements
80-
# if optimized_tree:
81-
# self._detect_semantic_groups(optimized_tree)
82-
8390
# Step 3: Apply bounding box filtering (NEW)
8491
if self.enable_bbox_filtering and optimized_tree:
8592
start_step3 = time.time()
@@ -152,20 +159,16 @@ def _create_simplified_tree(self, node: EnhancedDOMTreeNode, depth: int = 0) ->
152159
simplified = SimplifiedNode(original_node=node, children=[])
153160
for child in node.content_document.children_nodes or []:
154161
simplified_child = self._create_simplified_tree(child, depth + 1)
155-
if simplified_child:
162+
if simplified_child is not None:
156163
simplified.children.append(simplified_child)
157164
return simplified
158165

159-
# Use enhanced scoring for inclusion decision
160-
is_interactive = self._is_interactive_cached(node)
161-
162-
is_visible = node.snapshot_node and node.is_visible
166+
is_visible = node.is_visible
163167
is_scrollable = node.is_actually_scrollable
164168

165169
# Include if interactive (regardless of visibility), or scrollable, or has children to process
166-
should_include = (is_interactive and is_visible) or is_scrollable or bool(node.children_and_shadow_roots)
167170

168-
if should_include:
171+
if is_visible or is_scrollable or bool(node.children_and_shadow_roots):
169172
simplified = SimplifiedNode(original_node=node, children=[])
170173
# simplified._analysis = analysis # Store analysis for grouping
171174

@@ -176,7 +179,7 @@ def _create_simplified_tree(self, node: EnhancedDOMTreeNode, depth: int = 0) ->
176179
simplified.children.append(simplified_child)
177180

178181
# Return if meaningful or has meaningful children
179-
if (is_interactive and is_visible) or is_scrollable or simplified.children:
182+
if is_visible or is_scrollable or simplified.children:
180183
return simplified
181184

182185
elif node.node_type == NodeType.TEXT_NODE:
@@ -206,7 +209,7 @@ def _optimize_tree(self, node: SimplifiedNode | None) -> SimplifiedNode | None:
206209
is_visible = node.original_node.snapshot_node and node.original_node.is_visible
207210

208211
if (
209-
(is_interactive_opt and is_visible) # Only keep interactive nodes that are visible
212+
is_visible # Keep all visible nodes
210213
or node.original_node.is_actually_scrollable
211214
or node.original_node.node_type == NodeType.TEXT_NODE
212215
or node.children
@@ -232,8 +235,8 @@ def _assign_interactive_indices_and_mark_new_nodes(self, node: SimplifiedNode |
232235
if not node:
233236
return
234237

235-
# Skip assigning index to excluded nodes
236-
if not (hasattr(node, 'excluded_by_parent') and node.excluded_by_parent):
238+
# Skip assigning index to excluded nodes, or ignored by paint order
239+
if not node.excluded_by_parent and not node.ignored_by_paint_order:
237240
# Assign index to clickable elements that are also visible
238241
is_interactive_assign = self._is_interactive_cached(node.original_node)
239242
is_visible = node.original_node.snapshot_node and node.original_node.is_visible

0 commit comments

Comments
 (0)