Skip to content

Commit 1a2a0b9

Browse files
👷‍♀️ [RUM-11139] Improve textual content extraction for action names (#3759)
* Use treewalker to replace innerText * Add support for display and contentVisibility * Improve code and add test cases * Clean up * Update packages/rum-core/src/domain/action/getActionNameFromElement.spec.ts Co-authored-by: sethfowler-datadog <[email protected]> * Add feature flag * Improve code --------- Co-authored-by: sethfowler-datadog <[email protected]>
1 parent e753ed4 commit 1a2a0b9

File tree

3 files changed

+166
-14
lines changed

3 files changed

+166
-14
lines changed

packages/core/src/tools/experimentalFeatures.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ import { objectHasValue } from './utils/objectUtils'
1616
export enum ExperimentalFeature {
1717
TRACK_INTAKE_REQUESTS = 'track_intake_requests',
1818
WRITABLE_RESOURCE_GRAPHQL = 'writable_resource_graphql',
19+
WATCH_COOKIE_WITHOUT_LOCK = 'watch_cookie_without_lock',
20+
USE_TREE_WALKER_FOR_ACTION_NAME = 'use_tree_walker_for_action_name',
1921
}
2022

2123
const enabledExperimentalFeatures: Set<ExperimentalFeature> = new Set()

packages/rum-core/src/domain/action/getActionNameFromElement.spec.ts

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import { ExperimentalFeature } from '@datadog/browser-core'
2+
import { mockExperimentalFeatures } from '../../../../core/test'
13
import { appendElement, mockRumConfiguration } from '../../../test'
24
import { NodePrivacyLevel } from '../privacy'
35
import { ActionNameSource, getActionNameFromElement } from './getActionNameFromElement'
@@ -96,6 +98,57 @@ describe('getActionNameFromElement', () => {
9698
expect(nameSource).toBe('text_content')
9799
})
98100

101+
it('should correctly compute whitespace for <br> and <p> elements', () => {
102+
const testCases = [
103+
{ element: appendElement('<div>hello<br/>world</div>'), expected: 'hello world' },
104+
{ element: appendElement('<div>hello<p>world</p></div>'), expected: 'hello world' },
105+
{ element: appendElement('<div>hello<p>world<br/>!</p></div>'), expected: 'hello world !' },
106+
{ element: appendElement('<div>hello world<br/>!<p>!</p></div>'), expected: 'hello world ! !' },
107+
]
108+
testCases.forEach(({ element, expected }) => {
109+
const { name, nameSource } = getActionNameFromElement(element, defaultConfiguration)
110+
expect(name).toBe(expected)
111+
expect(nameSource).toBe('text_content')
112+
})
113+
})
114+
115+
it('should introduce whitespace for block-level display values', () => {
116+
mockExperimentalFeatures([ExperimentalFeature.USE_TREE_WALKER_FOR_ACTION_NAME])
117+
const testCases = [
118+
{ display: 'block', expected: 'space' },
119+
{ display: 'inline-block', expected: 'no-space' },
120+
{ display: 'flex', expected: 'space' },
121+
{ display: 'inline-flex', expected: 'no-space' },
122+
{ display: 'grid', expected: 'space' },
123+
{ display: 'inline-grid', expected: 'no-space' },
124+
{ display: 'list-item', expected: 'space' },
125+
{ display: 'table', expected: 'space' },
126+
{ display: 'table-caption', expected: 'space' },
127+
{ display: 'inline', expected: 'no-space' },
128+
{ display: 'none', expected: 'nothing' },
129+
]
130+
testCases.forEach(({ display, expected }) => {
131+
const element = appendElement(
132+
`<div><div style="display: ${display}">foo</div><div style="display: ${display}">bar</div></div>`
133+
)
134+
const { name, nameSource } = getActionNameFromElement(element, defaultConfiguration)
135+
switch (expected) {
136+
case 'space':
137+
expect(name).toBe('foo bar')
138+
expect(nameSource).toBe('text_content')
139+
break
140+
case 'no-space':
141+
expect(name).toBe('foobar')
142+
expect(nameSource).toBe('text_content')
143+
break
144+
case 'nothing':
145+
expect(name).toBe('')
146+
expect(nameSource).toBe('blank')
147+
break
148+
}
149+
})
150+
})
151+
99152
it('ignores the inline script textual content', () => {
100153
const { name, nameSource } = getActionNameFromElement(
101154
appendElement("<div><script>console.log('toto')</script>b</div>"),
@@ -418,7 +471,7 @@ describe('getActionNameFromElement', () => {
418471
expect(nameSource).toBe('custom_attribute')
419472
})
420473

421-
it('remove children with programmatic action name in textual content', () => {
474+
it('removes children with programmatic action name in textual content', () => {
422475
const { name, nameSource } = getActionNameFromElement(
423476
appendElement('<div>Foo <div data-dd-action-name="custom action">bar<div></div>'),
424477
defaultConfiguration
@@ -428,6 +481,16 @@ describe('getActionNameFromElement', () => {
428481
expect(nameSource).toBe('text_content')
429482
})
430483

484+
it('removes only the child with programmatic action name in textual content', () => {
485+
mockExperimentalFeatures([ExperimentalFeature.USE_TREE_WALKER_FOR_ACTION_NAME])
486+
const { name, nameSource } = getActionNameFromElement(
487+
appendElement('<div>Foobar Baz<div data-dd-action-name="custom action">bar<div></div>'),
488+
defaultConfiguration
489+
)
490+
expect(name).toBe('Foobar Baz')
491+
expect(nameSource).toBe('text_content')
492+
})
493+
431494
it('remove children with programmatic action name in textual content based on the user-configured attribute', () => {
432495
const { name, nameSource } = getActionNameFromElement(
433496
appendElement('<div>Foo <div data-test-id="custom action">bar<div></div>'),
@@ -639,6 +702,31 @@ describe('getActionNameFromElement', () => {
639702
)
640703
).toEqual({ name: 'foo', nameSource: ActionNameSource.TEXT_CONTENT })
641704
})
705+
706+
it('inherit privacy level and remove only the masked child', () => {
707+
mockExperimentalFeatures([ExperimentalFeature.USE_TREE_WALKER_FOR_ACTION_NAME])
708+
expect(
709+
getActionNameFromElement(
710+
appendElement(`
711+
<div class="dd-privacy-allow" target>
712+
bar secret
713+
<div>
714+
foo
715+
<div class="dd-privacy-mask">
716+
<span>secret</span>
717+
</div>
718+
</div>
719+
</div>
720+
`),
721+
{
722+
...defaultConfiguration,
723+
enablePrivacyForActionName: true,
724+
},
725+
NodePrivacyLevel.ALLOW
726+
)
727+
).toEqual({ name: 'bar secret foo', nameSource: ActionNameSource.TEXT_CONTENT })
728+
})
729+
642730
it('fallback to children but not the masked one with class names', () => {
643731
expect(
644732
getActionNameFromElement(

packages/rum-core/src/domain/action/getActionNameFromElement.ts

Lines changed: 75 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
import { safeTruncate } from '@datadog/browser-core'
2-
import { NodePrivacyLevel, getPrivacySelector } from '../privacy'
1+
import { ExperimentalFeature, isExperimentalFeatureEnabled, safeTruncate } from '@datadog/browser-core'
2+
import { getNodeSelfPrivacyLevel, getPrivacySelector, NodePrivacyLevel, shouldMaskNode } from '../privacy'
33
import type { RumConfiguration } from '../configuration'
4+
import { isElementNode } from '../../browser/htmlDomUtils'
45

56
/**
67
* Get the action name from the attribute 'data-dd-action-name' on the element or any of its parent.
@@ -23,7 +24,7 @@ interface ActionName {
2324
export function getActionNameFromElement(
2425
element: Element,
2526
{ enablePrivacyForActionName, actionNameAttribute: userProgrammaticAttribute }: RumConfiguration,
26-
nodePrivacyLevel?: NodePrivacyLevel
27+
nodePrivacyLevel: NodePrivacyLevel = NodePrivacyLevel.ALLOW
2728
): ActionName {
2829
// Proceed to get the action name in two steps:
2930
// * first, get the name programmatically, explicitly defined by the user.
@@ -73,14 +74,14 @@ function getActionNameFromElementProgrammatically(targetElement: Element, progra
7374
type NameStrategy = (
7475
element: Element | HTMLElement | HTMLInputElement | HTMLSelectElement,
7576
userProgrammaticAttribute: string | undefined,
76-
privacyEnabledActionName?: boolean
77+
privacyEnabledActionName: boolean
7778
) => ActionName | undefined | null
7879

7980
const priorityStrategies: NameStrategy[] = [
8081
// associated LABEL text
81-
(element, userProgrammaticAttribute) => {
82+
(element, userProgrammaticAttribute, privacyEnabledActionName) => {
8283
if ('labels' in element && element.labels && element.labels.length > 0) {
83-
return getActionNameFromTextualContent(element.labels[0], userProgrammaticAttribute)
84+
return getActionNameFromTextualContent(element.labels[0], userProgrammaticAttribute, privacyEnabledActionName)
8485
}
8586
},
8687
// INPUT button (and associated) value
@@ -120,9 +121,9 @@ const priorityStrategies: NameStrategy[] = [
120121
(element) => getActionNameFromStandardAttribute(element, 'title'),
121122
(element) => getActionNameFromStandardAttribute(element, 'placeholder'),
122123
// SELECT first OPTION text
123-
(element, userProgrammaticAttribute) => {
124+
(element, userProgrammaticAttribute, privacyEnabledActionName) => {
124125
if ('options' in element && element.options.length > 0) {
125-
return getActionNameFromTextualContent(element.options[0], userProgrammaticAttribute)
126+
return getActionNameFromTextualContent(element.options[0], userProgrammaticAttribute, privacyEnabledActionName)
126127
}
127128
},
128129
]
@@ -141,7 +142,7 @@ function getActionNameFromElementForStrategies(
141142
targetElement: Element,
142143
userProgrammaticAttribute: string | undefined,
143144
strategies: NameStrategy[],
144-
privacyEnabledActionName?: boolean
145+
privacyEnabledActionName: boolean
145146
) {
146147
let element: Element | null = targetElement
147148
let recursionCounter = 0
@@ -196,7 +197,7 @@ function getActionNameFromStandardAttribute(element: Element | HTMLElement, attr
196197
function getActionNameFromTextualContent(
197198
element: Element | HTMLElement,
198199
userProgrammaticAttribute: string | undefined,
199-
privacyEnabledActionName?: boolean
200+
privacyEnabledActionName: boolean
200201
): ActionName {
201202
return {
202203
name: getTextualContent(element, userProgrammaticAttribute, privacyEnabledActionName) || '',
@@ -205,16 +206,20 @@ function getActionNameFromTextualContent(
205206
}
206207

207208
function getTextualContent(
208-
element: Element | HTMLElement,
209+
element: Element,
209210
userProgrammaticAttribute: string | undefined,
210-
privacyEnabledActionName?: boolean
211+
privacyEnabledActionName: boolean
211212
) {
212213
if ((element as HTMLElement).isContentEditable) {
213214
return
214215
}
215216

217+
if (isExperimentalFeatureEnabled(ExperimentalFeature.USE_TREE_WALKER_FOR_ACTION_NAME)) {
218+
return getTextualContentWithTreeWalker(element, userProgrammaticAttribute, privacyEnabledActionName)
219+
}
220+
216221
if ('innerText' in element) {
217-
let text = element.innerText
222+
let text = (element as HTMLElement).innerText
218223

219224
const removeTextFromElements = (query: string) => {
220225
const list = element.querySelectorAll<Element | HTMLElement>(query)
@@ -248,3 +253,60 @@ function getTextualContent(
248253

249254
return element.textContent
250255
}
256+
257+
function getTextualContentWithTreeWalker(
258+
element: Element,
259+
userProgrammaticAttribute: string | undefined,
260+
privacyEnabledActionName: boolean
261+
) {
262+
const walker = document.createTreeWalker(
263+
element,
264+
// eslint-disable-next-line no-bitwise
265+
NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT,
266+
rejectInvisibleOrMaskedElementsFilter
267+
)
268+
269+
let text = ''
270+
271+
while (walker.nextNode()) {
272+
const node = walker.currentNode
273+
if (isElementNode(node)) {
274+
if (
275+
// Following InnerText rendering spec https://html.spec.whatwg.org/multipage/dom.html#rendered-text-collection-steps
276+
node.nodeName === 'BR' ||
277+
node.nodeName === 'P' ||
278+
['block', 'flex', 'grid', 'list-item', 'table', 'table-caption'].includes(getComputedStyle(node).display)
279+
) {
280+
text += ' '
281+
}
282+
continue // skip element nodes
283+
}
284+
285+
text += node.textContent || ''
286+
}
287+
288+
return text.replace(/\s+/g, ' ').trim()
289+
290+
function rejectInvisibleOrMaskedElementsFilter(node: Node) {
291+
if (isElementNode(node)) {
292+
const nodeSelfPrivacyLevel = getNodeSelfPrivacyLevel(node)
293+
if (
294+
node.hasAttribute(DEFAULT_PROGRAMMATIC_ACTION_NAME_ATTRIBUTE) ||
295+
(userProgrammaticAttribute && node.hasAttribute(userProgrammaticAttribute)) ||
296+
(privacyEnabledActionName && nodeSelfPrivacyLevel && shouldMaskNode(node, nodeSelfPrivacyLevel))
297+
) {
298+
return NodeFilter.FILTER_REJECT
299+
}
300+
const style = getComputedStyle(node)
301+
if (
302+
style.visibility !== 'visible' ||
303+
style.display === 'none' ||
304+
(style.contentVisibility && style.contentVisibility !== 'visible')
305+
// contentVisibility is not supported in all browsers, so we need to check it
306+
) {
307+
return NodeFilter.FILTER_REJECT
308+
}
309+
}
310+
return NodeFilter.FILTER_ACCEPT
311+
}
312+
}

0 commit comments

Comments
 (0)