Skip to content

Commit 6728d12

Browse files
authored
Add a slimDOM option to strip out unnecessary parts of the DOM (#36)
* Add a `slimDOM` option to strip out unnecessary parts of the DOM in terms of replay - <script> tags in the <head> take up unnecessary storage space and are often injected semi-randomly to become a source of unnecessary variation between recordings of the same thing - comment tags can be stripped out without affecting display - future: this option could also turn on more aggressive stripping, e.g. elements that are hidden by CSS (assuming we can handle them becoming visible after mutation events) * Mark nodes ignored due to slimDOM option, so that they can also be ignored by the mutation observer in rrweb * Introducing the `ignored` attribute violates the `serializedNodeWithId` type * slimDOM: Strip out whitespace nodes from <head> element as they have no effect but take up space - these would otherwise have to be merged after <script> elements are removed; for statcounter use case, removing <script> elements is no good if there is still a trace of their presence due to the white space (and hence a variant <head> node is still produced) - I explored a more radical stripping of all white space nodes, but there is a problem if parent node is <pre> or otherwise rendered with `white-space: pre` and similar. Detecting applied styles with getComputedStyle would be very expensive (I haven't measured it though) * Export IGNORED_NODE as a constant instead of relying on the hard-to-grok `-2` * Remove <link rel=preload as=script> which are similarly as useless as <script> tags * Make slimDOM configurable with the expectation that `slimDOMOptions: true` will only enable non-destructive options (so not all options may be turned on) * Expand slimDOM to add options to remove more elements from the <head> that should not be necessary in the replayer context
1 parent fd285ed commit 6728d12

File tree

3 files changed

+139
-0
lines changed

3 files changed

+139
-0
lines changed

src/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import snapshot, {
22
serializeNodeWithId,
33
transformAttribute,
44
visitSnapshot,
5+
IGNORED_NODE,
56
} from './snapshot';
67
import rebuild, { buildNodeWithSN, addHoverClass } from './rebuild';
78
export * from './types';
@@ -14,4 +15,5 @@ export {
1415
addHoverClass,
1516
transformAttribute,
1617
visitSnapshot,
18+
IGNORED_NODE,
1719
};

src/snapshot.ts

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,14 @@ import {
66
INode,
77
idNodeMap,
88
MaskInputOptions,
9+
SlimDOMOptions,
910
} from './types';
1011

1112
let _id = 1;
1213
const tagNameRegex = RegExp('[^a-z1-6-_]');
1314

15+
/**
 * Sentinel id assigned to nodes that are deliberately left out of the
 * snapshot (slimDOM exclusions and stripped whitespace-only text nodes).
 * Such nodes still get an `__sn` record carrying this id, but they are not
 * added to the id-to-node map and are not emitted in the serialized tree.
 */
export const IGNORED_NODE = -2;
16+
1417
function genId(): number {
1518
return _id++;
1619
}
@@ -323,6 +326,83 @@ function serializeNode(
323326
}
324327
}
325328

329+
/**
 * Normalises an attribute value for case-insensitive comparison.
 *
 * @param maybeAttr - Attribute value read from a serialized node; it may be
 *   missing entirely, and is not guaranteed to be a string.
 * @returns The value as a lower-cased string, or `''` when the value is
 *   `undefined` or `null`.
 */
function lowerIfExists(
  maybeAttr: string | number | boolean | null | undefined,
): string {
  if (maybeAttr === undefined || maybeAttr === null) {
    return '';
  }
  // Numbers and booleans have no toLowerCase(); stringify first so a
  // non-string attribute value cannot throw here.
  return String(maybeAttr).toLowerCase();
}
336+
337+
/**
 * Decides whether a serialized node should be left out of the snapshot
 * according to the enabled slimDOM options.
 *
 * Only comment nodes and element nodes can be excluded; every other node
 * type yields `false`. An option that is unset is treated as disabled.
 *
 * @param sn - The serialized node under consideration.
 * @param slimDOMOptions - Flags selecting which categories of node to drop.
 * @returns `true` when the node matches an enabled exclusion rule,
 *   otherwise `false`.
 */
function slimDOMExcluded(
  sn: serializedNode,
  slimDOMOptions: SlimDOMOptions,
): boolean {
  if (slimDOMOptions.comment && sn.type === NodeType.Comment) {
    // TODO: convert IE conditional comments to real nodes
    return true;
  }
  if (sn.type !== NodeType.Element) {
    return false;
  }

  const tagName = sn.tagName;
  const attributes = sn.attributes;
  // Lazy, lower-cased attribute lookup ('' when the attribute is absent).
  const attr = (key: string): string => lowerIfExists(attributes[key]);

  // <script> elements and script preload hints.
  if (
    slimDOMOptions.script &&
    (tagName === 'script' ||
      (tagName === 'link' &&
        attributes.rel === 'preload' &&
        attributes['as'] === 'script'))
  ) {
    return true;
  }

  // Favicon / application tile metadata.
  // NOTE(review): `rel` is normally carried by <link> rather than <meta>;
  // the <meta rel=...> checks are kept exactly as written — confirm intent.
  if (
    slimDOMOptions.headFavicon &&
    ((tagName === 'link' && attributes.rel === 'shortcut icon') ||
      (tagName === 'meta' &&
        (/^msapplication-tile(image|color)$/.test(attr('name')) ||
          attr('name') === 'application-name' ||
          attr('rel') === 'icon' ||
          attr('rel') === 'apple-touch-icon' ||
          attr('rel') === 'shortcut icon')))
  ) {
    return true;
  }

  // Every remaining rule applies to <meta> elements only.
  if (tagName !== 'meta') {
    return false;
  }

  // The alternation is grouped so that both anchors apply to both words:
  // without the parentheses the pattern means "starts with description" OR
  // "ends with keywords" and would also match e.g. `news_keywords`.
  if (
    slimDOMOptions.headMetaDescKeywords &&
    /^(description|keywords)$/.test(attr('name'))
  ) {
    return true;
  }

  if (
    slimDOMOptions.headMetaSocial &&
    (/^(og|twitter|fb):/.test(attr('property')) || // og = opengraph (facebook)
      /^(og|twitter):/.test(attr('name')) ||
      attr('name') === 'pinterest')
  ) {
    return true;
  }

  if (
    slimDOMOptions.headMetaRobots &&
    (attr('name') === 'robots' ||
      attr('name') === 'googlebot' ||
      attr('name') === 'bingbot')
  ) {
    return true;
  }

  if (
    slimDOMOptions.headMetaHttpEquiv &&
    attributes['http-equiv'] !== undefined
  ) {
    // e.g. X-UA-Compatible, Content-Type, Content-Language,
    // cache-control, X-Translated-By
    return true;
  }

  if (
    slimDOMOptions.headMetaAuthorship &&
    (attr('name') === 'author' ||
      attr('name') === 'generator' ||
      attr('name') === 'framework' ||
      attr('name') === 'publisher' ||
      attr('name') === 'progid' ||
      /^article:/.test(attr('property')) ||
      /^product:/.test(attr('property')))
  ) {
    return true;
  }

  if (
    slimDOMOptions.headMetaVerification &&
    (attr('name') === 'google-site-verification' ||
      attr('name') === 'yandex-verification' ||
      attr('name') === 'csrf-token' ||
      attr('name') === 'p:domain_verify' ||
      attr('name') === 'verify-v1' ||
      attr('name') === 'verification' ||
      attr('name') === 'shopify-checkout-api-token')
  ) {
    return true;
  }

  return false;
}
405+
326406
export function serializeNodeWithId(
327407
n: Node | INode,
328408
doc: Document,
@@ -331,7 +411,9 @@ export function serializeNodeWithId(
331411
skipChild = false,
332412
inlineStylesheet = true,
333413
maskInputOptions?: MaskInputOptions,
414+
slimDOMOptions: SlimDOMOptions = {},
334415
recordCanvas?: boolean,
416+
preserveWhiteSpace = true,
335417
): serializedNodeWithId | null {
336418
const _serializedNode = serializeNode(
337419
n,
@@ -346,15 +428,26 @@ export function serializeNodeWithId(
346428
console.warn(n, 'not serialized');
347429
return null;
348430
}
431+
349432
let id;
350433
// Try to reuse the previous id
351434
if ('__sn' in n) {
352435
id = n.__sn.id;
436+
} else if (slimDOMExcluded(_serializedNode, slimDOMOptions) ||
437+
(!preserveWhiteSpace &&
438+
_serializedNode.type === NodeType.Text &&
439+
!_serializedNode.isStyle &&
440+
!_serializedNode.textContent.replace(/^\s+|\s+$/gm,'').length
441+
)) {
442+
id = IGNORED_NODE;
353443
} else {
354444
id = genId();
355445
}
356446
const serializedNode = Object.assign(_serializedNode, { id });
357447
(n as INode).__sn = serializedNode;
448+
if (id === IGNORED_NODE) {
449+
return null; // slimDOM
450+
}
358451
map[id] = n as INode;
359452
let recordChild = !skipChild;
360453
if (serializedNode.type === NodeType.Element) {
@@ -367,6 +460,14 @@ export function serializeNodeWithId(
367460
serializedNode.type === NodeType.Element) &&
368461
recordChild
369462
) {
463+
if (
464+
(slimDOMOptions.headWhitespace &&
465+
_serializedNode.type === NodeType.Element &&
466+
_serializedNode.tagName == 'head')
467+
// would impede performance: || getComputedStyle(n)['white-space'] === 'normal'
468+
) {
469+
preserveWhiteSpace = false;
470+
}
370471
for (const childN of Array.from(n.childNodes)) {
371472
const serializedChildNode = serializeNodeWithId(
372473
childN,
@@ -376,7 +477,9 @@ export function serializeNodeWithId(
376477
skipChild,
377478
inlineStylesheet,
378479
maskInputOptions,
480+
slimDOMOptions,
379481
recordCanvas,
482+
preserveWhiteSpace,
380483
);
381484
if (serializedChildNode) {
382485
serializedNode.childNodes.push(serializedChildNode);
@@ -391,6 +494,7 @@ function snapshot(
391494
blockClass: string | RegExp = 'rr-block',
392495
inlineStylesheet = true,
393496
maskAllInputsOrOptions: boolean | MaskInputOptions,
497+
slimDOMSensibleOrOptions: boolean | SlimDOMOptions,
394498
recordCanvas?: boolean,
395499
): [serializedNodeWithId | null, idNodeMap] {
396500
const idNodeMap: idNodeMap = {};
@@ -416,6 +520,25 @@ function snapshot(
416520
: maskAllInputsOrOptions === false
417521
? {}
418522
: maskAllInputsOrOptions;
523+
const slimDOMOptions: SlimDOMOptions =
524+
(slimDOMSensibleOrOptions === true ||
525+
slimDOMSensibleOrOptions === 'all')
526+
// if true: set of sensible options that should not throw away any information
527+
? {
528+
script: true,
529+
comment: true,
530+
headFavicon: true,
531+
headWhitespace: true,
532+
headMetaDescKeywords: slimDOMSensibleOrOptions === 'all', // destructive
533+
headMetaSocial: true,
534+
headMetaRobots: true,
535+
headMetaHttpEquiv: true,
536+
headMetaAuthorship: true,
537+
headMetaVerification: true,
538+
}
539+
: slimDOMSensibleOrOptions === false
540+
? {}
541+
: slimDOMSensibleOrOptions;
419542
return [
420543
serializeNodeWithId(
421544
n,
@@ -425,6 +548,7 @@ function snapshot(
425548
false,
426549
inlineStylesheet,
427550
maskInputOptions,
551+
slimDOMOptions,
428552
recordCanvas,
429553
),
430554
idNodeMap,

src/types.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,3 +87,16 @@ export type MaskInputOptions = Partial<{
8787
textarea: boolean;
8888
select: boolean;
8989
}>;
90+
91+
/**
 * Switches selecting which parts of the DOM are left out of a snapshot.
 * Every key is optional; an unset key behaves as disabled.
 *
 * - `script`: `<script>` elements and `<link rel=preload as=script>`.
 * - `comment`: comment nodes.
 * - `headFavicon`: favicon / application-tile related `<link>` and `<meta>`.
 * - `headWhitespace`: whitespace-only text nodes inside `<head>`.
 * - `headMetaDescKeywords`: `<meta name>` description / keywords.
 * - `headMetaSocial`: social-sharing `<meta>` (og, twitter, fb, pinterest).
 * - `headMetaRobots`: crawler directives (robots, googlebot, bingbot).
 * - `headMetaHttpEquiv`: any `<meta>` carrying an `http-equiv` attribute.
 * - `headMetaAuthorship`: author / generator / publisher style `<meta>`.
 * - `headMetaVerification`: site-verification and token `<meta>`.
 */
export type SlimDOMOptions = Partial<{
92+
script: boolean;
93+
comment: boolean;
94+
headFavicon: boolean;
95+
headWhitespace: boolean;
96+
headMetaDescKeywords: boolean;
97+
headMetaSocial: boolean;
98+
headMetaRobots: boolean;
99+
headMetaHttpEquiv: boolean;
100+
headMetaAuthorship: boolean;
101+
headMetaVerification: boolean;
102+
}>;

0 commit comments

Comments
 (0)