Skip to content

Commit 7dc559a

Browse files
authored
perf(metascraper): fast-path mergeRules in common case (microlinkhq#820)
* perf(metascraper): fast-path mergeRules in common case * test(metascraper): add mergeRules legacy-compat checks * test(metascraper): replace legacy comparator with explicit mergeRules suite
1 parent 97587f3 commit 7dc559a

File tree

2 files changed

+179
-35
lines changed

2 files changed

+179
-35
lines changed

packages/metascraper/src/rules.js

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -31,45 +31,46 @@ const mergeRules = (
3131
omitPropNames = new Set(),
3232
pickPropNames
3333
) => {
34+
const hasPickProps = Boolean(pickPropNames && pickPropNames.size > 0)
35+
const hasOmittedProps = Boolean(omitPropNames && omitPropNames.size > 0)
36+
const hasInlineRules = Array.isArray(rules) && rules.length > 0
37+
38+
if (!hasPickProps && !hasOmittedProps && !hasInlineRules) {
39+
return baseRules
40+
}
41+
3442
const result = {}
3543

36-
// Helper function to determine if a property should be included
3744
const shouldIncludeProp = propName => {
38-
if (pickPropNames && pickPropNames.size > 0) {
45+
if (hasPickProps) {
3946
return pickPropNames.has(propName)
4047
}
4148
return !omitPropNames.has(propName)
4249
}
4350

44-
// Process base rules first (shallow clone arrays only)
4551
for (const [propName, ruleArray] of baseRules) {
4652
if (shouldIncludeProp(propName)) {
47-
result[propName] = [...ruleArray] // Shallow clone array
53+
result[propName] = ruleArray
4854
}
4955
}
5056

51-
// Handle case where rules might be null/undefined or not an array
52-
if (!rules || !Array.isArray(rules)) {
57+
if (!hasInlineRules) {
5358
return Object.entries(result)
5459
}
5560

56-
// Process inline rules
5761
for (const { test, ...ruleSet } of rules) {
5862
for (const [propName, innerRules] of Object.entries(ruleSet)) {
5963
if (!shouldIncludeProp(propName)) continue
6064

61-
const processedRules = Array.isArray(innerRules)
62-
? [...innerRules]
63-
: [innerRules]
65+
const processedRules = castArray(innerRules)
6466
if (test) {
6567
for (const rule of processedRules) {
6668
rule.test = test
6769
}
6870
}
6971

7072
if (result[propName]) {
71-
// Prepend new rules to match original concat(innerRules, existing) behavior
72-
result[propName] = [...processedRules, ...result[propName]]
73+
result[propName] = processedRules.concat(result[propName])
7374
} else {
7475
result[propName] = processedRules
7576
}

packages/metascraper/test/unit/merge-rules.js

Lines changed: 166 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
11
'use strict'
22

33
const test = require('ava')
4+
const { mergeRules } = require('../../src/rules')
5+
6+
const getRuleIdsByProp = merged =>
7+
Object.fromEntries(
8+
merged.map(([propName, rules]) => [propName, rules.map(rule => rule.id)])
9+
)
10+
11+
const findRules = (merged, propName) =>
12+
merged.find(([name]) => name === propName)?.[1] ?? []
413

514
test("add a new rule from a prop that doesn't exist", async t => {
615
const url = 'https://microlink.io'
@@ -72,20 +81,89 @@ test('add a new rule for a prop that exists', async t => {
7281
t.is(metadata.image, 'https://microlink.io/logo.png')
7382
})
7483

75-
test('does not mutate original rule objects', t => {
76-
const { mergeRules } = require('../../src/rules')
84+
test('merge inline rules before base rules preserving rule-set order', t => {
85+
const baseRules = [
86+
['title', [{ id: 'base-title-1' }, { id: 'base-title-2' }]],
87+
['description', [{ id: 'base-description-1' }]]
88+
]
89+
90+
const inlineRules = [
91+
{
92+
title: [{ id: 'inline-title-1' }, { id: 'inline-title-2' }]
93+
},
94+
{
95+
title: [{ id: 'inline-title-3' }],
96+
description: [{ id: 'inline-description-1' }]
97+
}
98+
]
7799

78-
// Create original rule objects that we'll check for mutation
100+
const merged = mergeRules(inlineRules, baseRules)
101+
const ruleIdsByProp = getRuleIdsByProp(merged)
102+
103+
t.deepEqual(ruleIdsByProp.title, [
104+
'inline-title-3',
105+
'inline-title-1',
106+
'inline-title-2',
107+
'base-title-1',
108+
'base-title-2'
109+
])
110+
t.deepEqual(ruleIdsByProp.description, [
111+
'inline-description-1',
112+
'base-description-1'
113+
])
114+
})
115+
116+
test('accept non-array inline rule values', t => {
117+
const baseRules = [['title', [{ id: 'base-title-1' }]]]
118+
const inlineRules = [{ title: { id: 'inline-title-1' } }]
119+
120+
const merged = mergeRules(inlineRules, baseRules)
121+
122+
t.deepEqual(getRuleIdsByProp(merged).title, [
123+
'inline-title-1',
124+
'base-title-1'
125+
])
126+
})
127+
128+
test('propagate inline test function only to inline rules', t => {
129+
const baseRule = { id: 'base-title-1' }
130+
const inlineOne = { id: 'inline-title-1' }
131+
const inlineTwo = { id: 'inline-title-2' }
132+
const inlineDescription = { id: 'inline-description-1' }
133+
const testFn = () => true
134+
135+
const baseRules = [
136+
['title', [baseRule]],
137+
['description', []]
138+
]
139+
140+
const inlineRules = [
141+
{
142+
test: testFn,
143+
title: [inlineOne, inlineTwo],
144+
description: inlineDescription
145+
}
146+
]
147+
148+
const merged = mergeRules(inlineRules, baseRules)
149+
const titleRules = findRules(merged, 'title')
150+
const descriptionRules = findRules(merged, 'description')
151+
152+
t.is(titleRules[0].test, testFn)
153+
t.is(titleRules[1].test, testFn)
154+
t.is(descriptionRules[0].test, testFn)
155+
t.is(baseRule.test, undefined)
156+
})
157+
158+
test('does not mutate original rule objects', t => {
79159
const originalRule1 = { fn: () => 'value1', originalProp: 'unchanged' }
80160
const originalRule2 = { fn: () => 'value2', originalProp: 'unchanged' }
81161

82-
// Base rules with references to original rule objects
83162
const baseRules = [
84163
['prop1', [originalRule1]],
85164
['prop2', [originalRule2]]
86165
]
87166

88-
// Inline rules with a test property that would cause mutation
89167
const inlineRules = [
90168
{
91169
test: url => url.includes('example'),
@@ -94,14 +172,11 @@ test('does not mutate original rule objects', t => {
94172
}
95173
]
96174

97-
// Store original state for comparison
98175
const originalRule1Copy = { ...originalRule1 }
99176
const originalRule2Copy = { ...originalRule2 }
100177

101-
// Call mergeRules - this should not mutate the original rule objects
102178
mergeRules(inlineRules, baseRules)
103179

104-
// Verify original rule objects are not mutated
105180
t.deepEqual(
106181
originalRule1,
107182
originalRule1Copy,
@@ -112,8 +187,6 @@ test('does not mutate original rule objects', t => {
112187
originalRule2Copy,
113188
'originalRule2 should not be mutated'
114189
)
115-
116-
// Verify original rules don't have the test property added
117190
t.is(
118191
originalRule1.test,
119192
undefined,
@@ -127,32 +200,25 @@ test('does not mutate original rule objects', t => {
127200
})
128201

129202
test('baseRules array should not be mutated (fails without cloneDeep)', t => {
130-
const { mergeRules } = require('../../src/rules')
131-
132-
// Create original baseRules array
133203
const originalBaseRules = [
134204
['title', [() => 'original title']],
135205
['description', [() => 'original description']]
136206
]
137207

138-
// Store original length and structure for comparison
139208
const originalLength = originalBaseRules.length
140209
const originalKeys = originalBaseRules.map(([key]) => key)
141210
const originalTitleRulesLength = originalBaseRules[0][1].length
142211

143-
// Inline rules that will add new properties and merge with existing ones
144212
const inlineRules = [
145213
{
146-
title: [() => 'new title'], // This will merge with existing title
147-
author: [() => 'new author'], // This will add a new property
148-
test: url => url.includes('test') // This adds a test property
214+
title: [() => 'new title'],
215+
author: [() => 'new author'],
216+
test: url => url.includes('test')
149217
}
150218
]
151219

152-
// Call mergeRules - this should NOT mutate originalBaseRules
153220
const result = mergeRules(inlineRules, originalBaseRules)
154221

155-
// Verify the original baseRules array structure was not mutated
156222
t.is(
157223
originalBaseRules.length,
158224
originalLength,
@@ -169,24 +235,101 @@ test('baseRules array should not be mutated (fails without cloneDeep)', t => {
169235
'Original title rules array should not be modified'
170236
)
171237

172-
// Verify the result is different from the original (proving it's a new array)
173238
t.not(
174239
result,
175240
originalBaseRules,
176241
'Result should be a different array reference'
177242
)
178243

179-
// Verify the result contains the expected merged rules
180244
t.true(
181245
result.length >= originalLength,
182246
'Result should contain at least the original rules'
183247
)
184248

185-
// Find the title rule in the result and verify it was merged
186249
const titleRule = result.find(([propName]) => propName === 'title')
187250
t.truthy(titleRule, 'Title rule should exist in result')
188251
t.true(
189252
titleRule[1].length > originalTitleRulesLength,
190253
'Title rule should have more rules after merging'
191254
)
192255
})
256+
257+
test('omit filters both base and inline rules', t => {
258+
const baseRules = [
259+
['title', [{ id: 'base-title-1' }]],
260+
['description', [{ id: 'base-description-1' }]]
261+
]
262+
263+
const inlineRules = [
264+
{
265+
title: [{ id: 'inline-title-1' }],
266+
description: [{ id: 'inline-description-1' }],
267+
image: [{ id: 'inline-image-1' }]
268+
}
269+
]
270+
271+
const merged = mergeRules(
272+
inlineRules,
273+
baseRules,
274+
new Set(['description', 'image'])
275+
)
276+
277+
t.deepEqual(Object.keys(getRuleIdsByProp(merged)), ['title'])
278+
t.deepEqual(getRuleIdsByProp(merged).title, [
279+
'inline-title-1',
280+
'base-title-1'
281+
])
282+
})
283+
284+
test('pick filters both base and inline rules', t => {
285+
const baseRules = [
286+
['title', [{ id: 'base-title-1' }]],
287+
['description', [{ id: 'base-description-1' }]]
288+
]
289+
290+
const inlineRules = [
291+
{
292+
title: [{ id: 'inline-title-1' }],
293+
description: [{ id: 'inline-description-1' }],
294+
image: [{ id: 'inline-image-1' }]
295+
}
296+
]
297+
298+
const merged = mergeRules(
299+
inlineRules,
300+
baseRules,
301+
new Set(),
302+
new Set(['title', 'image'])
303+
)
304+
305+
const ruleIdsByProp = getRuleIdsByProp(merged)
306+
t.deepEqual(Object.keys(ruleIdsByProp), ['title', 'image'])
307+
t.deepEqual(ruleIdsByProp.title, ['inline-title-1', 'base-title-1'])
308+
t.deepEqual(ruleIdsByProp.image, ['inline-image-1'])
309+
})
310+
311+
test('pick has priority over omit', t => {
312+
const baseRules = [
313+
['title', [{ id: 'base-title-1' }]],
314+
['description', [{ id: 'base-description-1' }]]
315+
]
316+
317+
const inlineRules = [
318+
{
319+
title: [{ id: 'inline-title-1' }],
320+
image: [{ id: 'inline-image-1' }]
321+
}
322+
]
323+
324+
const merged = mergeRules(
325+
inlineRules,
326+
baseRules,
327+
new Set(['title', 'image']),
328+
new Set(['title', 'image'])
329+
)
330+
331+
const ruleIdsByProp = getRuleIdsByProp(merged)
332+
t.deepEqual(Object.keys(ruleIdsByProp), ['title', 'image'])
333+
t.deepEqual(ruleIdsByProp.title, ['inline-title-1', 'base-title-1'])
334+
t.deepEqual(ruleIdsByProp.image, ['inline-image-1'])
335+
})

0 commit comments

Comments
 (0)