Skip to content

Commit a029c37

Browse files
jaysin586claude
andcommitted
fix(security): replace GROUP_REGEX with iterative parser to prevent ReDoS
Replace the vulnerable GROUP_REGEX constant (nested quantifiers over overlapping character classes) with findNamedGroups(), an O(n) iterative parenthesis-counting parser that eliminates exponential backtracking. - Add findNamedGroups() that scans for (?< markers, parses names, and counts parenthesis depth to find balanced closing parens - Refactor parseExpectedPatterns, cleanTemplate, groupByLine, and resolveTemplate to use the new parser instead of regex - Add ReDoS regression test (50k char malformed input completes <100ms) - Add tests for deep nesting, nested named group rejection, escaped parens, and empty patterns Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent cd3157a commit a029c37

File tree

2 files changed

+186
-19
lines changed

2 files changed

+186
-19
lines changed

src/lib/expectedPatterns.test.ts

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,58 @@ describe('tagExpectedRegions', () => {
254254
})
255255
})
256256

257+
describe('findNamedGroups (via parseExpectedPatterns)', () => {
258+
it('completes in under 100ms on malformed input with 50k trailing chars (ReDoS regression)', () => {
259+
const malicious = '(?<A>' + 'a'.repeat(50000)
260+
const start = performance.now()
261+
const result = parseExpectedPatterns(malicious)
262+
const elapsed = performance.now() - start
263+
264+
expect(result).toBeNull()
265+
expect(elapsed).toBeLessThan(100)
266+
})
267+
268+
it('handles deep nesting (3+ levels) inside a group', () => {
269+
const text = '(?<name>(?:a|(?:b|(?:c|d))))'
270+
const result = parseExpectedPatterns(text)
271+
272+
expect(result).not.toBeNull()
273+
expect(result!.groups).toHaveLength(1)
274+
expect(result!.groups[0].name).toBe('name')
275+
expect(result!.groups[0].pattern).toBe('(?:a|(?:b|(?:c|d)))')
276+
})
277+
278+
it('rejects the outer group when it contains a nested named group', () => {
279+
const text = '(?<outer>(?<inner>foo))'
280+
const result = parseExpectedPatterns(text)
281+
282+
// The outer group is rejected due to nested named group,
283+
// but the inner group is still found as a standalone match
284+
expect(result).not.toBeNull()
285+
expect(result!.groups).toHaveLength(1)
286+
expect(result!.groups[0].name).toBe('inner')
287+
expect(result!.groups[0].pattern).toBe('foo')
288+
})
289+
290+
it('handles escaped parentheses inside groups', () => {
291+
const text = 'before (?<name>\\(escaped\\)) after'
292+
const result = parseExpectedPatterns(text)
293+
294+
expect(result).not.toBeNull()
295+
expect(result!.groups[0].name).toBe('name')
296+
expect(result!.groups[0].pattern).toBe('\\(escaped\\)')
297+
})
298+
299+
it('handles empty pattern in group', () => {
300+
const text = '(?<empty>)'
301+
const result = parseExpectedPatterns(text)
302+
303+
expect(result).not.toBeNull()
304+
expect(result!.groups[0].name).toBe('empty')
305+
expect(result!.groups[0].pattern).toBe('')
306+
})
307+
})
308+
257309
describe('cleanTemplate', () => {
258310
it('replaces named groups with readable placeholders', () => {
259311
expect(cleanTemplate('Copyright (?<year>\\d{4}) (?<holder>.+)')).toBe(

src/lib/expectedPatterns.ts

Lines changed: 134 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,105 @@ interface ParseResult {
5454
parts: string[]
5555
}
5656

57-
const GROUP_REGEX =
58-
/\(\?<([a-zA-Z_][a-zA-Z0-9_]*)>((?:[^()]*|\((?!\?<)(?:[^()]*|\([^()]*\))*\))*)?\)/g
57+
/**
58+
* Represents a named capture group match found by the iterative parser.
59+
*/
60+
interface GroupMatch {
61+
/** The full `(?<name>pattern)` string. */
62+
fullMatch: string
63+
/** The capture group name. */
64+
name: string
65+
/** The pattern inside the group (between `>` and closing `)`). */
66+
pattern: string
67+
/** The start index of the full match in the source text. */
68+
index: number
69+
}
70+
71+
/**
72+
* Finds all `(?<name>pattern)` named capture groups using an iterative
73+
* parenthesis-counting parser. Runs in O(n) time with no backtracking,
74+
* eliminating ReDoS risk from nested quantifiers.
75+
*
76+
* Rejects nested named groups (`(?<` inside the pattern body) to match
77+
* the previous regex behavior.
78+
*
79+
* @param text - The text to scan for named capture groups.
80+
* @returns An array of matched groups with their positions.
81+
*/
82+
const findNamedGroups = (text: string): GroupMatch[] => {
83+
const results: GroupMatch[] = []
84+
let i = 0
85+
86+
while (i < text.length) {
87+
// Look for `(?<` marker
88+
if (text[i] === '(' && text[i + 1] === '?' && text[i + 2] === '<') {
89+
const startIndex = i
90+
91+
// Parse the name: must be [a-zA-Z_][a-zA-Z0-9_]*
92+
const nameStart = i + 3
93+
if (nameStart >= text.length || !/[a-zA-Z_]/.test(text[nameStart])) {
94+
i++
95+
continue
96+
}
97+
98+
let nameEnd = nameStart + 1
99+
while (nameEnd < text.length && /[a-zA-Z0-9_]/.test(text[nameEnd])) {
100+
nameEnd++
101+
}
102+
103+
// Expect `>` after the name
104+
if (nameEnd >= text.length || text[nameEnd] !== '>') {
105+
i++
106+
continue
107+
}
108+
109+
const name = text.slice(nameStart, nameEnd)
110+
const patternStart = nameEnd + 1
111+
112+
// Count parenthesis depth to find balanced closing `)`
113+
// We start at depth 1 (for the opening `(` at startIndex)
114+
let depth = 1
115+
let j = patternStart
116+
let hasNestedNamedGroup = false
117+
118+
while (j < text.length && depth > 0) {
119+
if (text[j] === '\\') {
120+
j += 2 // skip escaped character
121+
continue
122+
}
123+
if (text[j] === '(') {
124+
// Check for nested named group
125+
if (
126+
text[j + 1] === '?' &&
127+
text[j + 2] === '<' &&
128+
j + 3 < text.length &&
129+
/[a-zA-Z_]/.test(text[j + 3])
130+
) {
131+
hasNestedNamedGroup = true
132+
}
133+
depth++
134+
} else if (text[j] === ')') {
135+
depth--
136+
if (depth === 0) break
137+
}
138+
j++
139+
}
140+
141+
if (depth === 0 && !hasNestedNamedGroup) {
142+
const pattern = text.slice(patternStart, j)
143+
const fullMatch = text.slice(startIndex, j + 1)
144+
results.push({ fullMatch, name, pattern, index: startIndex })
145+
i = j + 1
146+
} else {
147+
i++
148+
}
149+
} else {
150+
i++
151+
}
152+
}
153+
154+
return results
155+
}
59156

60157
/**
61158
* Parses `(?<name>pattern)` named capture groups from text.
@@ -67,21 +164,20 @@ const GROUP_REGEX =
67164
* @returns The parsed groups and parts, or null if no named groups are found.
68165
*/
69166
export const parseExpectedPatterns = (text: string): ParseResult | null => {
70-
const groupRegex = new RegExp(GROUP_REGEX.source, GROUP_REGEX.flags)
167+
const matches = findNamedGroups(text)
168+
if (matches.length === 0) return null
169+
71170
const groups: ParsedGroup[] = []
72171
const parts: string[] = []
73172
let lastIndex = 0
74-
let match: RegExpExecArray | null
75173

76-
while ((match = groupRegex.exec(text)) !== null) {
174+
for (const match of matches) {
77175
parts.push(text.slice(lastIndex, match.index))
78-
groups.push({ name: match[1], pattern: match[2] ?? '' })
79-
parts.push(match[0]) // the full group match as a placeholder
80-
lastIndex = groupRegex.lastIndex
176+
groups.push({ name: match.name, pattern: match.pattern })
177+
parts.push(match.fullMatch) // the full group match as a placeholder
178+
lastIndex = match.index + match.fullMatch.length
81179
}
82180

83-
if (groups.length === 0) return null
84-
85181
parts.push(text.slice(lastIndex))
86182
return { groups, parts }
87183
}
@@ -96,8 +192,18 @@ export const parseExpectedPatterns = (text: string): ParseResult | null => {
96192
* @returns The text with capture groups replaced by `<name>` placeholders.
97193
*/
98194
export const cleanTemplate = (text: string): string => {
99-
const groupRegex = new RegExp(GROUP_REGEX.source, GROUP_REGEX.flags)
100-
return text.replace(groupRegex, (_, name) => `<${name}>`)
195+
const matches = findNamedGroups(text)
196+
if (matches.length === 0) return text
197+
198+
let result = ''
199+
let lastIndex = 0
200+
for (const match of matches) {
201+
result += text.slice(lastIndex, match.index)
202+
result += `<${match.name}>`
203+
lastIndex = match.index + match.fullMatch.length
204+
}
205+
result += text.slice(lastIndex)
206+
return result
101207
}
102208

103209
/**
@@ -134,13 +240,12 @@ const groupByLine = (
134240
originalText: string,
135241
_parseResult: ParseResult
136242
): Map<number, { lineText: string; groups: LineGroup[] }> => {
137-
const groupRegex = new RegExp(GROUP_REGEX.source, GROUP_REGEX.flags)
243+
const matches = findNamedGroups(originalText)
138244
const lines = originalText.split('\n')
139245

140246
const lineMap = new Map<number, { lineText: string; groups: LineGroup[] }>()
141-
let match: RegExpExecArray | null
142247

143-
while ((match = groupRegex.exec(originalText)) !== null) {
248+
for (const match of matches) {
144249
const absIndex = match.index
145250
let charCount = 0
146251
let lineNum = 0
@@ -163,8 +268,8 @@ const groupByLine = (
163268
}
164269

165270
lineMap.get(lineNum)!.groups.push({
166-
name: match[1],
167-
pattern: match[2] ?? '',
271+
name: match.name,
272+
pattern: match.pattern,
168273
indexInLine: absIndex - lineStartOffset
169274
})
170275
}
@@ -307,8 +412,18 @@ export const extractCaptures = (
307412
* @returns The template with capture groups replaced by their captured values.
308413
*/
309414
const resolveTemplate = (text: string, captures: Record<string, string>): string => {
310-
const groupRegex = new RegExp(GROUP_REGEX.source, GROUP_REGEX.flags)
311-
return text.replace(groupRegex, (_, name) => captures[name] ?? '')
415+
const matches = findNamedGroups(text)
416+
if (matches.length === 0) return text
417+
418+
let result = ''
419+
let lastIndex = 0
420+
for (const match of matches) {
421+
result += text.slice(lastIndex, match.index)
422+
result += captures[match.name] ?? ''
423+
lastIndex = match.index + match.fullMatch.length
424+
}
425+
result += text.slice(lastIndex)
426+
return result
312427
}
313428

314429
/**

0 commit comments

Comments
 (0)