@@ -17,6 +17,12 @@ export interface FirstCharacterPath {
1717
1818 /** Indicates whether additional characters must follow to complete the match. */
1919 requiresMore : boolean
20+
21+ /**
22+ * Indicates whether the alternative can consume more characters after the
23+ * prefix.
24+ */
25+ canMatchMore : boolean
2026}
2127
2228/** Matcher produced from a character class AST node. */
@@ -60,8 +66,14 @@ interface AnalysisContext {
6066 /** Cache storing computed minimum lengths for AST nodes. */
6167 minLengthCache : WeakMap < object , LengthResult >
6268
69+ /** Cache storing computed maximum lengths for AST nodes. */
70+ maxLengthCache : WeakMap < object , LengthResult >
71+
6372 /** Alternatives currently on the recursion stack. */
64- activeAlternatives : Set < Alternative >
73+ minLengthActiveAlternatives : Set < Alternative >
74+
75+ /** Alternatives on the recursion stack for maximum-length calculation. */
76+ maxLengthActiveAlternatives : Set < Alternative >
6577
6678 /** Indicates whether collection exceeded the maximum allowed paths. */
6779 limitExceeded : boolean
@@ -72,8 +84,9 @@ interface AnalysisContext {
7284
/**
 * Path record used internally during traversal. It extends the public
 * `FirstCharacterPath` shape and restates its flags.
 */
interface FirstCharacterPathInternal extends FirstCharacterPath {
  /** Restated from the public interface for convenience when mutating paths. */
  canMatchMore: boolean

  /** Restated from the public interface for convenience when mutating paths. */
  requiresMore: boolean
}
7891
7992type LengthResult = LengthInfo | null
@@ -91,8 +104,10 @@ export function getFirstCharacterPaths(
91104 alternative : Alternative ,
92105) : FirstCharacterPath [ ] {
93106 let context : AnalysisContext = {
107+ minLengthActiveAlternatives : new Set ( ) ,
108+ maxLengthActiveAlternatives : new Set ( ) ,
94109 minLengthCache : new WeakMap ( ) ,
95- activeAlternatives : new Set ( ) ,
110+ maxLengthCache : new WeakMap ( ) ,
96111 limitExceeded : false ,
97112 pathCount : 0 ,
98113 }
@@ -106,6 +121,86 @@ export function getFirstCharacterPaths(
106121 return paths
107122}
108123
/**
 * Determines the largest number of characters an element can consume.
 *
 * Results are memoized per AST node in `context.maxLengthCache`, including
 * `null` ("unknown") results. Nothing is computed or cached once the traversal
 * has flagged `context.limitExceeded`.
 *
 * @param element - AST element to analyze.
 * @param context - Shared traversal context.
 * @returns Maximum length in characters, `2` for "two or more", or `null` if
 *   unknown.
 */
function getElementMaxLength(
  element: Element,
  context: AnalysisContext,
): LengthResult {
  // Defensive guard triggers only when traversal exceeded path limit earlier.
  /* c8 ignore next 3 */
  if (context.limitExceeded) {
    return null
  }

  // `undefined` means "never computed"; a stored `null` is a valid answer.
  let memoized = context.maxLengthCache.get(element)
  if (memoized !== undefined) {
    return memoized
  }

  let maxLength = measureElementMaxLength(element, context)
  context.maxLengthCache.set(element, maxLength)

  return maxLength
}

/**
 * Computes the uncached maximum length for a single element based on its node
 * type.
 *
 * @param element - AST element to analyze.
 * @param context - Shared traversal context.
 * @returns Maximum length in characters, `2` for "two or more", or `null` if
 *   unknown.
 */
function measureElementMaxLength(
  element: Element,
  context: AnalysisContext,
): LengthResult {
  switch (element.type) {
    case 'Character':
    case 'CharacterClass':
    case 'CharacterSet': {
      // Each of these node types is measured as exactly one character.
      return 1
    }
    case 'Group':
    case 'CapturingGroup': {
      return getGroupMaxLength(element, context)
    }
    case 'Assertion': {
      // Assertions are measured as zero-length.
      return 0
    }
    case 'Quantifier': {
      let innerMax = getElementMaxLength(element.element, context)

      if (innerMax === null) {
        return null
      }

      // Numerical sentinels are unreachable with current AST inputs.
      /* c8 ignore start */
      if (innerMax === 0 || element.max === 0) {
        return 0
      }

      if (element.max === Infinity) {
        return 2
      }
      /* c8 ignore stop */

      return multiplyLength(innerMax, element.max)
    }
    // Backreferences and any unrecognized node types have unknown length.
    case 'Backreference':
    default: {
      return null
    }
  }
}
203+
109204/**
110205 * Collects deterministic first-character paths that originate from the provided
111206 * element.
@@ -128,6 +223,7 @@ function collectFirstCharacterPathsFromElement(
128223 {
129224 matcher : { type : 'character-class' , value : element } ,
130225 requiresMore : false ,
226+ canMatchMore : false ,
131227 } ,
132228 ]
133229 }
@@ -146,6 +242,7 @@ function collectFirstCharacterPathsFromElement(
146242 {
147243 matcher : { type : 'character-set' , value : element } ,
148244 requiresMore : false ,
245+ canMatchMore : false ,
149246 } ,
150247 ]
151248 }
@@ -157,6 +254,7 @@ function collectFirstCharacterPathsFromElement(
157254 {
158255 matcher : { value : element . value , type : 'character' } ,
159256 requiresMore : false ,
257+ canMatchMore : false ,
160258 } ,
161259 ]
162260 }
@@ -195,10 +293,14 @@ function collectFirstCharacterPathsFromAlternative(
195293
196294 if ( elementPaths . length > 0 ) {
197295 let restLength = getElementsMinLength ( elements , index + 1 , context )
296+ let restMaxLength = getElementsMaxLength ( elements , index + 1 , context )
198297
199298 if ( restLength !== null ) {
299+ let restCanMatchMore = restMaxLength !== 0
300+
200301 for ( let path of elementPaths ) {
201302 addPath ( results , context , {
303+ canMatchMore : path . canMatchMore || restCanMatchMore ,
202304 requiresMore : path . requiresMore || restLength > 0 ,
203305 matcher : path . matcher ,
204306 } )
@@ -214,6 +316,45 @@ function collectFirstCharacterPathsFromAlternative(
214316 return results
215317}
216318
/**
 * Derives first-character paths for a quantified expression from the paths of
 * the element it repeats.
 *
 * Each inner path is copied with its flags widened: `requiresMore` becomes
 * true when the quantifier demands more than one non-empty iteration, and
 * `canMatchMore` becomes true when further iterations are permitted and the
 * element is not known to be zero-length.
 *
 * @param quantifier - Quantifier node to analyze.
 * @param context - Shared traversal context.
 * @returns Paths contributed by the quantified expression, or an empty list
 *   when the inner element yields no paths, the path limit was exceeded, or
 *   the inner minimum length is unknown.
 */
function collectFirstCharacterPathsFromQuantifier(
  quantifier: Quantifier,
  context: AnalysisContext,
): FirstCharacterPathInternal[] {
  let { element, min, max } = quantifier
  let basePaths = collectFirstCharacterPathsFromElement(element, context)

  if (context.limitExceeded || basePaths.length === 0) {
    return []
  }

  let shortest = getElementMinLength(element, context)
  if (shortest === null) {
    return []
  }

  let longest = getElementMaxLength(element, context)

  // A second iteration is mandatory only if each iteration consumes input.
  let mustRepeat = min > 1 && shortest > 0

  // `max > 1` also covers unbounded quantifiers, because `Infinity > 1`.
  // An unknown (`null`) maximum is treated as "may consume characters".
  let mayRepeat = longest !== 0 && max > 1

  let expanded: FirstCharacterPathInternal[] = []
  for (let path of basePaths) {
    expanded.push({
      requiresMore: path.requiresMore || mustRepeat,
      canMatchMore: path.canMatchMore || mayRepeat,
      matcher: path.matcher,
    })
  }

  return expanded
}
357+
217358/**
218359 * Computes the minimum possible length for the provided element.
219360 *
@@ -275,36 +416,36 @@ function getElementMinLength(
275416}
276417
/**
 * Measures the maximum number of characters a whole alternative can consume.
 *
 * The result is memoized in `context.maxLengthCache`. While an alternative is
 * being measured it is tracked in `context.maxLengthActiveAlternatives`;
 * encountering it again before it finishes yields `null`.
 *
 * @param alternative - Alternative whose elements should be measured.
 * @param context - Shared traversal context.
 * @returns Maximum length for the entire alternative, or `null` if unknown.
 */
function getAlternativeMaxLength(
  alternative: Alternative,
  context: AnalysisContext,
): LengthResult {
  let { maxLengthActiveAlternatives: inProgress, maxLengthCache: cache } =
    context
  let memoized = cache.get(alternative)

  // Cache reuse only occurs for recursive alternatives, which tests do not create.
  /* c8 ignore next 3 */
  if (memoized !== undefined) {
    return memoized
  }

  // Already being measured further up the call stack: report "unknown".
  if (inProgress.has(alternative)) {
    return null
  }

  inProgress.add(alternative)
  let maxLength = getElementsMaxLength(alternative.elements, 0, context)
  inProgress.delete(alternative)

  cache.set(alternative, maxLength)

  return maxLength
}
309450
310451/**
@@ -324,20 +465,53 @@ function getAlternativeMinLength(
324465 return cached
325466 }
326467
327- if ( context . activeAlternatives . has ( alternative ) ) {
468+ if ( context . minLengthActiveAlternatives . has ( alternative ) ) {
328469 return null
329470 }
330471
331- context . activeAlternatives . add ( alternative )
472+ context . minLengthActiveAlternatives . add ( alternative )
332473
333474 let length = getElementsMinLength ( alternative . elements , 0 , context )
334475
335- context . activeAlternatives . delete ( alternative )
476+ context . minLengthActiveAlternatives . delete ( alternative )
336477 context . minLengthCache . set ( alternative , length )
337478
338479 return length
339480}
340481
/**
 * Sums the maximum lengths of the elements from `startIndex` to the end of the
 * sequence.
 *
 * Accumulation stops early as soon as the running total becomes `null`
 * (unknown) or reaches the `2` sentinel.
 *
 * @param elements - Sequence of elements belonging to an alternative.
 * @param startIndex - Index from which the suffix begins.
 * @param context - Shared traversal context.
 * @returns Maximum length for the suffix; `0` when the suffix is empty.
 */
function getElementsMaxLength(
  elements: Alternative['elements'],
  startIndex: number,
  context: AnalysisContext,
): LengthResult {
  let total: LengthResult = 0
  let position = startIndex

  while (position < elements.length) {
    let current = elements[position]!

    total = addLengths(total, getElementMaxLength(current, context))

    // Neither outcome can change by inspecting the remaining elements.
    if (total === null || total === 2) {
      return total
    }

    position += 1
  }

  return total
}
514+
341515/**
342516 * Computes the minimum length of a suffix of elements.
343517 *
@@ -403,6 +577,39 @@ function getGroupMinLength(
403577 return minLength
404578}
405579
/**
 * Finds the largest maximum length among a group's alternatives.
 *
 * If any alternative inspected has an unknown (`null`) maximum, the whole group
 * is unknown. Scanning stops once the running maximum equals the `2` sentinel.
 *
 * @param group - Capturing or non-capturing group to analyze.
 * @param context - Shared traversal context.
 * @returns Maximum length across the group's alternatives; `0` when the group
 *   has no alternatives.
 */
function getGroupMaxLength(
  group: CapturingGroup | Group,
  context: AnalysisContext,
): LengthResult {
  let longest: LengthResult = 0

  for (let candidateAlternative of group.alternatives) {
    let candidate = getAlternativeMaxLength(candidateAlternative, context)

    if (candidate === null) {
      return null
    }

    longest = candidate > longest ? candidate : longest

    if (longest === 2) {
      break
    }
  }

  return longest
}
611+
612+ /* c8 ignore start */
406613/**
407614 * Multiplies a minimum length by a quantifier count while respecting sentinel
408615 * values.
@@ -430,6 +637,7 @@ function multiplyLength(length: LengthResult, count: number): LengthResult {
430637
431638 return 2
432639}
640+ /* c8 ignore stop */
433641
434642/**
435643 * Adds a collected path to the results while accounting for the safety limit.
@@ -452,7 +660,7 @@ function addPath(
452660}
453661
454662/**
455- * Adds two minimum -length values together, preserving sentinel semantics.
663+ * Adds two length values together, preserving sentinel semantics.
456664 *
457665 * @param a - First length operand.
458666 * @param b - Second length operand.
0 commit comments