@@ -155,15 +155,22 @@ private class RegexpCharacterConstant extends RegExpConstant {
155
155
RegexpCharacterConstant ( ) { this .isCharacter ( ) }
156
156
}
157
157
158
+ /**
159
+ * A regexp term that is relevant for this ReDoS analysis, that is, a term whose root (as given by `getRoot`) satisfies `isRelevant()`.
160
+ */
161
+ class RelevantRegExpTerm extends RegExpTerm {
162
+ RelevantRegExpTerm ( ) { getRoot ( this ) .isRelevant ( ) }
163
+ }
164
+
158
165
/**
159
166
* Holds if `term` is the chosen canonical representative for all terms with string representation `str`.
160
167
*
161
168
* Using canonical representatives gives a huge performance boost when working with tuples containing multiple `InputSymbol`s.
162
169
* The number of `InputSymbol`s is decreased by 3 orders of magnitude or more in some larger benchmarks.
163
170
*/
164
- private predicate isCanonicalTerm ( RegExpTerm term , string str ) {
171
+ private predicate isCanonicalTerm ( RelevantRegExpTerm term , string str ) {
165
172
term =
166
- rank [ 1 ] ( RegExpTerm t , Location loc , File file |
173
+ min ( RelevantRegExpTerm t , Location loc , File file |
167
174
loc = t .getLocation ( ) and
168
175
file = t .getFile ( ) and
169
176
str = t .getRawValue ( )
@@ -178,15 +185,15 @@ private predicate isCanonicalTerm(RegExpTerm term, string str) {
178
185
private newtype TInputSymbol =
179
186
/** An input symbol corresponding to character `c`. */
180
187
Char ( string c ) {
181
- c = any ( RegexpCharacterConstant cc | getRoot ( cc ) . isRelevant ( ) ) .getValue ( ) .charAt ( _)
188
+ c = any ( RegexpCharacterConstant cc | cc instanceof RelevantRegExpTerm ) .getValue ( ) .charAt ( _)
182
189
} or
183
190
/**
184
191
* An input symbol representing all characters matched by
185
192
* a (non-universal) character class that has string representation `charClassString`.
186
193
*/
187
194
CharClass ( string charClassString ) {
188
- exists ( RegExpTerm term | term .getRawValue ( ) = charClassString | getRoot ( term ) . isRelevant ( ) ) and
189
- exists ( RegExpTerm recc | isCanonicalTerm ( recc , charClassString ) |
195
+ exists ( RelevantRegExpTerm term | term .getRawValue ( ) = charClassString ) and
196
+ exists ( RelevantRegExpTerm recc | isCanonicalTerm ( recc , charClassString ) |
190
197
recc instanceof RegExpCharacterClass and
191
198
not recc .( RegExpCharacterClass ) .isUniversalClass ( )
192
199
or
@@ -626,13 +633,10 @@ RegExpRoot getRoot(RegExpTerm term) {
626
633
}
627
634
628
635
private newtype TState =
629
- Match ( RegExpTerm t , int i ) {
630
- getRoot ( t ) .isRelevant ( ) and
631
- (
632
- i = 0
633
- or
634
- exists ( t .( RegexpCharacterConstant ) .getValue ( ) .charAt ( i ) )
635
- )
636
+ Match ( RelevantRegExpTerm t , int i ) {
637
+ i = 0
638
+ or
639
+ exists ( t .( RegexpCharacterConstant ) .getValue ( ) .charAt ( i ) )
636
640
} or
637
641
Accept ( RegExpRoot l ) { l .isRelevant ( ) } or
638
642
AcceptAnySuffix ( RegExpRoot l ) { l .isRelevant ( ) }
0 commit comments