Skip to content

Commit 3f7f5d2

Browse files
committed
performance improvements in ReDoSUtil
1 parent 49e4764 · commit 3f7f5d2

File tree

2 files changed

+32
-24
lines changed
  • javascript/ql/src/semmle/javascript/security/performance
  • python/ql/src/semmle/python/security/performance

2 files changed

+32
-24
lines changed

javascript/ql/src/semmle/javascript/security/performance/ReDoSUtil.qll

Lines changed: 16 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -155,15 +155,22 @@ private class RegexpCharacterConstant extends RegExpConstant {
155155
RegexpCharacterConstant() { this.isCharacter() }
156156
}
157157

158+
/**
159+
* A regexp term that is relevant for this ReDoS analysis.
160+
*/
161+
class RelevantRegExpTerm extends RegExpTerm {
162+
RelevantRegExpTerm() { getRoot(this).isRelevant() }
163+
}
164+
158165
/**
159166
* Holds if `term` is the chosen canonical representative for all terms with string representation `str`.
160167
*
161168
* Using canonical representatives gives a huge performance boost when working with tuples containing multiple `InputSymbol`s.
162169
* The number of `InputSymbol`s is decreased by 3 orders of magnitude or more in some larger benchmarks.
163170
*/
164-
private predicate isCanonicalTerm(RegExpTerm term, string str) {
171+
private predicate isCanonicalTerm(RelevantRegExpTerm term, string str) {
165172
term =
166-
rank[1](RegExpTerm t, Location loc, File file |
173+
min(RelevantRegExpTerm t, Location loc, File file |
167174
loc = t.getLocation() and
168175
file = t.getFile() and
169176
str = t.getRawValue()
@@ -178,15 +185,15 @@ private predicate isCanonicalTerm(RegExpTerm term, string str) {
178185
private newtype TInputSymbol =
179186
/** An input symbol corresponding to character `c`. */
180187
Char(string c) {
181-
c = any(RegexpCharacterConstant cc | getRoot(cc).isRelevant()).getValue().charAt(_)
188+
c = any(RegexpCharacterConstant cc | cc instanceof RelevantRegExpTerm).getValue().charAt(_)
182189
} or
183190
/**
184191
* An input symbol representing all characters matched by
185192
* a (non-universal) character class that has string representation `charClassString`.
186193
*/
187194
CharClass(string charClassString) {
188-
exists(RegExpTerm term | term.getRawValue() = charClassString | getRoot(term).isRelevant()) and
189-
exists(RegExpTerm recc | isCanonicalTerm(recc, charClassString) |
195+
exists(RelevantRegExpTerm term | term.getRawValue() = charClassString) and
196+
exists(RelevantRegExpTerm recc | isCanonicalTerm(recc, charClassString) |
190197
recc instanceof RegExpCharacterClass and
191198
not recc.(RegExpCharacterClass).isUniversalClass()
192199
or
@@ -626,13 +633,10 @@ RegExpRoot getRoot(RegExpTerm term) {
626633
}
627634

628635
private newtype TState =
629-
Match(RegExpTerm t, int i) {
630-
getRoot(t).isRelevant() and
631-
(
632-
i = 0
633-
or
634-
exists(t.(RegexpCharacterConstant).getValue().charAt(i))
635-
)
636+
Match(RelevantRegExpTerm t, int i) {
637+
i = 0
638+
or
639+
exists(t.(RegexpCharacterConstant).getValue().charAt(i))
636640
} or
637641
Accept(RegExpRoot l) { l.isRelevant() } or
638642
AcceptAnySuffix(RegExpRoot l) { l.isRelevant() }

python/ql/src/semmle/python/security/performance/ReDoSUtil.qll

Lines changed: 16 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -155,15 +155,22 @@ private class RegexpCharacterConstant extends RegExpConstant {
155155
RegexpCharacterConstant() { this.isCharacter() }
156156
}
157157

158+
/**
159+
* A regexp term that is relevant for this ReDoS analysis.
160+
*/
161+
class RelevantRegExpTerm extends RegExpTerm {
162+
RelevantRegExpTerm() { getRoot(this).isRelevant() }
163+
}
164+
158165
/**
159166
* Holds if `term` is the chosen canonical representative for all terms with string representation `str`.
160167
*
161168
* Using canonical representatives gives a huge performance boost when working with tuples containing multiple `InputSymbol`s.
162169
* The number of `InputSymbol`s is decreased by 3 orders of magnitude or more in some larger benchmarks.
163170
*/
164-
private predicate isCanonicalTerm(RegExpTerm term, string str) {
171+
private predicate isCanonicalTerm(RelevantRegExpTerm term, string str) {
165172
term =
166-
rank[1](RegExpTerm t, Location loc, File file |
173+
min(RelevantRegExpTerm t, Location loc, File file |
167174
loc = t.getLocation() and
168175
file = t.getFile() and
169176
str = t.getRawValue()
@@ -178,15 +185,15 @@ private predicate isCanonicalTerm(RegExpTerm term, string str) {
178185
private newtype TInputSymbol =
179186
/** An input symbol corresponding to character `c`. */
180187
Char(string c) {
181-
c = any(RegexpCharacterConstant cc | getRoot(cc).isRelevant()).getValue().charAt(_)
188+
c = any(RegexpCharacterConstant cc | cc instanceof RelevantRegExpTerm).getValue().charAt(_)
182189
} or
183190
/**
184191
* An input symbol representing all characters matched by
185192
* a (non-universal) character class that has string representation `charClassString`.
186193
*/
187194
CharClass(string charClassString) {
188-
exists(RegExpTerm term | term.getRawValue() = charClassString | getRoot(term).isRelevant()) and
189-
exists(RegExpTerm recc | isCanonicalTerm(recc, charClassString) |
195+
exists(RelevantRegExpTerm term | term.getRawValue() = charClassString) and
196+
exists(RelevantRegExpTerm recc | isCanonicalTerm(recc, charClassString) |
190197
recc instanceof RegExpCharacterClass and
191198
not recc.(RegExpCharacterClass).isUniversalClass()
192199
or
@@ -626,13 +633,10 @@ RegExpRoot getRoot(RegExpTerm term) {
626633
}
627634

628635
private newtype TState =
629-
Match(RegExpTerm t, int i) {
630-
getRoot(t).isRelevant() and
631-
(
632-
i = 0
633-
or
634-
exists(t.(RegexpCharacterConstant).getValue().charAt(i))
635-
)
636+
Match(RelevantRegExpTerm t, int i) {
637+
i = 0
638+
or
639+
exists(t.(RegexpCharacterConstant).getValue().charAt(i))
636640
} or
637641
Accept(RegExpRoot l) { l.isRelevant() } or
638642
AcceptAnySuffix(RegExpRoot l) { l.isRelevant() }

0 commit comments

Comments
 (0)