Skip to content

Commit 4695923

Browse files
authored
Merge pull request github#6288 from erik-krogh/emptyRedos
JS/Python: Fix FP in redos related to empty lookaheads
2 parents 166a6b0 + e962a7c commit 4695923

File tree

10 files changed

+141
-60
lines changed

10 files changed

+141
-60
lines changed

javascript/ql/src/semmle/javascript/security/performance/ReDoSUtil.qll

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,49 @@ private int ascii(string char) {
7171
)
7272
}
7373

74+
/**
75+
* Holds if `t` matches at least an epsilon symbol.
76+
*
77+
* That is, this term does not restrict the language of the enclosing regular expression.
78+
*
79+
* This is implemented as an under-approximation, and this predicate does not hold for sub-patterns in particular.
80+
*/
81+
predicate matchesEpsilon(RegExpTerm t) {
82+
t instanceof RegExpStar
83+
or
84+
t instanceof RegExpOpt
85+
or
86+
t.(RegExpRange).getLowerBound() = 0
87+
or
88+
exists(RegExpTerm child |
89+
child = t.getAChild() and
90+
matchesEpsilon(child)
91+
|
92+
t instanceof RegExpAlt or
93+
t instanceof RegExpGroup or
94+
t instanceof RegExpPlus or
95+
t instanceof RegExpRange
96+
)
97+
or
98+
matchesEpsilon(t.(RegExpBackRef).getGroup())
99+
or
100+
forex(RegExpTerm child | child = t.(RegExpSequence).getAChild() | matchesEpsilon(child))
101+
}
102+
103+
/**
104+
* A lookahead/lookbehind that matches the empty string.
105+
*/
106+
class EmptyPositiveSubPatttern extends RegExpSubPattern {
107+
EmptyPositiveSubPatttern() {
108+
(
109+
this instanceof RegExpPositiveLookahead
110+
or
111+
this instanceof RegExpPositiveLookbehind
112+
) and
113+
matchesEpsilon(this.getOperand())
114+
}
115+
}
116+
74117
/**
75118
* A branch in a disjunction that is the root node in a literal, or a literal
76119
* whose root node is not a disjunction.
@@ -550,6 +593,10 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
550593
exists(RegExpDollar dollar | q1 = before(dollar) |
551594
lbl = Epsilon() and q2 = Accept(getRoot(dollar))
552595
)
596+
or
597+
exists(EmptyPositiveSubPatttern empty | q1 = before(empty) |
598+
lbl = Epsilon() and q2 = after(empty)
599+
)
553600
}
554601

555602
/**

javascript/ql/src/semmle/javascript/security/performance/SuperlinearBackTracking.qll

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -363,35 +363,6 @@ predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm
363363
)
364364
}
365365

366-
/**
367-
* Holds if `t` matches at least an epsilon symbol.
368-
*
369-
* That is, this term does not restrict the language of the enclosing regular expression.
370-
*
371-
* This is implemented as an under-approximation, and this predicate does not hold for sub-patterns in particular.
372-
*/
373-
private predicate matchesEpsilon(RegExpTerm t) {
374-
t instanceof RegExpStar
375-
or
376-
t instanceof RegExpOpt
377-
or
378-
t.(RegExpRange).getLowerBound() = 0
379-
or
380-
exists(RegExpTerm child |
381-
child = t.getAChild() and
382-
matchesEpsilon(child)
383-
|
384-
t instanceof RegExpAlt or
385-
t instanceof RegExpGroup or
386-
t instanceof RegExpPlus or
387-
t instanceof RegExpRange
388-
)
389-
or
390-
matchesEpsilon(t.(RegExpBackRef).getGroup())
391-
or
392-
forex(RegExpTerm child | child = t.(RegExpSequence).getAChild() | matchesEpsilon(child))
393-
}
394-
395366
/**
396367
* Gets a message for why `term` can cause polynomial backtracking.
397368
*/

javascript/ql/test/query-tests/Performance/ReDoS/PolynomialBackTracking.expected

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,3 +493,11 @@
493493
| tst.js:351:15:351:16 | a+ | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding (a+)* |
494494
| tst.js:352:15:352:16 | a* | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding (a*)+b |
495495
| tst.js:353:15:353:16 | a+ | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding (a+)+ |
496+
| tst.js:372:16:372:21 | [^"]*? | Strings starting with '"' and with many repetitions of '""' can start matching anywhere after the start of the preceeding ("[^"]*?"\|[^"\\s]+)+(?=\\s*\|\\s*$)X |
497+
| tst.js:372:24:372:30 | [^"\\s]+ | Strings with many repetitions of '!' can start matching anywhere after the start of the preceeding ("[^"]*?"\|[^"\\s]+)+ |
498+
| tst.js:373:16:373:21 | [^"]*? | Strings starting with '"' and with many repetitions of '""' can start matching anywhere after the start of the preceeding ("[^"]*?"\|[^"\\s]+)+(?=X) |
499+
| tst.js:373:24:373:30 | [^"\\s]+ | Strings with many repetitions of '!' can start matching anywhere after the start of the preceeding ("[^"]*?"\|[^"\\s]+)+ |
500+
| tst.js:374:15:374:16 | x* | Strings with many repetitions of 'x' can start matching anywhere after the start of the preceeding (x*)+(?=$) |
501+
| tst.js:375:15:375:16 | x* | Strings with many repetitions of 'x' can start matching anywhere after the start of the preceeding (x*)+(?=$\|y) |
502+
| tst.js:378:16:378:22 | [\\s\\S]* | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding ([\\s\\S]*)+(?=$) |
503+
| tst.js:379:16:379:22 | [\\s\\S]* | Strings with many repetitions of 'a' can start matching anywhere after the start of the preceeding ([\\s\\S]*)+(?=$\|y) |

javascript/ql/test/query-tests/Performance/ReDoS/ReDoS.expected

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,3 +172,9 @@
172172
| tst.js:361:15:361:33 | ((?:a{0\|-)\|\\w\\{\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0'. |
173173
| tst.js:362:15:362:35 | ((?:a{0,\|-)\|\\w\\{\\d,)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,'. |
174174
| tst.js:363:15:363:38 | ((?:a{0,2\|-)\|\\w\\{\\d,\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,2'. |
175+
| tst.js:372:24:372:30 | [^"\\s]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '!'. |
176+
| tst.js:373:24:373:30 | [^"\\s]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '!'. |
177+
| tst.js:374:15:374:16 | x* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'x'. |
178+
| tst.js:375:15:375:16 | x* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'x'. |
179+
| tst.js:378:16:378:22 | [\\s\\S]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
180+
| tst.js:379:16:379:22 | [\\s\\S]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |

javascript/ql/test/query-tests/Performance/ReDoS/tst.js

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -363,4 +363,17 @@ var bad85 = /^((?:a{0,|-)|\w\{\d,)+X$/;
363363
var bad86 = /^((?:a{0,2|-)|\w\{\d,\d)+X$/;
364364

365365
// GOOD:
366-
var good42 = /^((?:a{0,2}|-)|\w\{\d,\d\})+X$/;
366+
var good42 = /^((?:a{0,2}|-)|\w\{\d,\d\})+X$/;
367+
368+
// GOOD
369+
var good43 = /("[^"]*?"|[^"\s]+)+(?=\s*|\s*$)/g;
370+
371+
// BAD
372+
var bad87 = /("[^"]*?"|[^"\s]+)+(?=\s*|\s*$)X/g;
373+
var bad88 = /("[^"]*?"|[^"\s]+)+(?=X)/g;
374+
var bad89 = /(x*)+(?=$)/
375+
var bad90 = /(x*)+(?=$|y)/
376+
377+
// GOOD - but we spuriously conclude that a rejecting suffix exists.
378+
var good44 = /([\s\S]*)+(?=$)/;
379+
var good45 = /([\s\S]*)+(?=$|y)/;

python/ql/src/semmle/python/RegexTreeView.qll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -836,6 +836,15 @@ class RegExpZeroWidthMatch extends RegExpGroup {
836836
*/
837837
class RegExpSubPattern extends RegExpZeroWidthMatch {
838838
RegExpSubPattern() { not re.emptyGroup(start, end) }
839+
840+
/** Gets the lookahead term. */
841+
RegExpTerm getOperand() {
842+
exists(int in_start, int in_end | re.groupContents(start, end, in_start, in_end) |
843+
result.getRegex() = re and
844+
result.getStart() = in_start and
845+
result.getEnd() = in_end
846+
)
847+
}
839848
}
840849

841850
/**

python/ql/src/semmle/python/security/performance/ReDoSUtil.qll

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,49 @@ private int ascii(string char) {
7171
)
7272
}
7373

74+
/**
75+
* Holds if `t` matches at least an epsilon symbol.
76+
*
77+
* That is, this term does not restrict the language of the enclosing regular expression.
78+
*
79+
* This is implemented as an under-approximation, and this predicate does not hold for sub-patterns in particular.
80+
*/
81+
predicate matchesEpsilon(RegExpTerm t) {
82+
t instanceof RegExpStar
83+
or
84+
t instanceof RegExpOpt
85+
or
86+
t.(RegExpRange).getLowerBound() = 0
87+
or
88+
exists(RegExpTerm child |
89+
child = t.getAChild() and
90+
matchesEpsilon(child)
91+
|
92+
t instanceof RegExpAlt or
93+
t instanceof RegExpGroup or
94+
t instanceof RegExpPlus or
95+
t instanceof RegExpRange
96+
)
97+
or
98+
matchesEpsilon(t.(RegExpBackRef).getGroup())
99+
or
100+
forex(RegExpTerm child | child = t.(RegExpSequence).getAChild() | matchesEpsilon(child))
101+
}
102+
103+
/**
104+
* A lookahead/lookbehind that matches the empty string.
105+
*/
106+
class EmptyPositiveSubPatttern extends RegExpSubPattern {
107+
EmptyPositiveSubPatttern() {
108+
(
109+
this instanceof RegExpPositiveLookahead
110+
or
111+
this instanceof RegExpPositiveLookbehind
112+
) and
113+
matchesEpsilon(this.getOperand())
114+
}
115+
}
116+
74117
/**
75118
* A branch in a disjunction that is the root node in a literal, or a literal
76119
* whose root node is not a disjunction.
@@ -550,6 +593,10 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
550593
exists(RegExpDollar dollar | q1 = before(dollar) |
551594
lbl = Epsilon() and q2 = Accept(getRoot(dollar))
552595
)
596+
or
597+
exists(EmptyPositiveSubPatttern empty | q1 = before(empty) |
598+
lbl = Epsilon() and q2 = after(empty)
599+
)
553600
}
554601

555602
/**

python/ql/src/semmle/python/security/performance/SuperlinearBackTracking.qll

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -363,35 +363,6 @@ predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm
363363
)
364364
}
365365

366-
/**
367-
* Holds if `t` matches at least an epsilon symbol.
368-
*
369-
* That is, this term does not restrict the language of the enclosing regular expression.
370-
*
371-
* This is implemented as an under-approximation, and this predicate does not hold for sub-patterns in particular.
372-
*/
373-
private predicate matchesEpsilon(RegExpTerm t) {
374-
t instanceof RegExpStar
375-
or
376-
t instanceof RegExpOpt
377-
or
378-
t.(RegExpRange).getLowerBound() = 0
379-
or
380-
exists(RegExpTerm child |
381-
child = t.getAChild() and
382-
matchesEpsilon(child)
383-
|
384-
t instanceof RegExpAlt or
385-
t instanceof RegExpGroup or
386-
t instanceof RegExpPlus or
387-
t instanceof RegExpRange
388-
)
389-
or
390-
matchesEpsilon(t.(RegExpBackRef).getGroup())
391-
or
392-
forex(RegExpTerm child | child = t.(RegExpSequence).getAChild() | matchesEpsilon(child))
393-
}
394-
395366
/**
396367
* Gets a message for why `term` can cause polynomial backtracking.
397368
*/

python/ql/test/query-tests/Security/CWE-730-ReDoS/ReDoS.expected

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,5 +93,7 @@
9393
| redos.py:364:25:364:45 | ((?:a{0,\|-)\|\\w\\{\\d,)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,'. |
9494
| redos.py:365:25:365:48 | ((?:a{0,2\|-)\|\\w\\{\\d,\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,2'. |
9595
| redos.py:371:25:371:35 | (\\u0061\|a)* | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of 'a'. |
96+
| redos.py:380:35:380:41 | [^"\\s]+ | This part of the regular expression may cause exponential backtracking on strings starting with '/' and containing many repetitions of '!'. |
97+
| redos.py:381:35:381:41 | [^"\\s]+ | This part of the regular expression may cause exponential backtracking on strings starting with '/' and containing many repetitions of '!'. |
9698
| unittests.py:5:17:5:23 | (\u00c6\|\\\u00c6)+ | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of '\u00c6'. |
9799
| unittests.py:9:16:9:24 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |

python/ql/test/query-tests/Security/CWE-730-ReDoS/redos.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,4 +371,11 @@
371371
bad87 = re.compile(r'X(\u0061|a)*Y')
372372

373373
# GOOD
374-
good43 = re.compile(r'X(\u0061|b)+Y')
374+
good43 = re.compile(r'X(\u0061|b)+Y')
375+
376+
# GOOD
377+
good44 = re.compile(r'("[^"]*?"|[^"\s]+)+(?=\s*|\s*$)')
378+
379+
# BAD
380+
bad88 = re.compile(r'/("[^"]*?"|[^"\s]+)+(?=\s*|\s*$)X')
381+
bad89 = re.compile(r'/("[^"]*?"|[^"\s]+)+(?=X)')

0 commit comments

Comments
 (0)