Skip to content

Commit bff59a1

Browse files
committed
fix parse error in regular expressions
1 parent 84554af commit bff59a1

File tree

5 files changed

+33
-4
lines changed

5 files changed

+33
-4
lines changed

javascript/extractor/src/com/semmle/js/extractor/Main.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ public class Main {
4343
* A version identifier that should be updated every time the extractor changes in such a way that
4444
* it may produce different tuples for the same file under the same {@link ExtractorConfig}.
4545
*/
46-
public static final String EXTRACTOR_VERSION = "2021-02-24";
46+
public static final String EXTRACTOR_VERSION = "2021-03-08";
4747

4848
public static final Pattern NEWLINE = Pattern.compile("\n");
4949

javascript/extractor/src/com/semmle/js/parser/RegExpParser.java

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -282,11 +282,18 @@ private RegExpTerm parseQuantifierOpt(SourceLocation loc, RegExpTerm atom) {
282282
if (this.match("+")) return this.finishTerm(new Plus(loc, atom, !this.match("?")));
283283
if (this.match("?")) return this.finishTerm(new Opt(loc, atom, !this.match("?")));
284284
if (this.match("{")) {
285-
Double lo = toNumber(this.readDigits(false)), hi;
285+
String matched = "{"; // keeping track of the string matched so far, in case this turns out not to be a quantifier.
286+
String digits = this.readDigits(false);
287+
matched += digits;
288+
Double lo = toNumber(digits), hi;
289+
int prevPos = this.pos;
286290
if (this.match(",")) {
291+
matched += ",";
287292
if (!this.lookahead("}")) {
288293
// atom{lo, hi}
289-
hi = toNumber(this.readDigits(false));
294+
digits = this.readDigits(false);
295+
matched += digits;
296+
hi = toNumber(digits);
290297
} else {
291298
// atom{lo,}
292299
hi = null;
@@ -295,7 +302,11 @@ private RegExpTerm parseQuantifierOpt(SourceLocation loc, RegExpTerm atom) {
295302
// atom{lo}
296303
hi = lo;
297304
}
298-
this.expectRBrace();
305+
if (!this.match("}")) {
306+
// Not a quantifier, just parsing it as a constant.
307+
// E.g. a Regexp such as `/a{|X/`, where there is no matching `}`.
308+
return this.finishTerm(new Sequence(loc, Arrays.asList(atom, new Constant(loc, matched))));
309+
}
299310
return this.finishTerm(new Range(loc, atom, !this.match("?"), lo, hi));
300311
}
301312
return atom;

javascript/ql/test/query-tests/Performance/ReDoS/ReDoS.expected

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,3 +168,7 @@
168168
| tst.js:351:15:351:16 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
169169
| tst.js:352:15:352:16 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
170170
| tst.js:353:15:353:16 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
171+
| tst.js:360:15:360:30 | ((?:a{\|-)\|\\w\\{)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{'. |
172+
| tst.js:361:15:361:33 | ((?:a{0\|-)\|\\w\\{\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0'. |
173+
| tst.js:362:15:362:35 | ((?:a{0,\|-)\|\\w\\{\\d,)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,'. |
174+
| tst.js:363:15:363:38 | ((?:a{0,2\|-)\|\\w\\{\\d,\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,2'. |

javascript/ql/test/query-tests/Performance/ReDoS/tst.js

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,3 +355,12 @@ var bad82 = /(a+)+b/;
355355
// GOOD
356356
var good40 = /(a|b)+/;
357357
var good41 = /(?:[\s;,"'<>(){}|[\]@=+*]|:(?![/\\]))+/;
358+
359+
// NOT GOOD
360+
var bad83 = /^((?:a{|-)|\w\{)+X$/;
361+
var bad84 = /^((?:a{0|-)|\w\{\d)+X$/;
362+
var bad85 = /^((?:a{0,|-)|\w\{\d,)+X$/;
363+
var bad86 = /^((?:a{0,2|-)|\w\{\d,\d)+X$/;
364+
365+
// GOOD:
366+
var good42 = /^((?:a{0,2}|-)|\w\{\d,\d\})+X$/;

javascript/ql/test/query-tests/RegExp/RegExpAlwaysMatches/tst.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,8 @@ function nonWordBoundary(x) {
8585
function emptyRegex(x) {
8686
return new RegExp("").test(x); // NOT OK
8787
}
88+
89+
function parserTest(x) {
90+
/(\w\s*:\s*[^:}]+|#){|@import[^\n]+(?:url|,)/.test(x); // OK
91+
/^((?:a{0,2}|-)|\w\{\d,\d\})+X$/.text(x); // ok
92+
}

0 commit comments

Comments
 (0)