Skip to content

Commit 4a37c2f

Browse files
authored
Merge pull request #13778 from geoffw0/javaparsemode
Java: Understand multiple parse mode flags specified in a regular expression string
2 parents dc299fc + af3d8c8 commit 4a37c2f

File tree

5 files changed

+80
-9
lines changed

5 files changed

+80
-9
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Regular expressions containing multiple parse mode flags are now interpretted correctly. For example `"(?is)abc.*"` with both the `i` and `s` flags.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: fix
3+
---
4+
* The regular expressions library no longer incorrectly matches mode flag characters against the input.

java/ql/lib/semmle/code/java/regex/regex.qll

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -472,20 +472,56 @@ abstract class RegexString extends StringLiteral {
472472
)
473473
}
474474

475-
private predicate flagGroupStart(int start, int end, string c) {
475+
/**
476+
* Holds if the initial part of a parse mode, not containing any
477+
* mode characters is between `start` and `end`.
478+
*/
479+
private predicate flagGroupStartNoModes(int start, int end) {
476480
this.isGroupStart(start) and
477481
this.getChar(start + 1) = "?" and
478-
end = start + 3 and
479-
c = this.getChar(start + 2) and
480-
c in ["i", "m", "s", "u", "x", "U"]
482+
this.getChar(start + 2) in ["i", "m", "s", "u", "x", "U"] and
483+
end = start + 2
484+
}
485+
486+
/**
487+
* Holds if `pos` contains a mode character from the
488+
* flag group starting at `start`.
489+
*/
490+
private predicate modeCharacter(int start, int pos) {
491+
this.flagGroupStartNoModes(start, pos)
492+
or
493+
this.modeCharacter(start, pos - 1) and
494+
this.getChar(pos) in ["i", "m", "s", "u", "x", "U"]
495+
}
496+
497+
/**
498+
* Holds if a parse mode group is between `start` and `end`.
499+
*/
500+
private predicate flagGroupStart(int start, int end) {
501+
this.flagGroupStartNoModes(start, _) and
502+
end = max(int i | this.modeCharacter(start, i) | i + 1)
503+
}
504+
505+
/**
506+
* Holds if a parse mode group of this regex includes the mode flag `c`.
507+
* For example the following parse mode group, with mode flag `i`:
508+
* ```
509+
* (?i)
510+
* ```
511+
*/
512+
private predicate flag(string c) {
513+
exists(int pos |
514+
this.modeCharacter(_, pos) and
515+
this.getChar(pos) = c
516+
)
481517
}
482518

483519
/**
484520
* Gets the mode of this regular expression string if
485521
* it is defined by a prefix.
486522
*/
487523
string getModeFromPrefix() {
488-
exists(string c | this.flagGroupStart(_, _, c) |
524+
exists(string c | this.flag(c) |
489525
c = "i" and result = "IGNORECASE"
490526
or
491527
c = "m" and result = "MULTILINE"
@@ -540,7 +576,7 @@ abstract class RegexString extends StringLiteral {
540576
private predicate groupStart(int start, int end) {
541577
this.nonCapturingGroupStart(start, end)
542578
or
543-
this.flagGroupStart(start, end, _)
579+
this.flagGroupStart(start, end)
544580
or
545581
this.namedGroupStart(start, end)
546582
or

java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,9 @@ class ExpRedosTest {
8686
// NOT GOOD; attack: "\n".repeat(100) + "."
8787
"(?s)(.|\\n)*!", // $ hasExpRedos
8888

89+
// NOT GOOD; attack: "\n".repeat(100) + "."
90+
"(?is)(.|\\n)*!", // $ hasExpRedos
91+
8992
// GOOD
9093
"([\\w.]+)*",
9194

@@ -120,7 +123,7 @@ class ExpRedosTest {
120123
"\"((?:\\\\[\\x00-\\x7f]|[^\\x00-\\x08\\x0a-\\x1f\\x7f\"])*)\"", // $ MISSING: hasExpRedos
121124

122125
// GOOD
123-
"\"((?:\\\\[\\x00-\\x7f]|[^\\x00-\\x08\\x0a-\\x1f\\x7f\"\\\\])*)\"",
126+
"\"((?:\\\\[\\x00-\\x7f]|[^\\x00-\\x08\\x0a-\\x1f\\x7f\"\\\\])*)\"",
124127

125128
// NOT GOOD
126129
"(([a-z]|[d-h])*)\"", // $ hasExpRedos
@@ -428,7 +431,10 @@ class ExpRedosTest {
428431
"(a*)*b", // $ hasExpRedos
429432

430433
// BAD - but not detected due to the way possessive quantifiers are approximated
431-
"((aa|a*+)b)*c" // $ MISSING: hasExpRedos
434+
"((aa|a*+)b)*c", // $ MISSING: hasExpRedos
435+
436+
// BAD - testing mode flag groups
437+
"(?is)(a|aa?)*b" // $ hasExpRedos hasPrefixMsg= hasPump=a
432438
};
433439

434440
void test() {

java/ql/test/query-tests/security/CWE-730/ReDoS.ql

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,13 @@ private import semmle.code.java.regex.RegexTreeView::RegexTreeView as TreeView
44
import codeql.regex.nfa.ExponentialBackTracking::Make<TreeView> as ExponentialBackTracking
55
import semmle.code.java.regex.regex
66

7+
bindingset[s]
8+
string quote(string s) { if s.matches("% %") then result = "\"" + s + "\"" else result = s }
9+
710
module HasExpRedos implements TestSig {
8-
string getARelevantTag() { result = ["hasExpRedos", "hasParseFailure"] }
11+
string getARelevantTag() {
12+
result = ["hasExpRedos", "hasParseFailure", "hasPump", "hasPrefixMsg"]
13+
}
914

1015
predicate hasActualResult(Location location, string element, string tag, string value) {
1116
tag = "hasExpRedos" and
@@ -25,6 +30,22 @@ module HasExpRedos implements TestSig {
2530
element = r.toString()
2631
)
2732
}
33+
34+
predicate hasOptionalResult(Location location, string element, string tag, string value) {
35+
exists(TreeView::RegExpTerm t, Regex r, string pump, string prefixMsg |
36+
ExponentialBackTracking::hasReDoSResult(t, pump, _, prefixMsg) and
37+
t.occursInRegex(r, _, _) and
38+
(
39+
tag = "hasPrefixMsg" and
40+
value = quote(prefixMsg)
41+
or
42+
tag = "hasPump" and
43+
value = pump
44+
) and
45+
location = r.getLocation() and
46+
element = r.toString()
47+
)
48+
}
2849
}
2950

3051
import MakeTest<HasExpRedos>

0 commit comments

Comments
 (0)