Skip to content

Commit eec57d4

Browse files
Simplify dataflow logic by using only one configuration, and expessing more sinks with models-as-data
1 parent 2a80540 commit eec57d4

File tree

2 files changed

+94
-147
lines changed

2 files changed

+94
-147
lines changed

java/ql/lib/semmle/code/java/regex/RegexFlowConfigs.qll

Lines changed: 75 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -13,87 +13,53 @@ private class ExploitableStringLiteral extends StringLiteral {
1313
ExploitableStringLiteral() { this.getValue().matches(["%+%", "%*%", "%{%}%"]) }
1414
}
1515

16-
private class RegexCompileFlowConf extends DataFlow2::Configuration {
17-
RegexCompileFlowConf() { this = "RegexCompileFlowConfig" }
18-
19-
override predicate isSource(DataFlow::Node node) {
20-
node.asExpr() instanceof ExploitableStringLiteral
21-
}
22-
23-
override predicate isSink(DataFlow::Node node) {
24-
sinkNode(node, ["regex-compile", "regex-compile-match", "regex-compile-find"])
25-
}
26-
27-
override predicate isBarrier(DataFlow::Node node) {
28-
node.getEnclosingCallable().getDeclaringType() instanceof NonSecurityTestClass
29-
}
30-
}
31-
32-
/**
33-
* Holds if `s` is used as a regex, with the mode `mode` (if known).
34-
* If regex mode is not known, `mode` will be `"None"`.
35-
*
36-
* As an optimisation, only regexes containing an infinite repitition quatifier (`+`, `*`, or `{x,}`)
37-
* and therefore may be relevant for ReDoS queries are considered.
38-
*/
39-
predicate usedAsRegex(StringLiteral s, string mode, boolean match_full_string) {
40-
exists(DataFlow::Node sink |
41-
any(RegexCompileFlowConf c).hasFlow(DataFlow2::exprNode(s), sink) and
42-
mode = "None" and // TODO: proper mode detection
43-
(if matchesFullString(sink) then match_full_string = true else match_full_string = false)
44-
)
45-
}
46-
4716
/**
48-
* Holds if the regex that flows to `sink` is used to match against a full string,
49-
* as though it was implicitly surrounded by ^ and $.
17+
* Holds if `kind` is an external sink kind that is relevant for regex flow.
18+
* `full` is true if sinks with this kind match against the full string of its input.
19+
* `strArg` is the index of the argument to methods with this sink kind that contan the string to be matched against,
20+
* where -1 is the qualifier; or -2 if no such argument exists.
5021
*/
51-
private predicate matchesFullString(DataFlow::Node sink) {
52-
sinkNode(sink, "regex-compile-match")
53-
or
54-
exists(DataFlow::Node matchSource, RegexCompileToMatchConf conf |
55-
matchSource.asExpr().(MethodAccess).getAnArgument() = sink.asExpr() and
56-
conf.hasFlow(matchSource, _)
22+
private predicate regexSinkKindInfo(string kind, boolean full, int strArg) {
23+
sinkModel(_, _, _, _, _, _, _, kind) and
24+
exists(string fullStr, string strArgStr |
25+
(
26+
full = true and fullStr = "f"
27+
or
28+
full = false and fullStr = ""
29+
) and
30+
(
31+
strArgStr.toInt() = strArg
32+
or
33+
strArg = -2 and
34+
strArgStr = ""
35+
)
36+
|
37+
kind = "regex-use[" + fullStr + strArgStr + "]"
5738
)
5839
}
5940

60-
private class RegexCompileToMatchConf extends DataFlow2::Configuration {
61-
RegexCompileToMatchConf() { this = "RegexCompileToMatchConfig" }
62-
63-
override predicate isSource(DataFlow::Node node) { sourceNode(node, "regex-compile") }
41+
/** A sink that is relevant for regex flow. */
42+
private class RegexFlowSink extends DataFlow::Node {
43+
boolean full;
44+
int strArg;
6445

65-
override predicate isSink(DataFlow::Node node) { sinkNode(node, "regex-match") }
66-
67-
override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
68-
exists(MethodAccess ma | node2.asExpr() = ma and node1.asExpr() = ma.getQualifier() |
69-
ma.getMethod().hasQualifiedName("java.util.regex", "Pattern", "matcher")
46+
RegexFlowSink() {
47+
exists(string kind |
48+
regexSinkKindInfo(kind, full, strArg) and
49+
sinkNode(this, kind)
7050
)
7151
}
72-
}
73-
74-
/**
75-
* A method access that can match a regex against a string
76-
*/
77-
abstract class RegexMatchMethodAccess extends MethodAccess {
78-
string package;
79-
string type;
80-
string name;
81-
int regexArg;
82-
int stringArg;
83-
Method m;
84-
85-
RegexMatchMethodAccess() {
86-
this.getMethod().getSourceDeclaration().overrides*(m) and
87-
m.hasQualifiedName(package, type, name) and
88-
regexArg in [-1 .. m.getNumberOfParameters() - 1] and
89-
stringArg in [-1 .. m.getNumberOfParameters() - 1]
90-
}
9152

92-
/** Gets the argument of this call that the regex to be matched against flows into. */
93-
Expr getRegexArg() { result = argOf(this, regexArg) }
53+
/** Holds if a regex that flows here is matched against a full string (rather than a substring). */
54+
predicate matchesFullString() { full = true }
9455

95-
/** Gets the argument of this call that the string being matched flows into. */
96-
Expr getStringArg() { result = argOf(this, stringArg) }
56+
/** Gets the string expression that a regex that flows here is matched against, if any. */
57+
Expr getStringArgument() {
58+
exists(MethodAccess ma |
59+
this.asExpr() = argOf(ma, _) and
60+
result = argOf(ma, strArg)
61+
)
62+
}
9763
}
9864

9965
private Expr argOf(MethodAccess ma, int arg) {
@@ -115,35 +81,7 @@ class RegexAdditionalFlowStep extends Unit {
11581
abstract predicate step(DataFlow::Node node1, DataFlow::Node node2);
11682
}
11783

118-
// TODO: can this be done with the models-as-data framework?
119-
private class JdkRegexMatchMethodAccess extends RegexMatchMethodAccess {
120-
JdkRegexMatchMethodAccess() {
121-
package = "java.util.regex" and
122-
type = "Pattern" and
123-
(
124-
name = "matcher" and regexArg = -1 and stringArg = 0
125-
or
126-
name = "matches" and regexArg = 0 and stringArg = 1
127-
or
128-
name = "split" and regexArg = -1 and stringArg = 0
129-
or
130-
name = "splitAsStream" and regexArg = -1 and stringArg = 0
131-
)
132-
or
133-
package = "java.lang" and
134-
type = "String" and
135-
name = ["matches", "split", "replaceAll", "replaceFirst"] and
136-
regexArg = 0 and
137-
stringArg = -1
138-
or
139-
package = "java.util.function" and
140-
type = "Predicate" and
141-
name = "test" and
142-
regexArg = -1 and
143-
stringArg = 0
144-
}
145-
}
146-
84+
// TODO: This may be able to be done with models-as-data if query-specific flow steps beome supported.
14785
private class JdkRegexFlowStep extends RegexAdditionalFlowStep {
14886
override predicate step(DataFlow::Node node1, DataFlow::Node node2) {
14987
exists(MethodAccess ma, Method m, string package, string type, string name, int arg |
@@ -155,7 +93,7 @@ private class JdkRegexFlowStep extends RegexAdditionalFlowStep {
15593
package = "java.util.regex" and
15694
type = "Pattern" and
15795
(
158-
name = ["asMatchPredicate", "asPredicate"] and
96+
name = ["asMatchPredicate", "asPredicate", "matcher"] and
15997
arg = -1
16098
or
16199
name = "compile" and
@@ -170,16 +108,6 @@ private class JdkRegexFlowStep extends RegexAdditionalFlowStep {
170108
}
171109
}
172110

173-
private class GuavaRegexMatchMethodAccess extends RegexMatchMethodAccess {
174-
GuavaRegexMatchMethodAccess() {
175-
package = "com.google.common.base" and
176-
regexArg = -1 and
177-
stringArg = 0 and
178-
type = ["Splitter", "Splitter$MapSplitter"] and
179-
name = ["split", "splitToList"]
180-
}
181-
}
182-
183111
private class GuavaRegexFlowStep extends RegexAdditionalFlowStep {
184112
override predicate step(DataFlow::Node node1, DataFlow::Node node2) {
185113
exists(MethodAccess ma, Method m, string package, string type, string name, int arg |
@@ -209,20 +137,46 @@ private class GuavaRegexFlowStep extends RegexAdditionalFlowStep {
209137
}
210138
}
211139

212-
private class RegexMatchFlowConf extends DataFlow2::Configuration {
213-
RegexMatchFlowConf() { this = "RegexMatchFlowConf" }
140+
private class RegexFlowConf extends DataFlow2::Configuration {
141+
RegexFlowConf() { this = "RegexFlowConfig" }
214142

215-
override predicate isSource(DataFlow::Node src) {
216-
src.asExpr() instanceof ExploitableStringLiteral
143+
override predicate isSource(DataFlow::Node node) {
144+
node.asExpr() instanceof ExploitableStringLiteral
217145
}
218146

219-
override predicate isSink(DataFlow::Node sink) {
220-
exists(RegexMatchMethodAccess ma | sink.asExpr() = ma.getRegexArg())
221-
}
147+
override predicate isSink(DataFlow::Node node) { node instanceof RegexFlowSink }
222148

223149
override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
224150
any(RegexAdditionalFlowStep s).step(node1, node2)
225151
}
152+
153+
override predicate isBarrier(DataFlow::Node node) {
154+
node.getEnclosingCallable().getDeclaringType() instanceof NonSecurityTestClass
155+
}
156+
}
157+
158+
/**
159+
* Holds if `regex` is used as a regex, with the mode `mode` (if known).
160+
* If regex mode is not known, `mode` will be `"None"`.
161+
*
162+
* As an optimisation, only regexes containing an infinite repitition quatifier (`+`, `*`, or `{x,}`)
163+
* and therefore may be relevant for ReDoS queries are considered.
164+
*/
165+
predicate usedAsRegex(StringLiteral regex, string mode, boolean match_full_string) {
166+
any(RegexFlowConf c).hasFlow(DataFlow2::exprNode(regex), _) and
167+
mode = "None" and // TODO: proper mode detection
168+
(if matchesFullString(regex) then match_full_string = true else match_full_string = false)
169+
}
170+
171+
/**
172+
* Holds if `regex` is used as a regular expression that is matched against a full string,
173+
* as though it was implicitly surrounded by ^ and $.
174+
*/
175+
private predicate matchesFullString(StringLiteral regex) {
176+
exists(RegexFlowConf c, RegexFlowSink sink |
177+
sink.matchesFullString() and
178+
c.hasFlow(DataFlow2::exprNode(regex), sink)
179+
)
226180
}
227181

228182
/**
@@ -232,12 +186,8 @@ private class RegexMatchFlowConf extends DataFlow2::Configuration {
232186
* and therefore may be relevant for ReDoS queries are considered.
233187
*/
234188
predicate regexMatchedAgainst(StringLiteral regex, Expr str) {
235-
exists(
236-
DataFlow::Node src, DataFlow::Node sink, RegexMatchMethodAccess ma, RegexMatchFlowConf conf
237-
|
238-
src.asExpr() = regex and
239-
sink.asExpr() = ma.getRegexArg() and
240-
conf.hasFlow(src, sink) and
241-
str = ma.getStringArg()
189+
exists(RegexFlowConf c, RegexFlowSink sink |
190+
str = sink.getStringArgument() and
191+
c.hasFlow(DataFlow2::exprNode(regex), sink)
242192
)
243193
}

java/ql/lib/semmle/code/java/regex/RegexFlowModels.qll

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,33 +3,30 @@
33
import java
44
import semmle.code.java.dataflow.ExternalFlow
55

6-
private class RegexSourceCsv extends SourceModelCsv {
7-
override predicate row(string row) {
8-
row =
9-
[
10-
//"namespace;type;subtypes;name;signature;ext;output;kind"
11-
"java.util.regex;Pattern;false;compile;(String);;ReturnValue;regex-compile",
12-
]
13-
}
14-
}
15-
166
private class RegexSinkCsv extends SinkModelCsv {
177
override predicate row(string row) {
188
row =
199
[
2010
//"namespace;type;subtypes;name;signature;ext;input;kind"
21-
"java.util.regex;Pattern;false;compile;(String);;Argument[0];regex-compile",
22-
"java.util.regex;Pattern;false;compile;(String,int);;Argument[0];regex-compile",
23-
"java.util.regex;Pattern;false;matches;(String,CharSequence);;Argument[0];regex-compile-match",
24-
"java.lang;String;false;matches;(String);;Argument[0];regex-compile-match",
25-
"java.lang;String;false;split;(String);;Argument[0];regex-compile-find",
26-
"java.lang;String;false;split;(String,int);;Argument[0];regex-compile-find",
27-
"java.lang;String;false;replaceAll;(String,String);;Argument[0];regex-compile-find",
28-
"java.lang;String;false;replaceFirst;(String,String);;Argument[0];regex-compile-find",
29-
"com.google.common.base;Splitter;false;onPattern;(String);;Argument[0];regex-compile-find",
30-
// regex-match sinks
31-
"java.util.regex;Pattern;false;asMatchPredicate;();;Argument[-1];regex-match",
32-
"java.util.regex;Matcher;false;matches;();;Argument[-1];regex-match",
11+
"java.util.regex;Matcher;false;matches;();;Argument[-1];regex-use[f]",
12+
"java.util.regex;Pattern;false;asMatchPredicate;();;Argument[-1];regex-use[f]",
13+
"java.util.regex;Pattern;false;compile;(String);;Argument[0];regex-use[]",
14+
"java.util.regex;Pattern;false;compile;(String,int);;Argument[0];regex-use[]",
15+
"java.util.regex;Pattern;false;matcher;(CharSequence);;Argument[-1];regex-use[0]",
16+
"java.util.regex;Pattern;false;matches;(String,CharSequence);;Argument[0];regex-use[f1]",
17+
"java.util.regex;Pattern;false;split;(CharSequence);;Argument[-1];regex-use[0]",
18+
"java.util.regex;Pattern;false;split;(CharSequence,int);;Argument[-1];regex-use[0]",
19+
"java.util.regex;Pattern;false;splitAsStream;(CharSequence);;Argument[-1];regex-use[0]",
20+
"java.util.function;Predicate;false;test;(Object);;Argument[-1];regex-use[0]",
21+
"java.lang;String;false;matches;(String);;Argument[0];regex-use[f-1]",
22+
"java.lang;String;false;split;(String);;Argument[0];regex-use[-1]",
23+
"java.lang;String;false;split;(String,int);;Argument[0];regex-use[-1]",
24+
"java.lang;String;false;replaceAll;(String,String);;Argument[0];regex-use[-1]",
25+
"java.lang;String;false;replaceFirst;(String,String);;Argument[0];regex-use[-1]",
26+
"com.google.common.base;Splitter;false;onPattern;(String);;Argument[0];regex-use[]",
27+
"com.google.common.base;Splitter;false;split;(CharSequence);;Argument[-1];regex-use[0]",
28+
"com.google.common.base;Splitter;false;splitToList;(CharSequence);;Argument[-1];regex-use[0]",
29+
"com.google.common.base;Splitter$MapSplitter;false;split;(CharSequence);;Argument[-1];regex-use[0]",
3330
]
3431
}
3532
}

0 commit comments

Comments
 (0)