Skip to content

Commit 50d638d

Browse files
Jami CogswellJami Cogswell
authored and committed
create RegexInjection.qll file
1 parent f6f26fe commit 50d638d

File tree

3 files changed

+99
-88
lines changed

3 files changed

+99
-88
lines changed

java/ql/lib/semmle/code/java/regex/RegexFlowConfigs.qll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,7 @@ private predicate regexSinkKindInfo(string kind, boolean full, int strArg) {
3939
}
4040

4141
/** A sink that is relevant for regex flow. */
42-
class RegexFlowSink extends DataFlow::Node {
43-
// ! switch back to private!!! - just testing if this sink is useful for regex injection as well
42+
private class RegexFlowSink extends DataFlow::Node {
4443
boolean full;
4544
int strArg;
4645

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/** Provides classes and predicates related to regex injection in Java. */
2+
3+
import java
4+
import semmle.code.java.dataflow.FlowSources
5+
import semmle.code.java.dataflow.TaintTracking
6+
import semmle.code.java.regex.RegexFlowConfigs
7+
8+
/**
9+
* A data flow sink for untrusted user input used to construct regular expressions.
10+
*/
11+
abstract class Sink extends DataFlow::ExprNode { }
12+
13+
/**
14+
* A sanitizer for untrusted user input used to construct regular expressions.
15+
*/
16+
abstract class Sanitizer extends DataFlow::ExprNode { }
17+
18+
// TODO: investigate `Pattern.matcher`, `.pattern()` and `.toString()` as taint steps, as well as `.split` and `.splitAsStream`.
19+
/**
20+
* The default regex injection sinks: the regex argument of selected `String`, `java.util.regex.Pattern` and `org.apache.commons.lang3.RegExUtils` methods.
21+
*/
22+
private class RegexSink extends Sink {
23+
RegexSink() {
24+
exists(MethodAccess ma, Method m | m = ma.getMethod() |
25+
ma.getArgument(0) = this.asExpr() and
26+
(
27+
m.getDeclaringType() instanceof TypeString and
28+
m.hasName(["matches", "split", "replaceFirst", "replaceAll"])
29+
or
30+
m.getDeclaringType() instanceof RegexPattern and
31+
m.hasName(["compile", "matches"])
32+
)
33+
or
34+
m.getDeclaringType() instanceof ApacheRegExUtils and
35+
(
36+
ma.getArgument(1) = this.asExpr() and
37+
// Only the overloads whose second parameter is a `String` are matched here; the overloads taking a `Pattern` are already covered by the `java.util.regex.Pattern` case above.
38+
m.getParameterType(1) instanceof TypeString and
39+
m.hasName([
40+
"removeAll", "removeFirst", "removePattern", "replaceAll", "replaceFirst",
41+
"replacePattern"
42+
])
43+
)
44+
)
45+
}
46+
}
47+
48+
/**
49+
* A call to a function whose name suggests that it escapes regular
50+
* expression meta-characters, or the argument of a call to `Pattern.quote`.
51+
*/
52+
class RegexInjectionSanitizer extends Sanitizer {
53+
RegexInjectionSanitizer() {
54+
exists(string calleeName, string sanitize, string regexp |
55+
calleeName = this.asExpr().(Call).getCallee().getName() and
56+
// TODO: add test case for sanitize? I think current tests only check escape
57+
// TODO: should this be broader, matching only "escape|saniti[sz]e" without also requiring "regexp?" (i.e. err on the side of false negatives)?
58+
sanitize = "(?:escape|saniti[sz]e)" and
59+
regexp = "regexp?"
60+
|
61+
calleeName
62+
.regexpMatch("(?i)(" + sanitize + ".*" + regexp + ".*)" + "|(" + regexp + ".*" + sanitize +
63+
".*)")
64+
)
65+
or
66+
// adds Pattern.quote() as a sanitizer
67+
// The Javadoc (https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html#quote-java.lang.String-) states: "Metacharacters or escape sequences in the input sequence will be given no special meaning."
68+
// see https://rules.sonarsource.com/java/RSPEC-2631 and https://sensei.securecodewarrior.com/recipes/scw:java:regex-injection
69+
exists(MethodAccess ma, Method m | m = ma.getMethod() |
70+
m.getDeclaringType() instanceof RegexPattern and
71+
(
72+
ma.getArgument(0) = this.asExpr() and
73+
m.hasName("quote")
74+
)
75+
)
76+
}
77+
}
78+
79+
// ******** HELPER CLASSES/METHODS (MAYBE MOVE ELSEWHERE?) ********
80+
// TODO: consider moving the classes below to Regex.qll.
81+
/** The Java class `java.util.regex.Pattern`. */
82+
private class RegexPattern extends RefType {
83+
RegexPattern() { this.hasQualifiedName("java.util.regex", "Pattern") }
84+
}
85+
86+
// /** The Java class `java.util.regex.Matcher`. */
87+
// private class RegexMatcher extends RefType {
88+
// RegexMatcher() { this.hasQualifiedName("java.util.regex", "Matcher") }
89+
// }
90+
/** The Java class `org.apache.commons.lang3.RegExUtils`. */
91+
private class ApacheRegExUtils extends RefType {
92+
ApacheRegExUtils() { this.hasQualifiedName("org.apache.commons.lang3", "RegExUtils") }
93+
}
Lines changed: 5 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -1,100 +1,19 @@
1+
/** Provides configurations to be used in queries related to regex injection. */
2+
13
import java
24
import semmle.code.java.dataflow.FlowSources
35
import semmle.code.java.dataflow.TaintTracking
4-
import semmle.code.java.regex.RegexFlowConfigs
5-
6-
/** The Java class `java.util.regex.Pattern`. */
7-
private class RegexPattern extends RefType {
8-
RegexPattern() { this.hasQualifiedName("java.util.regex", "Pattern") }
9-
}
10-
11-
/** The Java class `java.util.regex.Matcher`. */
12-
private class RegexMatcher extends RefType {
13-
RegexMatcher() { this.hasQualifiedName("java.util.regex", "Matcher") }
14-
}
15-
16-
/** The Java class `org.apache.commons.lang3.RegExUtils`. */
17-
private class ApacheRegExUtils extends RefType {
18-
ApacheRegExUtils() { this.hasQualifiedName("java.util.regex", "Matcher") }
19-
}
20-
21-
// TODO: Look for above in pre-existing regex libraries again.
22-
// TODO: look into further: Pattern.matcher, .pattern() and .toString() as taint steps, .split and .splitAsStream
23-
/**
24-
* A data flow sink for untrusted user input used to construct regular expressions.
25-
*/
26-
class RegexSink extends DataFlow::ExprNode {
27-
RegexSink() {
28-
exists(MethodAccess ma, Method m | m = ma.getMethod() |
29-
ma.getArgument(0) = this.asExpr() and
30-
(
31-
m.getDeclaringType() instanceof TypeString and
32-
m.hasName(["matches", "split", "replaceFirst", "replaceAll"])
33-
or
34-
m.getDeclaringType() instanceof RegexPattern and
35-
m.hasName(["compile", "matches"])
36-
)
37-
or
38-
m.getDeclaringType() instanceof ApacheRegExUtils and
39-
(
40-
ma.getArgument(1) = this.asExpr() and
41-
m.getParameterType(1) instanceof TypeString and // only does String here because other option is Pattern, but that's already handled by `java.util.regex.Pattern` above
42-
m.hasName([
43-
"removeAll", "removeFirst", "removePattern", "replaceAll", "replaceFirst",
44-
"replacePattern"
45-
])
46-
)
47-
)
48-
}
49-
}
50-
51-
// ! keep and rename to RegexInjectionSanitizer IF makes sense to have two sanitizers extending it?;
52-
// ! else, ask Tony/others about if stylistically better to keep it (see default example in LogInjection.qll, etc.)
53-
// ! maybe make abstract classes for source and sink as well (if you do this, mention it in PR description as an attempt to be similar to the other languages' implementations)
54-
abstract class Sanitizer extends DataFlow::ExprNode { }
55-
56-
/**
57-
* A call to a function whose name suggests that it escapes regular
58-
* expression meta-characters.
59-
*/
60-
// ! rename as DefaultRegexInjectionSanitizer?
61-
class RegExpSanitizationCall extends Sanitizer {
62-
RegExpSanitizationCall() {
63-
exists(string calleeName, string sanitize, string regexp |
64-
calleeName = this.asExpr().(Call).getCallee().getName() and
65-
// ! add test case for sanitize? I think current tests only check escape
66-
sanitize = "(?:escape|saniti[sz]e)" and // TODO: confirm this is sufficient
67-
regexp = "regexp?" // TODO: confirm this is sufficient
68-
|
69-
calleeName
70-
.regexpMatch("(?i)(" + sanitize + ".*" + regexp + ".*)" + "|(" + regexp + ".*" + sanitize +
71-
".*)") // TODO: confirm this is sufficient
72-
)
73-
or
74-
// adds Pattern.quote() as a sanitizer
75-
// see https://rules.sonarsource.com/java/RSPEC-2631 and https://sensei.securecodewarrior.com/recipes/scw:java:regex-injection
76-
exists(MethodAccess ma, Method m | m = ma.getMethod() |
77-
m.getDeclaringType() instanceof RegexPattern and
78-
(
79-
ma.getArgument(0) = this.asExpr() and
80-
m.hasName("quote")
81-
)
82-
)
83-
}
84-
}
6+
import semmle.code.java.security.RegexInjection
857

868
/**
879
* A taint-tracking configuration for untrusted user input used to construct regular expressions.
8810
*/
8911
class RegexInjectionConfiguration extends TaintTracking::Configuration {
90-
RegexInjectionConfiguration() { this = "RegexInjectionConfiguration" }
12+
RegexInjectionConfiguration() { this = "RegexInjection" }
9113

9214
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
9315

94-
override predicate isSink(DataFlow::Node sink) { sink instanceof RegexSink }
16+
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
9517

96-
// ! testing below RegexFlowSink from RegexFlowConfigs.qll
97-
// ! extra results from jfinal with this... look into further...
98-
// override predicate isSink(DataFlow::Node sink) { sink instanceof RegexFlowSink }
9918
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
10019
}

0 commit comments

Comments
 (0)