Skip to content

Commit 91491d9

Browse files
Jami CogswellJami Cogswell
authored andcommitted
refactor into more classes; add more test cases; add LITERAL sanitizer
1 parent 50d638d commit 91491d9

File tree

5 files changed

+132
-65
lines changed

5 files changed

+132
-65
lines changed

java/ql/lib/semmle/code/java/frameworks/Regex.qll

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22

33
private import semmle.code.java.dataflow.ExternalFlow
44

5+
/** The class `java.util.regex.Pattern`. */
6+
class TypeRegexPattern extends Class {
7+
TypeRegexPattern() { this.hasQualifiedName("java.util.regex", "Pattern") }
8+
}
9+
510
private class RegexModel extends SummaryModelCsv {
611
override predicate row(string s) {
712
s =

java/ql/lib/semmle/code/java/frameworks/apache/Lang.qll

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,8 @@ class TypeApacheSystemUtils extends Class {
4646
this.hasQualifiedName(["org.apache.commons.lang", "org.apache.commons.lang3"], "SystemUtils")
4747
}
4848
}
49+
50+
/** The class `org.apache.commons.lang3.RegExUtils`. */
51+
class TypeApacheRegExUtils extends Class {
52+
TypeApacheRegExUtils() { this.hasQualifiedName("org.apache.commons.lang3", "RegExUtils") }
53+
}
Lines changed: 73 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,60 +1,37 @@
11
/** Provides classes and predicates related to regex injection in Java. */
22

33
import java
4-
import semmle.code.java.dataflow.FlowSources
5-
import semmle.code.java.dataflow.TaintTracking
6-
import semmle.code.java.regex.RegexFlowConfigs
4+
private import semmle.code.java.dataflow.DataFlow
5+
private import semmle.code.java.frameworks.Regex
6+
private import semmle.code.java.frameworks.apache.Lang
77

8-
/**
9-
* A data flow sink for untrusted user input used to construct regular expressions.
10-
*/
8+
/** A data flow sink for untrusted user input used to construct regular expressions. */
119
abstract class Sink extends DataFlow::ExprNode { }
1210

13-
/**
14-
* A sanitizer for untrusted user input used to construct regular expressions.
15-
*/
11+
/** A sanitizer for untrusted user input used to construct regular expressions. */
1612
abstract class Sanitizer extends DataFlow::ExprNode { }
1713

18-
// TODO: look into further: Pattern.matcher, .pattern() and .toString() as taint steps, .split and .splitAsStream
19-
/**
20-
* A data flow sink for untrusted user input used to construct regular expressions.
21-
*/
22-
private class RegexSink extends Sink {
23-
RegexSink() {
14+
private class RegexInjectionSink extends Sink {
15+
RegexInjectionSink() {
2416
exists(MethodAccess ma, Method m | m = ma.getMethod() |
2517
ma.getArgument(0) = this.asExpr() and
2618
(
27-
m.getDeclaringType() instanceof TypeString and
28-
m.hasName(["matches", "split", "replaceFirst", "replaceAll"])
29-
or
30-
m.getDeclaringType() instanceof RegexPattern and
31-
m.hasName(["compile", "matches"])
19+
m instanceof StringRegexMethod or
20+
m instanceof PatternRegexMethod
3221
)
3322
or
34-
m.getDeclaringType() instanceof ApacheRegExUtils and
35-
(
36-
ma.getArgument(1) = this.asExpr() and
37-
// only handles String param here because the other param option, Pattern, is already handled by `java.util.regex.Pattern` above
38-
m.getParameterType(1) instanceof TypeString and
39-
m.hasName([
40-
"removeAll", "removeFirst", "removePattern", "replaceAll", "replaceFirst",
41-
"replacePattern"
42-
])
43-
)
23+
ma.getArgument(1) = this.asExpr() and
24+
m instanceof ApacheRegExUtilsMethod
4425
)
4526
}
4627
}
4728

48-
/**
49-
* A call to a function whose name suggests that it escapes regular
50-
* expression meta-characters.
51-
*/
52-
class RegexInjectionSanitizer extends Sanitizer {
29+
/** A call to a function which escapes regular expression meta-characters. */
30+
private class RegexInjectionSanitizer extends Sanitizer {
5331
RegexInjectionSanitizer() {
32+
// a function whose name suggests that it escapes regular expression meta-characters
5433
exists(string calleeName, string sanitize, string regexp |
5534
calleeName = this.asExpr().(Call).getCallee().getName() and
56-
// TODO: add test case for sanitize? I think current tests only check escape
57-
// TODO: should this be broader and only look for "escape|saniti[sz]e" and not "regexp?" as well? -- e.g. err on side of FNs?
5835
sanitize = "(?:escape|saniti[sz]e)" and
5936
regexp = "regexp?"
6037
|
@@ -63,31 +40,70 @@ class RegexInjectionSanitizer extends Sanitizer {
6340
".*)")
6441
)
6542
or
66-
// adds Pattern.quote() as a sanitizer
67-
// https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html#quote-java.lang.String-: "Metacharacters or escape sequences in the input sequence will be given no special meaning."
68-
// see https://rules.sonarsource.com/java/RSPEC-2631 and https://sensei.securecodewarrior.com/recipes/scw:java:regex-injection
43+
// a call to the `Pattern.quote` method, which gives metacharacters or escape sequences no special meaning
6944
exists(MethodAccess ma, Method m | m = ma.getMethod() |
70-
m.getDeclaringType() instanceof RegexPattern and
71-
(
72-
ma.getArgument(0) = this.asExpr() and
73-
m.hasName("quote")
74-
)
45+
ma.getArgument(0) = this.asExpr() and
46+
m instanceof PatternQuoteMethod
47+
)
48+
or
49+
// use of Pattern.LITERAL flag with `Pattern.compile` which gives metacharacters or escape sequences no special meaning
50+
exists(MethodAccess ma, Method m, Field field | m = ma.getMethod() |
51+
ma.getArgument(0) = this.asExpr() and
52+
m instanceof PatternRegexMethod and
53+
m.hasName("compile") and
54+
//ma.getArgument(1).toString() = "Pattern.LITERAL" and
55+
field instanceof PatternLiteral and
56+
ma.getArgument(1) = field.getAnAccess()
7557
)
7658
}
7759
}
7860

79-
// ******** HELPER CLASSES/METHODS (MAYBE MOVE ELSEWHERE?) ********
80-
// TODO: move below to Regex.qll??
81-
/** The Java class `java.util.regex.Pattern`. */
82-
private class RegexPattern extends RefType {
83-
RegexPattern() { this.hasQualifiedName("java.util.regex", "Pattern") }
61+
/**
62+
* The methods of the class `java.lang.String` that take a regular expression
63+
* as a parameter.
64+
*/
65+
private class StringRegexMethod extends Method {
66+
StringRegexMethod() {
67+
this.getDeclaringType() instanceof TypeString and
68+
this.hasName(["matches", "split", "replaceFirst", "replaceAll"])
69+
}
70+
}
71+
72+
/**
73+
* The methods of the class `java.util.regex.Pattern` that take a regular
74+
* expression as a parameter.
75+
*/
76+
private class PatternRegexMethod extends Method {
77+
PatternRegexMethod() {
78+
this.getDeclaringType() instanceof TypeRegexPattern and
79+
this.hasName(["compile", "matches"])
80+
}
8481
}
8582

86-
// /** The Java class `java.util.regex.Matcher`. */
87-
// private class RegexMatcher extends RefType {
88-
// RegexMatcher() { this.hasQualifiedName("java.util.regex", "Matcher") }
89-
// }
90-
/** The Java class `org.apache.commons.lang3.RegExUtils`. */
91-
private class ApacheRegExUtils extends RefType {
92-
ApacheRegExUtils() { this.hasQualifiedName("org.apache.commons.lang3", "RegExUtils") }
83+
/** The `quote` method of the `java.util.regex.Pattern` class. */
84+
private class PatternQuoteMethod extends Method {
85+
PatternQuoteMethod() { this.hasName(["quote"]) }
86+
}
87+
88+
/** The `LITERAL` field of the `java.util.regex.Pattern` class. */
89+
private class PatternLiteral extends Field {
90+
PatternLiteral() {
91+
this.getDeclaringType() instanceof TypeRegexPattern and
92+
this.hasName("LITERAL")
93+
}
94+
}
95+
96+
/**
97+
* The methods of the class `org.apache.commons.lang3.RegExUtils` that take
98+
* a regular expression of type `String` as a parameter.
99+
*/
100+
private class ApacheRegExUtilsMethod extends Method {
101+
ApacheRegExUtilsMethod() {
102+
this.getDeclaringType() instanceof TypeApacheRegExUtils and
103+
// only handles String param here because the other param option, Pattern, is already handled by `java.util.regex.Pattern`
104+
this.getParameterType(1) instanceof TypeString and
105+
this.hasName([
106+
"removeAll", "removeFirst", "removePattern", "replaceAll", "replaceFirst", "replacePattern"
107+
])
108+
}
93109
}

java/ql/lib/semmle/code/java/security/RegexInjectionQuery.qll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@ import semmle.code.java.dataflow.FlowSources
55
import semmle.code.java.dataflow.TaintTracking
66
import semmle.code.java.security.RegexInjection
77

8-
/**
9-
* A taint-tracking configuration for untrusted user input used to construct regular expressions.
10-
*/
8+
/** A taint-tracking configuration for untrusted user input used to construct regular expressions. */
119
class RegexInjectionConfiguration extends TaintTracking::Configuration {
1210
RegexInjectionConfiguration() { this = "RegexInjection" }
1311

java/ql/test/query-tests/security/CWE-730/RegexInjectionTest.java

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -84,20 +84,55 @@ public boolean pattern6(javax.servlet.http.HttpServletRequest request) {
8484
String pattern = request.getParameter("pattern");
8585
String input = request.getParameter("input");
8686

87-
// TODO: add a "Safe" test for situation when this return value is stored in a variable, then the variable is used in the regex
8887
escapeSpecialRegexChars(pattern);
8988

9089
// BAD: the pattern is not really sanitized
9190
return input.matches("^" + pattern + "=.*$"); // $ hasRegexInjection
9291
}
9392

93+
public boolean pattern7(javax.servlet.http.HttpServletRequest request) {
94+
String pattern = request.getParameter("pattern");
95+
String input = request.getParameter("input");
96+
97+
String escapedPattern = escapeSpecialRegexChars(pattern);
98+
99+
// Safe: User input is sanitized before constructing the regex
100+
return input.matches("^" + escapedPattern + "=.*$");
101+
}
102+
103+
public boolean pattern8(javax.servlet.http.HttpServletRequest request) {
104+
String pattern = request.getParameter("pattern");
105+
String input = request.getParameter("input");
106+
107+
// Safe: User input is sanitized before constructing the regex
108+
return input.matches("^" + sanitizeSpecialRegexChars(pattern) + "=.*$");
109+
}
110+
111+
public boolean pattern9(javax.servlet.http.HttpServletRequest request) {
112+
String pattern = request.getParameter("pattern");
113+
String input = request.getParameter("input");
114+
115+
// Safe: User input is sanitized before constructing the regex
116+
return input.matches("^" + sanitiseSpecialRegexChars(pattern) + "=.*$");
117+
}
118+
94119
Pattern SPECIAL_REGEX_CHARS = Pattern.compile("[{}()\\[\\]><-=!.+*?^$\\\\|]");
95120

96-
// TODO: check if any other inbuilt escape/sanitizers in Java APIs
121+
// test `escape...regex`
97122
String escapeSpecialRegexChars(String str) {
98123
return SPECIAL_REGEX_CHARS.matcher(str).replaceAll("\\\\$0");
99124
}
100125

126+
// test `sanitize...regex`
127+
String sanitizeSpecialRegexChars(String str) {
128+
return SPECIAL_REGEX_CHARS.matcher(str).replaceAll("\\\\$0");
129+
}
130+
131+
// test `sanitise...regex`
132+
String sanitiseSpecialRegexChars(String str) {
133+
return SPECIAL_REGEX_CHARS.matcher(str).replaceAll("\\\\$0");
134+
}
135+
101136
public boolean apache1(javax.servlet.http.HttpServletRequest request) {
102137
String pattern = request.getParameter("pattern");
103138
String input = request.getParameter("input");
@@ -148,12 +183,20 @@ public boolean apache7(javax.servlet.http.HttpServletRequest request) {
148183
return RegExUtils.replacePattern(input, pattern, "").length() > 0; // $ hasRegexInjection
149184
}
150185

151-
// from https://rules.sonarsource.com/java/RSPEC-2631 to test for Pattern.quote as safe
152-
public boolean validate(javax.servlet.http.HttpServletRequest request) {
186+
// test `Pattern.quote` as safe
187+
public boolean quoteTest(javax.servlet.http.HttpServletRequest request) {
153188
String regex = request.getParameter("regex");
154189
String input = request.getParameter("input");
155190

156-
return input.matches(Pattern.quote(regex)); // Safe: with Pattern.quote, metacharacters or escape sequences will be given no special meaning
191+
return input.matches(Pattern.quote(regex)); // Safe
192+
}
193+
194+
// test `Pattern.LITERAL` as safe
195+
public boolean literalTest(javax.servlet.http.HttpServletRequest request) {
196+
String pattern = request.getParameter("pattern");
197+
String input = request.getParameter("input");
198+
199+
return Pattern.compile(pattern, Pattern.LITERAL).matcher(input).matches(); // Safe
157200
}
158201
}
159202

0 commit comments

Comments
 (0)