Skip to content

Commit a4661e1

Browse files
authored
Merge pull request github#5704 from edvraa/regexj
Java: Regex injection
2 parents 0b22541 + 5eb96c1 commit a4661e1

File tree

7 files changed

+392
-0
lines changed

7 files changed

+392
-0
lines changed
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package com.example.demo;
2+
3+
import java.util.regex.Matcher;
4+
import java.util.regex.Pattern;
5+
6+
import org.springframework.web.bind.annotation.GetMapping;
7+
import org.springframework.web.bind.annotation.RequestParam;
8+
import org.springframework.web.bind.annotation.RestController;
9+
10+
@RestController
11+
public class DemoApplication {
12+
13+
@GetMapping("/string1")
14+
public String string1(@RequestParam(value = "input", defaultValue = "test") String input,
15+
@RequestParam(value = "pattern", defaultValue = ".*") String pattern) {
16+
// BAD: Unsanitized user input is used to construct a regular expression
17+
if (input.matches("^" + pattern + "=.*$"))
18+
return "match!";
19+
20+
return "doesn't match!";
21+
}
22+
23+
@GetMapping("/string2")
24+
public String string2(@RequestParam(value = "input", defaultValue = "test") String input,
25+
@RequestParam(value = "pattern", defaultValue = ".*") String pattern) {
26+
// GOOD: User input is sanitized before constructing the regex
27+
if (input.matches("^" + escapeSpecialRegexChars(pattern) + "=.*$"))
28+
return "match!";
29+
30+
return "doesn't match!";
31+
}
32+
33+
Pattern SPECIAL_REGEX_CHARS = Pattern.compile("[{}()\\[\\]><-=!.+*?^$\\\\|]");
34+
35+
String escapeSpecialRegexChars(String str) {
36+
return SPECIAL_REGEX_CHARS.matcher(str).replaceAll("\\\\$0");
37+
}
38+
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>
8+
Constructing a regular expression with unsanitized user input is dangerous as a malicious user may
9+
be able to modify the meaning of the expression. In particular, such a user may be able to provide
10+
a regular expression fragment that takes exponential time in the worst case, and use that to
11+
perform a Denial of Service attack.
12+
</p>
13+
</overview>
14+
15+
<recommendation>
16+
<p>
17+
Before embedding user input into a regular expression, use a sanitization function
18+
to escape meta-characters that have special meaning.
19+
</p>
20+
</recommendation>
21+
22+
<example>
23+
<p>
24+
The following example shows an HTTP request parameter that is used to construct a regular expression:
25+
</p>
26+
<sample src="RegexInjection.java" />
27+
<p>
28+
In the first case the user-provided regex is not escaped.
29+
If a malicious user provides a regex that has exponential worst case performance,
30+
then this could lead to a Denial of Service.
31+
</p>
32+
<p>
33+
In the second case, the user input is escaped using <code>escapeSpecialRegexChars</code> before being included
34+
in the regular expression. This ensures that the user cannot insert characters which have a special
35+
meaning in regular expressions.
36+
</p>
37+
</example>
38+
39+
<references>
40+
<li>
41+
OWASP:
42+
<a href="https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS">Regular expression Denial of Service - ReDoS</a>.
43+
</li>
44+
<li>
45+
Wikipedia: <a href="https://en.wikipedia.org/wiki/ReDoS">ReDoS</a>.
46+
</li>
47+
</references>
48+
</qhelp>
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/**
2+
* @name Regular expression injection
3+
* @description User input should not be used in regular expressions without first being sanitized,
4+
* otherwise a malicious user may be able to provide a regex that could require
5+
* exponential time on certain inputs.
6+
* @kind path-problem
7+
* @problem.severity error
8+
* @precision high
9+
* @id java/regex-injection
10+
* @tags security
11+
* external/cwe/cwe-730
12+
* external/cwe/cwe-400
13+
*/
14+
15+
import java
16+
import semmle.code.java.dataflow.FlowSources
17+
import semmle.code.java.dataflow.TaintTracking
18+
import DataFlow::PathGraph
19+
20+
/**
 * An expression that is passed as the regular-expression argument to one of the
 * modelled methods of `java.lang.String`, `java.util.regex.Pattern` or
 * `org.apache.commons.lang3.RegExUtils`.
 */
class RegexSink extends DataFlow::ExprNode {
  RegexSink() {
    exists(MethodAccess call, Method callee, RefType owner |
      callee = call.getMethod() and
      owner = callee.getDeclaringType()
    |
      // `String` methods: the regex is the first argument.
      owner instanceof TypeString and
      callee.hasName(["matches", "split", "replaceFirst", "replaceAll"]) and
      this.asExpr() = call.getArgument(0)
      or
      // `Pattern.compile` / `Pattern.matches`: the regex is the first argument.
      owner.hasQualifiedName("java.util.regex", "Pattern") and
      callee.hasName(["compile", "matches"]) and
      this.asExpr() = call.getArgument(0)
      or
      // `RegExUtils` methods: the regex is the second argument, and only the
      // overloads whose second parameter is a `String` are considered.
      owner.hasQualifiedName("org.apache.commons.lang3", "RegExUtils") and
      callee.getParameterType(1) instanceof TypeString and
      callee
          .hasName([
              "removeAll", "removeFirst", "removePattern", "replaceAll", "replaceFirst",
              "replacePattern"
            ]) and
      this.asExpr() = call.getArgument(1)
    )
  }
}
52+
53+
/**
 * A data flow node that the regex injection configuration treats as a sanitizer.
 * The class is abstract; its subclasses decide which nodes qualify.
 */
abstract class Sanitizer extends DataFlow::ExprNode { }
54+
55+
/**
 * A call whose callee name suggests that it escapes or sanitizes regular
 * expression meta-characters: the name (case-insensitively) starts with
 * "escape"/"sanitize"/"sanitise" and later mentions "regex"/"regexp", or
 * starts with "regex"/"regexp" and later mentions one of those verbs.
 */
class RegExpSanitizationCall extends Sanitizer {
  RegExpSanitizationCall() {
    exists(string name, string sanitize, string regexp, string namePattern |
      sanitize = "(?:escape|saniti[sz]e)" and
      regexp = "regexp?" and
      namePattern =
        "(?i)(" + sanitize + ".*" + regexp + ".*)" + "|(" + regexp + ".*" + sanitize + ".*)" and
      name = this.asExpr().(Call).getCallee().getName()
    |
      // `regexpMatch` has to match the whole callee name.
      name.regexpMatch(namePattern)
    )
  }
}
72+
73+
/**
 * A taint-tracking configuration that follows untrusted user input into
 * expressions used to construct regular expressions.
 */
class RegexInjectionConfiguration extends TaintTracking::Configuration {
  RegexInjectionConfiguration() { this = "RegexInjectionConfiguration" }

  /** Holds if `source` is a remote flow source. */
  override predicate isSource(DataFlow::Node source) { source = any(RemoteFlowSource remote) }

  /** Holds if `sink` is used as a regular expression. */
  override predicate isSink(DataFlow::Node sink) { sink = any(RegexSink regexUse) }

  /** Holds if `node` is a recognised sanitizer, which stops taint from propagating. */
  override predicate isSanitizer(DataFlow::Node node) { node = any(Sanitizer barrier) }
}
85+
86+
// Report each path along which user-controlled data reaches a regular-expression sink.
from RegexInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ is user controlled.", source.getNode(),
  "This regular expression pattern"
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
edges
2+
| RegexInjection.java:13:22:13:52 | getParameter(...) : String | RegexInjection.java:16:26:16:47 | ... + ... |
3+
| RegexInjection.java:20:22:20:52 | getParameter(...) : String | RegexInjection.java:23:24:23:30 | pattern |
4+
| RegexInjection.java:27:22:27:52 | getParameter(...) : String | RegexInjection.java:30:31:30:37 | pattern |
5+
| RegexInjection.java:34:22:34:52 | getParameter(...) : String | RegexInjection.java:37:29:37:35 | pattern |
6+
| RegexInjection.java:41:22:41:52 | getParameter(...) : String | RegexInjection.java:44:34:44:40 | pattern |
7+
| RegexInjection.java:51:22:51:52 | getParameter(...) : String | RegexInjection.java:54:28:54:34 | pattern |
8+
| RegexInjection.java:58:22:58:52 | getParameter(...) : String | RegexInjection.java:61:28:61:34 | pattern |
9+
| RegexInjection.java:65:22:65:52 | getParameter(...) : String | RegexInjection.java:68:36:68:42 | pattern : String |
10+
| RegexInjection.java:68:32:68:43 | foo(...) : String | RegexInjection.java:68:26:68:52 | ... + ... |
11+
| RegexInjection.java:68:36:68:42 | pattern : String | RegexInjection.java:68:32:68:43 | foo(...) : String |
12+
| RegexInjection.java:84:22:84:52 | getParameter(...) : String | RegexInjection.java:90:26:90:47 | ... + ... |
13+
| RegexInjection.java:100:22:100:52 | getParameter(...) : String | RegexInjection.java:103:40:103:46 | pattern |
14+
| RegexInjection.java:107:22:107:52 | getParameter(...) : String | RegexInjection.java:110:42:110:48 | pattern |
15+
| RegexInjection.java:114:22:114:52 | getParameter(...) : String | RegexInjection.java:117:44:117:50 | pattern |
16+
| RegexInjection.java:121:22:121:52 | getParameter(...) : String | RegexInjection.java:124:41:124:47 | pattern |
17+
| RegexInjection.java:128:22:128:52 | getParameter(...) : String | RegexInjection.java:131:43:131:49 | pattern |
18+
| RegexInjection.java:143:22:143:52 | getParameter(...) : String | RegexInjection.java:146:45:146:51 | pattern |
19+
nodes
20+
| RegexInjection.java:13:22:13:52 | getParameter(...) : String | semmle.label | getParameter(...) : String |
21+
| RegexInjection.java:16:26:16:47 | ... + ... | semmle.label | ... + ... |
22+
| RegexInjection.java:20:22:20:52 | getParameter(...) : String | semmle.label | getParameter(...) : String |
23+
| RegexInjection.java:23:24:23:30 | pattern | semmle.label | pattern |
24+
| RegexInjection.java:27:22:27:52 | getParameter(...) : String | semmle.label | getParameter(...) : String |
25+
| RegexInjection.java:30:31:30:37 | pattern | semmle.label | pattern |
26+
| RegexInjection.java:34:22:34:52 | getParameter(...) : String | semmle.label | getParameter(...) : String |
27+
| RegexInjection.java:37:29:37:35 | pattern | semmle.label | pattern |
28+
| RegexInjection.java:41:22:41:52 | getParameter(...) : String | semmle.label | getParameter(...) : String |
29+
| RegexInjection.java:44:34:44:40 | pattern | semmle.label | pattern |
30+
| RegexInjection.java:51:22:51:52 | getParameter(...) : String | semmle.label | getParameter(...) : String |
31+
| RegexInjection.java:54:28:54:34 | pattern | semmle.label | pattern |
32+
| RegexInjection.java:58:22:58:52 | getParameter(...) : String | semmle.label | getParameter(...) : String |
33+
| RegexInjection.java:61:28:61:34 | pattern | semmle.label | pattern |
34+
| RegexInjection.java:65:22:65:52 | getParameter(...) : String | semmle.label | getParameter(...) : String |
35+
| RegexInjection.java:68:26:68:52 | ... + ... | semmle.label | ... + ... |
36+
| RegexInjection.java:68:32:68:43 | foo(...) : String | semmle.label | foo(...) : String |
37+
| RegexInjection.java:68:36:68:42 | pattern : String | semmle.label | pattern : String |
38+
| RegexInjection.java:84:22:84:52 | getParameter(...) : String | semmle.label | getParameter(...) : String |
39+
| RegexInjection.java:90:26:90:47 | ... + ... | semmle.label | ... + ... |
40+
| RegexInjection.java:100:22:100:52 | getParameter(...) : String | semmle.label | getParameter(...) : String |
41+
| RegexInjection.java:103:40:103:46 | pattern | semmle.label | pattern |
42+
| RegexInjection.java:107:22:107:52 | getParameter(...) : String | semmle.label | getParameter(...) : String |
43+
| RegexInjection.java:110:42:110:48 | pattern | semmle.label | pattern |
44+
| RegexInjection.java:114:22:114:52 | getParameter(...) : String | semmle.label | getParameter(...) : String |
45+
| RegexInjection.java:117:44:117:50 | pattern | semmle.label | pattern |
46+
| RegexInjection.java:121:22:121:52 | getParameter(...) : String | semmle.label | getParameter(...) : String |
47+
| RegexInjection.java:124:41:124:47 | pattern | semmle.label | pattern |
48+
| RegexInjection.java:128:22:128:52 | getParameter(...) : String | semmle.label | getParameter(...) : String |
49+
| RegexInjection.java:131:43:131:49 | pattern | semmle.label | pattern |
50+
| RegexInjection.java:143:22:143:52 | getParameter(...) : String | semmle.label | getParameter(...) : String |
51+
| RegexInjection.java:146:45:146:51 | pattern | semmle.label | pattern |
52+
#select
53+
| RegexInjection.java:16:26:16:47 | ... + ... | RegexInjection.java:13:22:13:52 | getParameter(...) : String | RegexInjection.java:16:26:16:47 | ... + ... | $@ is user controlled. | RegexInjection.java:13:22:13:52 | getParameter(...) | This regular expression pattern |
54+
| RegexInjection.java:23:24:23:30 | pattern | RegexInjection.java:20:22:20:52 | getParameter(...) : String | RegexInjection.java:23:24:23:30 | pattern | $@ is user controlled. | RegexInjection.java:20:22:20:52 | getParameter(...) | This regular expression pattern |
55+
| RegexInjection.java:30:31:30:37 | pattern | RegexInjection.java:27:22:27:52 | getParameter(...) : String | RegexInjection.java:30:31:30:37 | pattern | $@ is user controlled. | RegexInjection.java:27:22:27:52 | getParameter(...) | This regular expression pattern |
56+
| RegexInjection.java:37:29:37:35 | pattern | RegexInjection.java:34:22:34:52 | getParameter(...) : String | RegexInjection.java:37:29:37:35 | pattern | $@ is user controlled. | RegexInjection.java:34:22:34:52 | getParameter(...) | This regular expression pattern |
57+
| RegexInjection.java:44:34:44:40 | pattern | RegexInjection.java:41:22:41:52 | getParameter(...) : String | RegexInjection.java:44:34:44:40 | pattern | $@ is user controlled. | RegexInjection.java:41:22:41:52 | getParameter(...) | This regular expression pattern |
58+
| RegexInjection.java:54:28:54:34 | pattern | RegexInjection.java:51:22:51:52 | getParameter(...) : String | RegexInjection.java:54:28:54:34 | pattern | $@ is user controlled. | RegexInjection.java:51:22:51:52 | getParameter(...) | This regular expression pattern |
59+
| RegexInjection.java:61:28:61:34 | pattern | RegexInjection.java:58:22:58:52 | getParameter(...) : String | RegexInjection.java:61:28:61:34 | pattern | $@ is user controlled. | RegexInjection.java:58:22:58:52 | getParameter(...) | This regular expression pattern |
60+
| RegexInjection.java:68:26:68:52 | ... + ... | RegexInjection.java:65:22:65:52 | getParameter(...) : String | RegexInjection.java:68:26:68:52 | ... + ... | $@ is user controlled. | RegexInjection.java:65:22:65:52 | getParameter(...) | This regular expression pattern |
61+
| RegexInjection.java:90:26:90:47 | ... + ... | RegexInjection.java:84:22:84:52 | getParameter(...) : String | RegexInjection.java:90:26:90:47 | ... + ... | $@ is user controlled. | RegexInjection.java:84:22:84:52 | getParameter(...) | This regular expression pattern |
62+
| RegexInjection.java:103:40:103:46 | pattern | RegexInjection.java:100:22:100:52 | getParameter(...) : String | RegexInjection.java:103:40:103:46 | pattern | $@ is user controlled. | RegexInjection.java:100:22:100:52 | getParameter(...) | This regular expression pattern |
63+
| RegexInjection.java:110:42:110:48 | pattern | RegexInjection.java:107:22:107:52 | getParameter(...) : String | RegexInjection.java:110:42:110:48 | pattern | $@ is user controlled. | RegexInjection.java:107:22:107:52 | getParameter(...) | This regular expression pattern |
64+
| RegexInjection.java:117:44:117:50 | pattern | RegexInjection.java:114:22:114:52 | getParameter(...) : String | RegexInjection.java:117:44:117:50 | pattern | $@ is user controlled. | RegexInjection.java:114:22:114:52 | getParameter(...) | This regular expression pattern |
65+
| RegexInjection.java:124:41:124:47 | pattern | RegexInjection.java:121:22:121:52 | getParameter(...) : String | RegexInjection.java:124:41:124:47 | pattern | $@ is user controlled. | RegexInjection.java:121:22:121:52 | getParameter(...) | This regular expression pattern |
66+
| RegexInjection.java:131:43:131:49 | pattern | RegexInjection.java:128:22:128:52 | getParameter(...) : String | RegexInjection.java:131:43:131:49 | pattern | $@ is user controlled. | RegexInjection.java:128:22:128:52 | getParameter(...) | This regular expression pattern |
67+
| RegexInjection.java:146:45:146:51 | pattern | RegexInjection.java:143:22:143:52 | getParameter(...) : String | RegexInjection.java:146:45:146:51 | pattern | $@ is user controlled. | RegexInjection.java:143:22:143:52 | getParameter(...) | This regular expression pattern |
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
import java.util.regex.Matcher;
2+
import java.util.regex.Pattern;
3+
4+
import javax.servlet.http.HttpServlet;
5+
import javax.servlet.http.HttpServletRequest;
6+
import javax.servlet.http.HttpServletResponse;
7+
import javax.servlet.ServletException;
8+
9+
import org.apache.commons.lang3.RegExUtils;
10+
11+
public class RegexInjection extends HttpServlet { // Query test fixture: the expected results reference exact line/column positions, so the layout must stay stable.
12+
public boolean string1(javax.servlet.http.HttpServletRequest request) {
13+
String pattern = request.getParameter("pattern");
14+
String input = request.getParameter("input");
15+
16+
return input.matches("^" + pattern + "=.*$"); // BAD: request parameter is concatenated into the regex given to String.matches
17+
}
18+
19+
public boolean string2(javax.servlet.http.HttpServletRequest request) {
20+
String pattern = request.getParameter("pattern");
21+
String input = request.getParameter("input");
22+
23+
return input.split(pattern).length > 0; // BAD: request parameter is the regex given to String.split
24+
}
25+
26+
public boolean string3(javax.servlet.http.HttpServletRequest request) {
27+
String pattern = request.getParameter("pattern");
28+
String input = request.getParameter("input");
29+
30+
return input.replaceFirst(pattern, "").length() > 0; // BAD: request parameter is the regex given to String.replaceFirst
31+
}
32+
33+
public boolean string4(javax.servlet.http.HttpServletRequest request) {
34+
String pattern = request.getParameter("pattern");
35+
String input = request.getParameter("input");
36+
37+
return input.replaceAll(pattern, "").length() > 0; // BAD: request parameter is the regex given to String.replaceAll
38+
}
39+
40+
public boolean pattern1(javax.servlet.http.HttpServletRequest request) {
41+
String pattern = request.getParameter("pattern");
42+
String input = request.getParameter("input");
43+
44+
Pattern pt = Pattern.compile(pattern);
45+
Matcher matcher = pt.matcher(input);
46+
47+
return matcher.find(); // BAD: the pattern compiled above comes straight from a request parameter
48+
}
49+
50+
public boolean pattern2(javax.servlet.http.HttpServletRequest request) {
51+
String pattern = request.getParameter("pattern");
52+
String input = request.getParameter("input");
53+
54+
return Pattern.compile(pattern).matcher(input).matches(); // BAD: request parameter is compiled by Pattern.compile
55+
}
56+
57+
public boolean pattern3(javax.servlet.http.HttpServletRequest request) {
58+
String pattern = request.getParameter("pattern");
59+
String input = request.getParameter("input");
60+
61+
return Pattern.matches(pattern, input); // BAD: request parameter is the regex given to Pattern.matches
62+
}
63+
64+
public boolean pattern4(javax.servlet.http.HttpServletRequest request) {
65+
String pattern = request.getParameter("pattern");
66+
String input = request.getParameter("input");
67+
68+
return input.matches("^" + foo(pattern) + "=.*$"); // BAD: foo returns its argument unchanged, so the regex is still user controlled
69+
}
70+
71+
String foo(String str) { // identity helper: its name does not look like a sanitizer
72+
return str;
73+
}
74+
75+
public boolean pattern5(javax.servlet.http.HttpServletRequest request) {
76+
String pattern = request.getParameter("pattern");
77+
String input = request.getParameter("input");
78+
79+
// GOOD: User input is sanitized before constructing the regex
80+
return input.matches("^" + escapeSpecialRegexChars(pattern) + "=.*$");
81+
}
82+
83+
public boolean pattern6(javax.servlet.http.HttpServletRequest request) {
84+
String pattern = request.getParameter("pattern");
85+
String input = request.getParameter("input");
86+
87+
escapeSpecialRegexChars(pattern); // the escaped result is discarded; pattern itself is unchanged
88+
89+
// BAD: the pattern is not really sanitized
90+
return input.matches("^" + pattern + "=.*$");
91+
}
92+
93+
Pattern SPECIAL_REGEX_CHARS = Pattern.compile("[{}()\\[\\]><-=!.+*?^$\\\\|]"); // NOTE(review): "<-=" inside the class is parsed as a range from '<' to '=', so '-' itself is not matched
94+
95+
String escapeSpecialRegexChars(String str) { // prefixes each character matched by SPECIAL_REGEX_CHARS with a backslash
96+
return SPECIAL_REGEX_CHARS.matcher(str).replaceAll("\\\\$0");
97+
}
98+
99+
public boolean apache1(javax.servlet.http.HttpServletRequest request) {
100+
String pattern = request.getParameter("pattern");
101+
String input = request.getParameter("input");
102+
103+
return RegExUtils.removeAll(input, pattern).length() > 0; // BAD: request parameter is the regex given to RegExUtils.removeAll
104+
}
105+
106+
public boolean apache2(javax.servlet.http.HttpServletRequest request) {
107+
String pattern = request.getParameter("pattern");
108+
String input = request.getParameter("input");
109+
110+
return RegExUtils.removeFirst(input, pattern).length() > 0; // BAD: request parameter is the regex given to RegExUtils.removeFirst
111+
}
112+
113+
public boolean apache3(javax.servlet.http.HttpServletRequest request) {
114+
String pattern = request.getParameter("pattern");
115+
String input = request.getParameter("input");
116+
117+
return RegExUtils.removePattern(input, pattern).length() > 0; // BAD: request parameter is the regex given to RegExUtils.removePattern
118+
}
119+
120+
public boolean apache4(javax.servlet.http.HttpServletRequest request) {
121+
String pattern = request.getParameter("pattern");
122+
String input = request.getParameter("input");
123+
124+
return RegExUtils.replaceAll(input, pattern, "").length() > 0; // BAD: request parameter is the regex given to RegExUtils.replaceAll
125+
}
126+
127+
public boolean apache5(javax.servlet.http.HttpServletRequest request) {
128+
String pattern = request.getParameter("pattern");
129+
String input = request.getParameter("input");
130+
131+
return RegExUtils.replaceFirst(input, pattern, "").length() > 0; // BAD: request parameter is the regex given to RegExUtils.replaceFirst
132+
}
133+
134+
public boolean apache6(javax.servlet.http.HttpServletRequest request) {
135+
String pattern = request.getParameter("pattern");
136+
String input = request.getParameter("input");
137+
138+
Pattern pt = (Pattern)(Object) pattern; // NOTE(review): casts a String to Pattern via Object; presumably only meant to select the Pattern overload below for the analysis
139+
return RegExUtils.replaceFirst(input, pt, "").length() > 0; // GOOD, Pattern compile is the sink instead
140+
}
141+
142+
public boolean apache7(javax.servlet.http.HttpServletRequest request) {
143+
String pattern = request.getParameter("pattern");
144+
String input = request.getParameter("input");
145+
146+
return RegExUtils.replacePattern(input, pattern, "").length() > 0; // BAD: request parameter is the regex given to RegExUtils.replacePattern
147+
}
148+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
experimental/Security/CWE/CWE-730/RegexInjection.ql

0 commit comments

Comments
 (0)