Skip to content

Commit 0e5a2c4

Browse files
authored
Merge pull request github#5442 from jorgectf/jorgectf/python/redos
Python: Add Regular Expression Injection query
2 parents 549c9ee + 78370cf commit 0e5a2c4

File tree

11 files changed

+428
-0
lines changed

11 files changed

+428
-0
lines changed
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
<overview>
6+
<p>
7+
Constructing a regular expression with unsanitized user input is dangerous as a malicious user may
8+
be able to modify the meaning of the expression. In particular, such a user may be able to provide
9+
a regular expression fragment that takes exponential time in the worst case, and use that to
10+
perform a Denial of Service attack.
11+
</p>
12+
</overview>
13+
14+
<recommendation>
15+
<p>
16+
Before embedding user input into a regular expression, use a sanitization function such as
17+
<code>re.escape</code> to escape meta-characters that have a special meaning in
18+
regular expression syntax.
19+
</p>
20+
</recommendation>
21+
22+
<example>
23+
<p>
24+
The following examples are based on a simple Flask web server environment.
25+
</p>
26+
<p>
27+
The following example shows an HTTP request parameter that is used to construct a regular expression
28+
without sanitizing it first:
29+
</p>
30+
<sample src="re_bad.py" />
31+
<p>
32+
Instead, the request parameter should be sanitized first, for example using the function
33+
<code>re.escape</code>. This ensures that the user cannot insert characters which have a
34+
special meaning in regular expressions.
35+
</p>
36+
<sample src="re_good.py" />
37+
</example>
38+
39+
<references>
40+
<li>OWASP: <a href="https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS">Regular expression Denial of Service - ReDoS</a>.</li>
41+
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/ReDoS">ReDoS</a>.</li>
42+
<li>Python docs: <a href="https://docs.python.org/3/library/re.html">re</a>.</li>
43+
<li>SonarSource: <a href="https://rules.sonarsource.com/python/type/Vulnerability/RSPEC-2631">RSPEC-2631</a>.</li>
44+
</references>
45+
</qhelp>
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/**
2+
* @name Regular expression injection
3+
* @description User input should not be used in regular expressions without first being escaped,
4+
* otherwise a malicious user may be able to inject an expression that could require
5+
* exponential time on certain inputs.
6+
* @kind path-problem
7+
* @problem.severity error
8+
* @id py/regex-injection
9+
* @tags security
10+
* external/cwe/cwe-730
11+
* external/cwe/cwe-400
12+
*/
13+
14+
// TODO: determine the appropriate `@precision` and add it to the query metadata above
15+
import python
16+
import experimental.semmle.python.security.injection.RegexInjection
17+
import DataFlow::PathGraph
18+
19+
from
20+
RegexInjectionFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink,
21+
RegexInjectionSink regexInjectionSink, Attribute methodAttribute
22+
where
23+
config.hasFlowPath(source, sink) and
24+
regexInjectionSink = sink.getNode() and
25+
methodAttribute = regexInjectionSink.getRegexMethod()
26+
select sink.getNode(), source, sink,
27+
"$@ regular expression is constructed from a $@ and executed by $@.", sink.getNode(), "This",
28+
source.getNode(), "user-provided value", methodAttribute,
29+
regexInjectionSink.getRegexModule() + "." + methodAttribute.getName()
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Example for the regular expression injection query help.
#
# BAD: both handlers use a user-controlled request parameter as a regular
# expression pattern without escaping it first.

from flask import request, Flask
import re

# The `@app.route` decorators below need an application object to register
# the handlers on; without it the module fails with a NameError on import.
app = Flask(__name__)


@app.route("/direct")
def direct():
    """Use the `pattern` request parameter directly as the pattern of `re.search`."""
    unsafe_pattern = request.args["pattern"]
    # BAD: the user-provided pattern is executed without being escaped.
    re.search(unsafe_pattern, "")


@app.route("/compile")
def compile():
    """Compile the `pattern` request parameter and execute the compiled pattern."""
    unsafe_pattern = request.args["pattern"]
    # BAD: the user-provided pattern is compiled without being escaped ...
    compiled_pattern = re.compile(unsafe_pattern)
    # ... and is then executed through the compiled pattern object.
    compiled_pattern.search("")
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Example for the regular expression injection query help.
#
# GOOD: both handlers pass the user-controlled request parameter through
# `re.escape` before using it as a regular expression pattern.

from flask import request, Flask
import re

# The `@app.route` decorators below need an application object to register
# the handlers on; without it the module fails with a NameError on import.
app = Flask(__name__)


@app.route("/direct")
def direct():
    """Escape the `pattern` request parameter, then use it with `re.search`."""
    unsafe_pattern = request.args['pattern']
    # GOOD: meta-characters in the user input are escaped before execution.
    safe_pattern = re.escape(unsafe_pattern)
    re.search(safe_pattern, "")


@app.route("/compile")
def compile():
    """Escape the `pattern` request parameter, then compile and execute it."""
    unsafe_pattern = request.args['pattern']
    # GOOD: meta-characters in the user input are escaped before compilation.
    safe_pattern = re.escape(unsafe_pattern)
    compiled_pattern = re.compile(safe_pattern)
    compiled_pattern.search("")

python/ql/src/experimental/semmle/python/Concepts.qll

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,70 @@ private import semmle.python.dataflow.new.DataFlow
1313
private import semmle.python.dataflow.new.RemoteFlowSources
1414
private import semmle.python.dataflow.new.TaintTracking
1515
private import experimental.semmle.python.Frameworks
16+
17+
/** Provides classes for modeling Regular Expression-related APIs. */
18+
module RegexExecution {
19+
/**
20+
* A data-flow node that executes a regular expression.
21+
*
22+
* Extend this class to model new APIs. If you want to refine existing API models,
23+
* extend `RegexExecution` instead.
24+
*/
25+
abstract class Range extends DataFlow::Node {
26+
/**
27+
* Gets the argument containing the executed expression.
28+
*/
29+
abstract DataFlow::Node getRegexNode();
30+
31+
/**
32+
* Gets the library used to execute the regular expression.
33+
*/
34+
abstract string getRegexModule();
35+
}
36+
}
37+
38+
/**
39+
* A data-flow node that executes a regular expression.
40+
*
41+
* Extend this class to refine existing API models. If you want to model new APIs,
42+
* extend `RegexExecution::Range` instead.
43+
*/
44+
class RegexExecution extends DataFlow::Node {
45+
RegexExecution::Range range;
46+
47+
RegexExecution() { this = range }
48+
49+
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
50+
51+
string getRegexModule() { result = range.getRegexModule() }
52+
}
53+
54+
/** Provides classes for modeling Regular Expression escape-related APIs. */
55+
module RegexEscape {
56+
/**
57+
* A data-flow node that escapes a regular expression.
58+
*
59+
* Extend this class to model new APIs. If you want to refine existing API models,
60+
* extend `RegexEscape` instead.
61+
*/
62+
abstract class Range extends DataFlow::Node {
63+
/**
64+
* Gets the argument containing the escaped expression.
65+
*/
66+
abstract DataFlow::Node getRegexNode();
67+
}
68+
}
69+
70+
/**
71+
* A data-flow node that escapes a regular expression.
72+
*
73+
* Extend this class to refine existing API models. If you want to model new APIs,
74+
* extend `RegexEscape::Range` instead.
75+
*/
76+
class RegexEscape extends DataFlow::Node {
77+
RegexEscape::Range range;
78+
79+
RegexEscape() { this = range }
80+
81+
DataFlow::Node getRegexNode() { result = range.getRegexNode() }
82+
}

python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,92 @@ private import semmle.python.dataflow.new.TaintTracking
99
private import semmle.python.dataflow.new.RemoteFlowSources
1010
private import experimental.semmle.python.Concepts
1111
private import semmle.python.ApiGraphs
12+
13+
/**
14+
* Provides models for Python's `re` library.
15+
*
16+
* See https://docs.python.org/3/library/re.html
17+
*/
18+
private module Re {
19+
/**
20+
* List of `re` methods immediately executing an expression.
21+
*
22+
* See https://docs.python.org/3/library/re.html#module-contents
23+
*/
24+
private class RegexExecutionMethods extends string {
25+
RegexExecutionMethods() {
26+
this in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"]
27+
}
28+
}
29+
30+
/**
31+
* A class to find `re` methods immediately executing an expression.
32+
*
33+
* See `RegexExecutionMethods`
34+
*/
35+
private class DirectRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
36+
DataFlow::Node regexNode;
37+
38+
DirectRegex() {
39+
this = API::moduleImport("re").getMember(any(RegexExecutionMethods m)).getACall() and
40+
regexNode = this.getArg(0)
41+
}
42+
43+
override DataFlow::Node getRegexNode() { result = regexNode }
44+
45+
override string getRegexModule() { result = "re" }
46+
}
47+
48+
/**
49+
* A class to find `re` methods executing an expression previously compiled by `re.compile`.
50+
*
51+
* Given the following example:
52+
*
53+
* ```py
54+
* pattern = re.compile(input)
55+
* pattern.match(s)
56+
* ```
57+
*
58+
* This class will identify that `re.compile` compiles `input` and afterwards
59+
* executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)`
60+
* and `this.getRegexNode()` will return the node for `input` (`re.compile`'s first argument).
61+
*
62+
*
63+
* See `RegexExecutionMethods`
64+
*
65+
* See https://docs.python.org/3/library/re.html#regular-expression-objects
66+
*/
67+
private class CompiledRegex extends DataFlow::CallCfgNode, RegexExecution::Range {
68+
DataFlow::Node regexNode;
69+
70+
CompiledRegex() {
71+
exists(DataFlow::CallCfgNode patternCall, DataFlow::AttrRead reMethod |
72+
this.getFunction() = reMethod and
73+
patternCall = API::moduleImport("re").getMember("compile").getACall() and
74+
patternCall.flowsTo(reMethod.getObject()) and
75+
reMethod.getAttributeName() instanceof RegexExecutionMethods and
76+
regexNode = patternCall.getArg(0)
77+
)
78+
}
79+
80+
override DataFlow::Node getRegexNode() { result = regexNode }
81+
82+
override string getRegexModule() { result = "re" }
83+
}
84+
85+
/**
86+
* A class to find `re` methods escaping an expression.
87+
*
88+
* See https://docs.python.org/3/library/re.html#re.escape
89+
*/
90+
class ReEscape extends DataFlow::CallCfgNode, RegexEscape::Range {
91+
DataFlow::Node regexNode;
92+
93+
ReEscape() {
94+
this = API::moduleImport("re").getMember("escape").getACall() and
95+
regexNode = this.getArg(0)
96+
}
97+
98+
override DataFlow::Node getRegexNode() { result = regexNode }
99+
}
100+
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/**
2+
* Provides a taint-tracking configuration for detecting regular expression injection
3+
* vulnerabilities.
4+
*/
5+
6+
import python
7+
import experimental.semmle.python.Concepts
8+
import semmle.python.dataflow.new.DataFlow
9+
import semmle.python.dataflow.new.TaintTracking
10+
import semmle.python.dataflow.new.RemoteFlowSources
11+
12+
/**
13+
* A data-flow node holding the regular expression executed by a `RegexExecution`.
14+
*
15+
* See `RegexExecution`
16+
*/
17+
class RegexInjectionSink extends DataFlow::Node {
18+
string regexModule;
19+
Attribute regexMethod;
20+
21+
RegexInjectionSink() {
22+
exists(RegexExecution reExec |
23+
this = reExec.getRegexNode() and
24+
regexModule = reExec.getRegexModule() and
25+
regexMethod = reExec.(DataFlow::CallCfgNode).getFunction().asExpr().(Attribute)
26+
)
27+
}
28+
29+
/**
30+
* Gets the library used to execute the regular expression.
31+
*/
32+
string getRegexModule() { result = regexModule }
33+
34+
/**
35+
* Gets the method used to execute the regular expression.
36+
*/
37+
Attribute getRegexMethod() { result = regexMethod }
38+
}
39+
40+
/**
41+
* A taint-tracking configuration for detecting regular expression injections.
42+
*/
43+
class RegexInjectionFlowConfig extends TaintTracking::Configuration {
44+
RegexInjectionFlowConfig() { this = "RegexInjectionFlowConfig" }
45+
46+
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
47+
48+
override predicate isSink(DataFlow::Node sink) { sink instanceof RegexInjectionSink }
49+
50+
override predicate isSanitizer(DataFlow::Node sanitizer) {
51+
sanitizer = any(RegexEscape reEscape).getRegexNode()
52+
}
53+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
edges
2+
| re_bad.py:13:22:13:28 | ControlFlowNode for request | re_bad.py:13:22:13:33 | ControlFlowNode for Attribute |
3+
| re_bad.py:13:22:13:33 | ControlFlowNode for Attribute | re_bad.py:13:22:13:44 | ControlFlowNode for Subscript |
4+
| re_bad.py:13:22:13:44 | ControlFlowNode for Subscript | re_bad.py:14:15:14:28 | ControlFlowNode for unsafe_pattern |
5+
| re_bad.py:24:22:24:28 | ControlFlowNode for request | re_bad.py:24:22:24:33 | ControlFlowNode for Attribute |
6+
| re_bad.py:24:22:24:33 | ControlFlowNode for Attribute | re_bad.py:24:22:24:44 | ControlFlowNode for Subscript |
7+
| re_bad.py:24:22:24:44 | ControlFlowNode for Subscript | re_bad.py:25:35:25:48 | ControlFlowNode for unsafe_pattern |
8+
| re_bad.py:36:22:36:28 | ControlFlowNode for request | re_bad.py:36:22:36:33 | ControlFlowNode for Attribute |
9+
| re_bad.py:36:22:36:33 | ControlFlowNode for Attribute | re_bad.py:36:22:36:44 | ControlFlowNode for Subscript |
10+
| re_bad.py:36:22:36:44 | ControlFlowNode for Subscript | re_bad.py:37:16:37:29 | ControlFlowNode for unsafe_pattern |
11+
nodes
12+
| re_bad.py:13:22:13:28 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
13+
| re_bad.py:13:22:13:33 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
14+
| re_bad.py:13:22:13:44 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
15+
| re_bad.py:14:15:14:28 | ControlFlowNode for unsafe_pattern | semmle.label | ControlFlowNode for unsafe_pattern |
16+
| re_bad.py:24:22:24:28 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
17+
| re_bad.py:24:22:24:33 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
18+
| re_bad.py:24:22:24:44 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
19+
| re_bad.py:25:35:25:48 | ControlFlowNode for unsafe_pattern | semmle.label | ControlFlowNode for unsafe_pattern |
20+
| re_bad.py:36:22:36:28 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
21+
| re_bad.py:36:22:36:33 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
22+
| re_bad.py:36:22:36:44 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
23+
| re_bad.py:37:16:37:29 | ControlFlowNode for unsafe_pattern | semmle.label | ControlFlowNode for unsafe_pattern |
24+
#select
25+
| re_bad.py:14:15:14:28 | ControlFlowNode for unsafe_pattern | re_bad.py:13:22:13:28 | ControlFlowNode for request | re_bad.py:14:15:14:28 | ControlFlowNode for unsafe_pattern | $@ regular expression is constructed from a $@ and executed by $@. | re_bad.py:14:15:14:28 | ControlFlowNode for unsafe_pattern | This | re_bad.py:13:22:13:28 | ControlFlowNode for request | user-provided value | re_bad.py:14:5:14:13 | Attribute | re.search |
26+
| re_bad.py:25:35:25:48 | ControlFlowNode for unsafe_pattern | re_bad.py:24:22:24:28 | ControlFlowNode for request | re_bad.py:25:35:25:48 | ControlFlowNode for unsafe_pattern | $@ regular expression is constructed from a $@ and executed by $@. | re_bad.py:25:35:25:48 | ControlFlowNode for unsafe_pattern | This | re_bad.py:24:22:24:28 | ControlFlowNode for request | user-provided value | re_bad.py:26:5:26:27 | Attribute | re.search |
27+
| re_bad.py:37:16:37:29 | ControlFlowNode for unsafe_pattern | re_bad.py:36:22:36:28 | ControlFlowNode for request | re_bad.py:37:16:37:29 | ControlFlowNode for unsafe_pattern | $@ regular expression is constructed from a $@ and executed by $@. | re_bad.py:37:16:37:29 | ControlFlowNode for unsafe_pattern | This | re_bad.py:36:22:36:28 | ControlFlowNode for request | user-provided value | re_bad.py:37:5:37:37 | Attribute | re.search |
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
experimental/Security/CWE-730/RegexInjection.ql
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
from flask import request, Flask
2+
import re
3+
4+
app = Flask(__name__)
5+
6+
7+
@app.route("/direct")
8+
def direct():
9+
"""
10+
A RemoteFlowSource is used directly as re.search's pattern
11+
"""
12+
13+
unsafe_pattern = request.args["pattern"]
14+
re.search(unsafe_pattern, "")
15+
16+
17+
@app.route("/compile")
18+
def compile():
19+
"""
20+
A RemoteFlowSource is used directly as re.compile's pattern
21+
which also executes .search()
22+
"""
23+
24+
unsafe_pattern = request.args["pattern"]
25+
compiled_pattern = re.compile(unsafe_pattern)
26+
compiled_pattern.search("")
27+
28+
29+
@app.route("/compile_direct")
30+
def compile_direct():
31+
"""
32+
A RemoteFlowSource is used directly as re.compile's pattern
33+
which also executes .search() in the same line
34+
"""
35+
36+
unsafe_pattern = request.args["pattern"]
37+
re.compile(unsafe_pattern).search("")
38+
39+
# if __name__ == "__main__":
40+
# app.run(debug=True)

0 commit comments

Comments
 (0)