Skip to content

Commit 6c82dae

Browse files
committed
Python: Move Regexinjection out of experimental
and fix up structure
1 parent 3d5192d commit 6c82dae

File tree

6 files changed

+197
-0
lines changed

6 files changed

+197
-0
lines changed
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/**
2+
* Provides a taint-tracking configuration for detecting regular expression injection
3+
* vulnerabilities.
4+
*
5+
* Note, for performance reasons: only import this file if
6+
* `RegexInjection::Configuration` is needed, otherwise
7+
* `RegexInjectionCustomizations` should be imported instead.
8+
*/
9+
10+
private import python
11+
import semmle.python.dataflow.new.DataFlow
12+
import semmle.python.dataflow.new.TaintTracking
13+
14+
/**
15+
* Provides a taint-tracking configuration for detecting regular expression injection
16+
* vulnerabilities.
17+
*/
18+
module RegexInjection {
19+
import RegexInjectionCustomizations::RegexInjection
20+
21+
/**
22+
* A taint-tracking configuration for detecting "regular expression injection" vulnerabilities.
23+
*/
24+
class Configuration extends TaintTracking::Configuration {
25+
Configuration() { this = "RegexInjection" }
26+
27+
override predicate isSource(DataFlow::Node source) { source instanceof Source }
28+
29+
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
30+
31+
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
32+
33+
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
34+
guard instanceof SanitizerGuard
35+
}
36+
}
37+
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/**
2+
* Provides default sources, sinks and sanitizers for detecting
3+
* "regular expression injection"
4+
* vulnerabilities, as well as extension points for adding your own.
5+
*/
6+
7+
private import python
8+
private import semmle.python.Concepts
9+
private import semmle.python.dataflow.new.DataFlow
10+
private import semmle.python.dataflow.new.TaintTracking
11+
private import semmle.python.dataflow.new.RemoteFlowSources
12+
13+
/**
14+
* Provides default sources, sinks and sanitizers for detecting
15+
* "regular expression injection"
16+
* vulnerabilities, as well as extension points for adding your own.
17+
*/
18+
module RegexInjection {
19+
/**
20+
* A data flow source for "regular expression injection" vulnerabilities.
21+
*/
22+
abstract class Source extends DataFlow::Node { }
23+
24+
/**
25+
* A sink for "regular expression injection" vulnerabilities is the execution of a regular expression.
26+
* If you have a custom way to execute regular expressions, you can extend `RegexExecution::Range`.
27+
*/
28+
class Sink extends RegexExecution { }
29+
30+
/**
31+
* A sanitizer for "regular expression injection" vulnerabilities.
32+
*/
33+
abstract class Sanitizer extends DataFlow::Node { }
34+
35+
/**
36+
* A sanitizer guard for "regular expression injection" vulnerabilities.
37+
*/
38+
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
39+
40+
/**
41+
* A source of remote user input, considered as a flow source.
42+
*/
43+
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
44+
45+
/**
46+
* A regex escaping, considered as a sanitizer.
47+
*/
48+
class RegexEscapingAsSanitizer extends Sanitizer {
49+
RegexEscapingAsSanitizer() {
50+
// Due to use-use flow, we want the output rather than an input
51+
// (so the input can still flow to other sinks).
52+
this = any(RegexEscaping esc).getOutput()
53+
}
54+
}
55+
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
<overview>
6+
<p>
7+
Constructing a regular expression with unsanitized user input is dangerous as a malicious user may
8+
be able to modify the meaning of the expression. In particular, such a user may be able to provide
9+
a regular expression fragment that takes exponential time in the worst case, and use that to
10+
perform a Denial of Service attack.
11+
</p>
12+
</overview>
13+
14+
<recommendation>
15+
<p>
16+
Before embedding user input into a regular expression, use a sanitization function such as
17+
<code>re.escape</code> to escape meta-characters that have a special meaning in
18+
regular expression syntax.
19+
</p>
20+
</recommendation>
21+
22+
<example>
23+
<p>
24+
The following examples are based on a simple Flask web server environment.
25+
</p>
26+
<p>
27+
The following example shows an HTTP request parameter that is used to construct a regular expression
28+
without sanitizing it first:
29+
</p>
30+
<sample src="re_bad.py" />
31+
<p>
32+
Instead, the request parameter should be sanitized first, for example using the function
33+
<code>re.escape</code>. This ensures that the user cannot insert characters which have a
34+
special meaning in regular expressions.
35+
</p>
36+
<sample src="re_good.py" />
37+
</example>
38+
39+
<references>
40+
<li>OWASP: <a href="https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS">Regular expression Denial of Service - ReDoS</a>.</li>
41+
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/ReDoS">ReDoS</a>.</li>
42+
<li>Python docs: <a href="https://docs.python.org/3/library/re.html">re</a>.</li>
43+
<li>SonarSource: <a href="https://rules.sonarsource.com/python/type/Vulnerability/RSPEC-2631">RSPEC-2631</a>.</li>
44+
</references>
45+
</qhelp>
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/**
2+
* @name Regular expression injection
3+
* @description User input should not be used in regular expressions without first being escaped,
4+
* otherwise a malicious user may be able to inject an expression that could require
5+
* exponential time on certain inputs.
6+
* @kind path-problem
7+
* @problem.severity error
8+
* @id py/regex-injection
9+
* @tags security
10+
* external/cwe/cwe-730
11+
* external/cwe/cwe-400
12+
*/
13+
14+
// TODO: determine the precision of this query and add a `@precision` tag to the metadata above
15+
import python
16+
import semmle.python.security.injection.RegexInjection
17+
import DataFlow::PathGraph
18+
19+
from
20+
RegexInjection::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink,
21+
RegexInjection::Sink regexInjectionSink
22+
where
23+
config.hasFlowPath(source, sink) and
24+
regexInjectionSink = sink.getNode()
25+
select sink.getNode(), source, sink,
26+
"$@ regular expression is constructed from a $@ and executed by $@.",
27+
regexInjectionSink.getRegexNode(), "This", source.getNode(), "user-provided value",
28+
regexInjectionSink, regexInjectionSink.getName()
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
"""Flask handlers that demonstrate regular expression injection (BAD example).

Both routes pass the ``pattern`` query parameter to the ``re`` module without
escaping it, so the client controls the regular expression that is executed.
"""

from flask import request, Flask
import re

# The route decorators below need an application object. The sample imported
# `Flask` but never instantiated it, which left `app` undefined.
app = Flask(__name__)


@app.route("/direct")
def direct():
    """Search with the raw request parameter used directly as the pattern."""
    # BAD: the user-supplied value is used as a regular expression as-is.
    unsafe_pattern = request.args["pattern"]
    re.search(unsafe_pattern, "")


@app.route("/compile")
def compile():
    """Compile the raw request parameter and search with the compiled pattern."""
    # BAD: the compiled pattern is still built from the unescaped user input.
    unsafe_pattern = request.args["pattern"]
    compiled_pattern = re.compile(unsafe_pattern)
    compiled_pattern.search("")
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
"""Flask handlers that use request data in regular expressions safely (GOOD example).

Both routes pass the ``pattern`` query parameter through ``re.escape`` before
handing it to the ``re`` module.
"""

from flask import request, Flask
import re

# The route decorators below need an application object. The sample imported
# `Flask` but never instantiated it, which left `app` undefined.
app = Flask(__name__)


@app.route("/direct")
def direct():
    """Search with the request parameter after escaping it."""
    unsafe_pattern = request.args["pattern"]
    # GOOD: the user-supplied value is escaped before it is used as a pattern.
    safe_pattern = re.escape(unsafe_pattern)
    re.search(safe_pattern, "")


@app.route("/compile")
def compile():
    """Escape the request parameter, compile it, and search with the result."""
    unsafe_pattern = request.args["pattern"]
    # GOOD: escape first, then compile the escaped pattern.
    safe_pattern = re.escape(unsafe_pattern)
    compiled_pattern = re.compile(safe_pattern)
    compiled_pattern.search("")

0 commit comments

Comments
 (0)