Skip to content

Commit 9ed61e7

Browse files
committed
Python: Port py/polynomial-redos to use proper source/sink customization
In JS, the corresponding configuration/customization files live in the `performance` folder; for Python I kept them in their current location, since that seems correct to me.
1 parent cea2f82 commit 9ed61e7

File tree

3 files changed

+234
-164
lines changed

3 files changed

+234
-164
lines changed

python/ql/src/Security/CWE-730/PolynomialReDoS.ql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ import semmle.python.security.dataflow.PolynomialReDoS
1717
import DataFlow::PathGraph
1818

1919
from
20-
PolynomialReDoSConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink,
21-
PolynomialReDoSSink sinkNode, PolynomialBackTrackingTerm regexp
20+
PolynomialReDoS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink,
21+
PolynomialReDoS::Sink sinkNode, PolynomialBackTrackingTerm regexp
2222
where
2323
config.hasFlowPath(source, sink) and
2424
sinkNode = sink.getNode() and
Lines changed: 20 additions & 162 deletions
Original file line numberDiff line numberDiff line change
@@ -1,177 +1,35 @@
11
/**
2-
* Provides a taint-tracking configuration for detecting polynomial regular expression denial of service (ReDoS)
3-
* vulnerabilities.
2+
* Provides a taint-tracking configuration for detecting "polynomial regular expression denial of service (ReDoS)" vulnerabilities.
3+
*
4+
* Note, for performance reasons: only import this file if
5+
* `PolynomialReDoS::Configuration` is needed, otherwise
6+
* `PolynomialReDoSCustomizations` should be imported instead.
47
*/
58

6-
import python
9+
private import python
710
import semmle.python.dataflow.new.DataFlow
8-
import semmle.python.dataflow.new.DataFlow2
911
import semmle.python.dataflow.new.TaintTracking
10-
import semmle.python.Concepts
11-
import semmle.python.dataflow.new.RemoteFlowSources
12-
import semmle.python.dataflow.new.BarrierGuards
13-
import semmle.python.RegexTreeView
14-
import semmle.python.ApiGraphs
15-
16-
/** A configuration for finding uses of compiled regexes. */
17-
class RegexDefinitionConfiguration extends DataFlow2::Configuration {
18-
RegexDefinitionConfiguration() { this = "RegexDefinitionConfiguration" }
19-
20-
override predicate isSource(DataFlow::Node source) { source instanceof RegexDefinitonSource }
21-
22-
override predicate isSink(DataFlow::Node sink) { sink instanceof RegexDefinitionSink }
23-
}
24-
25-
/** A regex compilation. */
26-
class RegexDefinitonSource extends DataFlow::CallCfgNode {
27-
DataFlow::Node regexNode;
28-
29-
RegexDefinitonSource() {
30-
this = API::moduleImport("re").getMember("compile").getACall() and
31-
regexNode in [this.getArg(0), this.getArgByName("pattern")]
32-
}
33-
34-
/** Gets the regex that is being compiled by this node. */
35-
RegExpTerm getRegExp() { result.getRegex() = regexNode.asExpr() and result.isRootTerm() }
36-
37-
/** Gets the data flow node for the regex being compiled by this node. */
38-
DataFlow::Node getRegexNode() { result = regexNode }
39-
}
40-
41-
/** A use of a compiled regex. */
42-
class RegexDefinitionSink extends DataFlow::Node {
43-
RegexExecutionMethod method;
44-
DataFlow::CallCfgNode executingCall;
45-
46-
RegexDefinitionSink() {
47-
exists(DataFlow::AttrRead reMethod |
48-
executingCall.getFunction() = reMethod and
49-
reMethod.getAttributeName() = method and
50-
this = reMethod.getObject()
51-
)
52-
}
53-
54-
/** Gets the method used to execute the regex. */
55-
RegexExecutionMethod getMethod() { result = method }
56-
57-
/** Gets the data flow node for the executing call. */
58-
DataFlow::CallCfgNode getExecutingCall() { result = executingCall }
59-
}
60-
61-
/**
62-
* A taint-tracking configuration for detecting regular expression denial-of-service vulnerabilities.
63-
*/
64-
class PolynomialReDoSConfiguration extends TaintTracking::Configuration {
65-
PolynomialReDoSConfiguration() { this = "PolynomialReDoSConfiguration" }
66-
67-
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
68-
69-
override predicate isSink(DataFlow::Node sink) { sink instanceof PolynomialReDoSSink }
70-
}
71-
72-
/** A data flow node executing a regex. */
73-
abstract class RegexExecution extends DataFlow::Node {
74-
/** Gets the data flow node for the regex being executed by this node. */
75-
abstract DataFlow::Node getRegexNode();
76-
77-
/** Gets a dataflow node for the string to be searched or matched against. */
78-
abstract DataFlow::Node getString();
79-
}
80-
81-
private class RegexExecutionMethod extends string {
82-
RegexExecutionMethod() {
83-
this in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"]
84-
}
85-
}
86-
87-
/** Gets the index of the argument representing the string to be searched by a regex. */
88-
int stringArg(RegexExecutionMethod method) {
89-
method in ["match", "fullmatch", "search", "split", "findall", "finditer"] and
90-
result = 1
91-
or
92-
method in ["sub", "subn"] and
93-
result = 2
94-
}
9512

9613
/**
97-
* A class to find `re` methods immediately executing an expression.
98-
*
99-
* See `RegexExecutionMethod`
14+
* Provides a taint-tracking configuration for detecting "polynomial regular expression denial of service (ReDoS)" vulnerabilities.
10015
*/
101-
class DirectRegex extends DataFlow::CallCfgNode, RegexExecution {
102-
RegexExecutionMethod method;
103-
104-
DirectRegex() { this = API::moduleImport("re").getMember(method).getACall() }
16+
module PolynomialReDoS {
17+
import PolynomialReDoSCustomizations::PolynomialReDoS
10518

106-
override DataFlow::Node getRegexNode() {
107-
result in [this.getArg(0), this.getArgByName("pattern")]
108-
}
109-
110-
override DataFlow::Node getString() {
111-
result in [this.getArg(stringArg(method)), this.getArgByName("string")]
112-
}
113-
}
114-
115-
/**
116-
* A class to find `re` methods immediately executing a compiled expression by `re.compile`.
117-
*
118-
* Given the following example:
119-
*
120-
* ```py
121-
* pattern = re.compile(input)
122-
* pattern.match(s)
123-
* ```
124-
*
125-
* This class will identify that `re.compile` compiles `input` and afterwards
126-
* executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)`
127-
* and `this.getRegexNode()` will return the node for `input` (`re.compile`'s first argument)
128-
*
129-
*
130-
* See `RegexExecutionMethod`
131-
*
132-
* See https://docs.python.org/3/library/re.html#regular-expression-objects
133-
*/
134-
private class CompiledRegex extends DataFlow::CallCfgNode, RegexExecution {
135-
DataFlow::Node regexNode;
136-
RegexExecutionMethod method;
137-
138-
CompiledRegex() {
139-
exists(
140-
RegexDefinitionConfiguration conf, RegexDefinitonSource source, RegexDefinitionSink sink
141-
|
142-
conf.hasFlow(source, sink) and
143-
regexNode = source.getRegexNode() and
144-
method = sink.getMethod() and
145-
this = sink.getExecutingCall()
146-
)
147-
}
19+
/**
20+
* A taint-tracking configuration for detecting "polynomial regular expression denial of service (ReDoS)" vulnerabilities.
21+
*/
22+
class Configuration extends TaintTracking::Configuration {
23+
Configuration() { this = "PolynomialReDoS" }
14824

149-
override DataFlow::Node getRegexNode() { result = regexNode }
25+
override predicate isSource(DataFlow::Node source) { source instanceof Source }
15026

151-
override DataFlow::Node getString() {
152-
result in [this.getArg(stringArg(method) - 1), this.getArgByName("string")]
153-
}
154-
}
27+
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
15528

156-
/**
157-
* A data flow sink node for polynomial regular expression denial-of-service vulnerabilities.
158-
*/
159-
class PolynomialReDoSSink extends DataFlow::Node {
160-
RegExpTerm t;
29+
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
16130

162-
PolynomialReDoSSink() {
163-
exists(RegexExecution re |
164-
re.getRegexNode().asExpr() = t.getRegex() and
165-
this = re.getString()
166-
) and
167-
t.isRootTerm()
31+
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
32+
guard instanceof SanitizerGuard
33+
}
16834
}
169-
170-
/** Gets the regex that is being executed by this node. */
171-
RegExpTerm getRegExp() { result = t }
172-
173-
/**
174-
* Gets the node to highlight in the alert message.
175-
*/
176-
DataFlow::Node getHighlight() { result = this }
17735
}

0 commit comments

Comments
 (0)