|
1 | 1 | /**
|
2 |
| - * Provides a taint-tracking configuration for detecting polynomial regular expression denial of service (ReDoS) |
3 |
| - * vulnerabilities. |
| 2 | + * Provides a taint-tracking configuration for detecting "polynomial regular expression denial of service (ReDoS)" vulnerabilities. |
| 3 | + * |
| 4 | + * Note, for performance reasons: only import this file if |
| 5 | + * `PolynomialReDoS::Configuration` is needed, otherwise |
| 6 | + * `PolynomialReDoSCustomizations` should be imported instead. |
4 | 7 | */
|
5 | 8 |
|
6 |
| -import python |
| 9 | +private import python |
7 | 10 | import semmle.python.dataflow.new.DataFlow
|
8 |
| -import semmle.python.dataflow.new.DataFlow2 |
9 | 11 | import semmle.python.dataflow.new.TaintTracking
|
10 |
| -import semmle.python.Concepts |
11 |
| -import semmle.python.dataflow.new.RemoteFlowSources |
12 |
| -import semmle.python.dataflow.new.BarrierGuards |
13 |
| -import semmle.python.RegexTreeView |
14 |
| -import semmle.python.ApiGraphs |
15 |
| - |
16 |
| -/** A configuration for finding uses of compiled regexes. */ |
17 |
| -class RegexDefinitionConfiguration extends DataFlow2::Configuration { |
18 |
| - RegexDefinitionConfiguration() { this = "RegexDefinitionConfiguration" } |
19 |
| - |
20 |
| - override predicate isSource(DataFlow::Node source) { source instanceof RegexDefinitonSource } |
21 |
| - |
22 |
| - override predicate isSink(DataFlow::Node sink) { sink instanceof RegexDefinitionSink } |
23 |
| -} |
24 |
| - |
25 |
| -/** A regex compilation. */ |
26 |
| -class RegexDefinitonSource extends DataFlow::CallCfgNode { |
27 |
| - DataFlow::Node regexNode; |
28 |
| - |
29 |
| - RegexDefinitonSource() { |
30 |
| - this = API::moduleImport("re").getMember("compile").getACall() and |
31 |
| - regexNode in [this.getArg(0), this.getArgByName("pattern")] |
32 |
| - } |
33 |
| - |
34 |
| - /** Gets the regex that is being compiled by this node. */ |
35 |
| - RegExpTerm getRegExp() { result.getRegex() = regexNode.asExpr() and result.isRootTerm() } |
36 |
| - |
37 |
| - /** Gets the data flow node for the regex being compiled by this node. */ |
38 |
| - DataFlow::Node getRegexNode() { result = regexNode } |
39 |
| -} |
40 |
| - |
41 |
| -/** A use of a compiled regex. */ |
42 |
| -class RegexDefinitionSink extends DataFlow::Node { |
43 |
| - RegexExecutionMethod method; |
44 |
| - DataFlow::CallCfgNode executingCall; |
45 |
| - |
46 |
| - RegexDefinitionSink() { |
47 |
| - exists(DataFlow::AttrRead reMethod | |
48 |
| - executingCall.getFunction() = reMethod and |
49 |
| - reMethod.getAttributeName() = method and |
50 |
| - this = reMethod.getObject() |
51 |
| - ) |
52 |
| - } |
53 |
| - |
54 |
| - /** Gets the method used to execute the regex. */ |
55 |
| - RegexExecutionMethod getMethod() { result = method } |
56 |
| - |
57 |
| - /** Gets the data flow node for the executing call. */ |
58 |
| - DataFlow::CallCfgNode getExecutingCall() { result = executingCall } |
59 |
| -} |
60 |
| - |
61 |
| -/** |
62 |
| - * A taint-tracking configuration for detecting regular expression denial-of-service vulnerabilities. |
63 |
| - */ |
64 |
| -class PolynomialReDoSConfiguration extends TaintTracking::Configuration { |
65 |
| - PolynomialReDoSConfiguration() { this = "PolynomialReDoSConfiguration" } |
66 |
| - |
67 |
| - override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource } |
68 |
| - |
69 |
| - override predicate isSink(DataFlow::Node sink) { sink instanceof PolynomialReDoSSink } |
70 |
| -} |
71 |
| - |
72 |
| -/** A data flow node executing a regex. */ |
73 |
| -abstract class RegexExecution extends DataFlow::Node { |
74 |
| - /** Gets the data flow node for the regex being compiled by this node. */ |
75 |
| - abstract DataFlow::Node getRegexNode(); |
76 |
| - |
77 |
| - /** Gets a dataflow node for the string to be searched or matched against. */ |
78 |
| - abstract DataFlow::Node getString(); |
79 |
| -} |
80 |
| - |
81 |
| -private class RegexExecutionMethod extends string { |
82 |
| - RegexExecutionMethod() { |
83 |
| - this in ["match", "fullmatch", "search", "split", "findall", "finditer", "sub", "subn"] |
84 |
| - } |
85 |
| -} |
86 |
| - |
87 |
| -/** Gets the index of the argument representing the string to be searched by a regex. */ |
88 |
| -int stringArg(RegexExecutionMethod method) { |
89 |
| - method in ["match", "fullmatch", "search", "split", "findall", "finditer"] and |
90 |
| - result = 1 |
91 |
| - or |
92 |
| - method in ["sub", "subn"] and |
93 |
| - result = 2 |
94 |
| -} |
95 | 12 |
|
96 | 13 | /**
|
97 |
| - * A class to find `re` methods immediately executing an expression. |
98 |
| - * |
99 |
| - * See `RegexExecutionMethods` |
| 14 | + * Provides a taint-tracking configuration for detecting "polynomial regular expression denial of service (ReDoS)" vulnerabilities. |
100 | 15 | */
|
101 |
| -class DirectRegex extends DataFlow::CallCfgNode, RegexExecution { |
102 |
| - RegexExecutionMethod method; |
103 |
| - |
104 |
| - DirectRegex() { this = API::moduleImport("re").getMember(method).getACall() } |
| 16 | +module PolynomialReDoS { |
| 17 | + import PolynomialReDoSCustomizations::PolynomialReDoS |
105 | 18 |
|
106 |
| - override DataFlow::Node getRegexNode() { |
107 |
| - result in [this.getArg(0), this.getArgByName("pattern")] |
108 |
| - } |
109 |
| - |
110 |
| - override DataFlow::Node getString() { |
111 |
| - result in [this.getArg(stringArg(method)), this.getArgByName("string")] |
112 |
| - } |
113 |
| -} |
114 |
| - |
115 |
| -/** |
116 |
| - * A class to find `re` methods immediately executing a compiled expression by `re.compile`. |
117 |
| - * |
118 |
| - * Given the following example: |
119 |
| - * |
120 |
| - * ```py |
121 |
| - * pattern = re.compile(input) |
122 |
| - * pattern.match(s) |
123 |
| - * ``` |
124 |
| - * |
125 |
| - * This class will identify that `re.compile` compiles `input` and afterwards |
126 |
| - * executes `re`'s `match`. As a result, `this` will refer to `pattern.match(s)` |
127 |
| - * and `this.getRegexNode()` will return the node for `input` (`re.compile`'s first argument) |
128 |
| - * |
129 |
| - * |
130 |
| - * See `RegexExecutionMethods` |
131 |
| - * |
132 |
| - * See https://docs.python.org/3/library/re.html#regular-expression-objects |
133 |
| - */ |
134 |
| -private class CompiledRegex extends DataFlow::CallCfgNode, RegexExecution { |
135 |
| - DataFlow::Node regexNode; |
136 |
| - RegexExecutionMethod method; |
137 |
| - |
138 |
| - CompiledRegex() { |
139 |
| - exists( |
140 |
| - RegexDefinitionConfiguration conf, RegexDefinitonSource source, RegexDefinitionSink sink |
141 |
| - | |
142 |
| - conf.hasFlow(source, sink) and |
143 |
| - regexNode = source.getRegexNode() and |
144 |
| - method = sink.getMethod() and |
145 |
| - this = sink.getExecutingCall() |
146 |
| - ) |
147 |
| - } |
| 19 | + /** |
| 20 | + * A taint-tracking configuration for detecting "polynomial regular expression denial of service (ReDoS)" vulnerabilities. |
| 21 | + */ |
| 22 | + class Configuration extends TaintTracking::Configuration { |
| 23 | + Configuration() { this = "PolynomialReDoS" } |
148 | 24 |
|
149 |
| - override DataFlow::Node getRegexNode() { result = regexNode } |
| 25 | + override predicate isSource(DataFlow::Node source) { source instanceof Source } |
150 | 26 |
|
151 |
| - override DataFlow::Node getString() { |
152 |
| - result in [this.getArg(stringArg(method) - 1), this.getArgByName("string")] |
153 |
| - } |
154 |
| -} |
| 27 | + override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } |
155 | 28 |
|
156 |
| -/** |
157 |
| - * A data flow sink node for polynomial regular expression denial-of-service vulnerabilities. |
158 |
| - */ |
159 |
| -class PolynomialReDoSSink extends DataFlow::Node { |
160 |
| - RegExpTerm t; |
| 29 | + override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer } |
161 | 30 |
|
162 |
| - PolynomialReDoSSink() { |
163 |
| - exists(RegexExecution re | |
164 |
| - re.getRegexNode().asExpr() = t.getRegex() and |
165 |
| - this = re.getString() |
166 |
| - ) and |
167 |
| - t.isRootTerm() |
| 31 | + override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { |
| 32 | + guard instanceof SanitizerGuard |
| 33 | + } |
168 | 34 | }
|
169 |
| - |
170 |
| - /** Gets the regex that is being executed by this node. */ |
171 |
| - RegExpTerm getRegExp() { result = t } |
172 |
| - |
173 |
| - /** |
174 |
| - * Gets the node to highlight in the alert message. |
175 |
| - */ |
176 |
| - DataFlow::Node getHighlight() { result = this } |
177 | 35 | }
|
0 commit comments