Skip to content

Commit 6e712b2

Browse files
committed
Add tracking of regex strings to their compile sites for polynomial ReDoS, in the style of the Ruby implementation
1 parent 52959d7 commit 6e712b2

File tree

4 files changed

+23
-4
lines changed

4 files changed

+23
-4
lines changed

python/ql/lib/semmle/python/regex.qll

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,17 @@ predicate used_as_regex(Expr s, string mode) {
8585
)
8686
}
8787

88+
private import semmle.python.Concepts
89+
private import semmle.python.RegexTreeView
90+
91+
/** Gets the root term of a parsed regular expression whose source expression flows (via `RegexTracking`) to the regex executed at `exec`. */
92+
RegExpTerm getTermForExecution(RegexExecution exec) {
93+
exists(RegexTracking t, DataFlow::Node source | t.hasFlow(source, exec.getRegex()) |
94+
result.getRegex() = source.asExpr() and
95+
result.isRootTerm()
96+
)
97+
}
98+
8899
/**
89100
* Gets the canonical name for the API graph node corresponding to the `re` flag `flag`. For flags
90101
* that have multiple names, we pick the long-form name as a canonical representative.

python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSCustomizations.qll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ private import semmle.python.dataflow.new.RemoteFlowSources
1313
private import semmle.python.dataflow.new.BarrierGuards
1414
private import semmle.python.RegexTreeView::RegexTreeView as TreeView
1515
private import semmle.python.ApiGraphs
16+
private import semmle.python.regex
1617

1718
/**
1819
* Provides default sources, sinks and sanitizers for detecting
@@ -66,7 +67,7 @@ module PolynomialReDoS {
6667

6768
RegexExecutionAsSink() {
6869
exists(RegexExecution re |
69-
re.getRegex().asExpr() = t.getRegex() and
70+
t = getTermForExecution(re) and
7071
this = re.getString()
7172
) and
7273
t.isRootTerm()

python/ql/test/query-tests/Security/CWE-730-PolynomialReDoS/PolynomialReDoS.expected

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ edges
66
| test.py:7:12:7:23 | ControlFlowNode for Attribute | test.py:8:30:8:33 | ControlFlowNode for text |
77
| test.py:7:12:7:23 | ControlFlowNode for Attribute | test.py:9:32:9:35 | ControlFlowNode for text |
88
| test.py:7:12:7:23 | ControlFlowNode for Attribute | test.py:12:17:12:20 | ControlFlowNode for text |
9+
| test.py:7:12:7:23 | ControlFlowNode for Attribute | test.py:18:28:18:31 | ControlFlowNode for text |
10+
| test.py:14:33:14:39 | ControlFlowNode for my_text | test.py:16:24:16:30 | ControlFlowNode for my_text |
11+
| test.py:18:28:18:31 | ControlFlowNode for text | test.py:14:33:14:39 | ControlFlowNode for my_text |
912
nodes
1013
| test.py:0:0:0:0 | ModuleVariableNode for test.request | semmle.label | ModuleVariableNode for test.request |
1114
| test.py:2:26:2:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
@@ -15,8 +18,12 @@ nodes
1518
| test.py:8:30:8:33 | ControlFlowNode for text | semmle.label | ControlFlowNode for text |
1619
| test.py:9:32:9:35 | ControlFlowNode for text | semmle.label | ControlFlowNode for text |
1720
| test.py:12:17:12:20 | ControlFlowNode for text | semmle.label | ControlFlowNode for text |
21+
| test.py:14:33:14:39 | ControlFlowNode for my_text | semmle.label | ControlFlowNode for my_text |
22+
| test.py:16:24:16:30 | ControlFlowNode for my_text | semmle.label | ControlFlowNode for my_text |
23+
| test.py:18:28:18:31 | ControlFlowNode for text | semmle.label | ControlFlowNode for text |
1824
subpaths
1925
#select
2026
| test.py:8:30:8:33 | ControlFlowNode for text | test.py:2:26:2:32 | ControlFlowNode for ImportMember | test.py:8:30:8:33 | ControlFlowNode for text | This $@ that depends on a $@ may run slow on strings with many repetitions of ' '. | test.py:8:21:8:23 | \\s+ | regular expression | test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
2127
| test.py:9:32:9:35 | ControlFlowNode for text | test.py:2:26:2:32 | ControlFlowNode for ImportMember | test.py:9:32:9:35 | ControlFlowNode for text | This $@ that depends on a $@ may run slow on strings starting with '0.9' and with many repetitions of '99'. | test.py:9:27:9:29 | \\d+ | regular expression | test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
2228
| test.py:12:17:12:20 | ControlFlowNode for text | test.py:2:26:2:32 | ControlFlowNode for ImportMember | test.py:12:17:12:20 | ControlFlowNode for text | This $@ that depends on a $@ may run slow on strings with many repetitions of ' '. | test.py:11:31:11:33 | \\s+ | regular expression | test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |
29+
| test.py:16:24:16:30 | ControlFlowNode for my_text | test.py:2:26:2:32 | ControlFlowNode for ImportMember | test.py:16:24:16:30 | ControlFlowNode for my_text | This $@ that depends on a $@ may run slow on strings with many repetitions of ' '. | test.py:18:23:18:25 | \\s+ | regular expression | test.py:2:26:2:32 | ControlFlowNode for ImportMember | user-provided value |

python/ql/test/query-tests/Security/CWE-730-PolynomialReDoS/test.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ def code_execution():
1111
reg = re.compile(r"^\s+|\s+$")
1212
reg.sub("", text) # NOT OK
1313

14-
def indirect(input_reg_str):
14+
def indirect(input_reg_str, my_text):
1515
my_reg = re.compile(input_reg_str)
16-
my_reg.sub("", text) # NOT OK - but not found
16+
my_reg.sub("", my_text) # NOT OK
1717

18-
indirect(r"^\s+|\s+$")
18+
indirect(r"^\s+|\s+$", text)
1919

0 commit comments

Comments
 (0)