Skip to content

Commit 25e65e0

Browse files
committed
rewrite the regexp tracking DataFlow::Configuration to TypeTracking
1 parent d0b627b commit 25e65e0

File tree

3 files changed

+64
-59
lines changed

3 files changed

+64
-59
lines changed

ruby/ql/lib/codeql/ruby/regexp/internal/RegExpConfiguration.qll

Lines changed: 63 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -11,80 +11,67 @@ private import codeql.ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate
1111
private import codeql.ruby.TaintTracking
1212
private import codeql.ruby.frameworks.core.String
1313

14-
class RegExpConfiguration extends Configuration {
15-
RegExpConfiguration() { this = "RegExpConfiguration" }
16-
17-
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) {
18-
// track both string literals and regexp literals - the latter for finding executions of regular expressions that are used elsewhere.
19-
state = "string" and
20-
source.asExpr() =
21-
any(ExprCfgNode e |
22-
e.getConstantValue().isString(_) and
23-
not e instanceof ExprNodes::VariableReadAccessCfgNode and
24-
not e instanceof ExprNodes::ConstantReadAccessCfgNode
25-
)
26-
or
27-
state = "reg" and
28-
source.asExpr().getExpr() instanceof Ast::RegExpLiteral
29-
}
30-
31-
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
32-
state = "string" and
33-
sink instanceof RE::RegExpInterpretation::Range
34-
or
35-
state = "reg" and
36-
sink = any(RegexExecution exec).getRegex()
37-
}
38-
39-
override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) {
40-
state = "string" and
41-
exists(DataFlow::CallNode mce | mce.getMethodName() = ["match", "match?"] |
42-
// receiver of https://ruby-doc.org/core-2.4.0/String.html#method-i-match
43-
node = mce.getReceiver() and
44-
mce.getArgument(0) = trackRegexpType()
45-
or
46-
// first argument of https://ruby-doc.org/core-2.4.0/Regexp.html#method-i-match
47-
node = mce.getArgument(0) and
48-
mce.getReceiver() = trackRegexpType()
49-
)
50-
}
51-
52-
override predicate isAdditionalFlowStep(
53-
DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
54-
DataFlow::FlowState stateTo
55-
) {
56-
stateFrom = "string" and
57-
stateTo = "reg" and
58-
exists(DataFlow::CallNode call |
59-
call = API::getTopLevelMember("Regexp").getAMethodCall(["compile", "new"]) and
60-
nodeFrom = call.getArgument(0) and
61-
nodeTo = call
14+
/**
15+
* Gets a node that has been tracked from the string constant `start` to some node.
16+
* This is used to figure out where `start` is evaluated as a regular expression against an input string,
17+
* or where `start` is compiled into a regular expression.
18+
*/
19+
private DataFlow::LocalSourceNode strToReg(DataFlow::Node start, TypeTracker t) {
20+
t.start() and
21+
start = result and
22+
result.asExpr() =
23+
any(ExprCfgNode e |
24+
e.getConstantValue().isString(_) and
25+
not e instanceof ExprNodes::VariableReadAccessCfgNode and
26+
not e instanceof ExprNodes::ConstantReadAccessCfgNode
6227
)
63-
or
64-
stateFrom = stateTo and
65-
stateFrom = "string" and
28+
or
29+
exists(TypeTracker t2 | result = strToReg(start, t2).track(t2, t))
30+
or
31+
exists(TypeTracker t2, DataFlow::Node nodeFrom | t2 = t.continue() |
32+
strToReg(start, t2).flowsTo(nodeFrom) and
6633
(
6734
// include taint flow through `String` summaries
68-
TaintTracking::localTaintStep(nodeFrom, nodeTo) and
35+
TaintTracking::localTaintStep(nodeFrom, result) and
6936
nodeFrom.(DataFlowPrivate::SummaryNode).getSummarizedCallable() instanceof
7037
String::SummarizedCallable
7138
or
7239
// string concatenations, and
7340
exists(CfgNodes::ExprNodes::OperationCfgNode op |
74-
op = nodeTo.asExpr() and
41+
op = result.asExpr() and
7542
op.getAnOperand() = nodeFrom.asExpr() and
7643
op.getExpr().(Ast::BinaryOperation).getOperator() = "+"
7744
)
7845
or
7946
// string interpolations
8047
nodeFrom.asExpr() =
81-
nodeTo.asExpr().(CfgNodes::ExprNodes::StringlikeLiteralCfgNode).getAComponent()
48+
result.asExpr().(CfgNodes::ExprNodes::StringlikeLiteralCfgNode).getAComponent()
8249
)
83-
}
50+
)
51+
}
8452

85-
override int fieldFlowBranchLimit() { result = 1 }
53+
/**
54+
* Gets a node that has been tracked from the regular expression `start` to some node.
55+
* This is used to figure out where `start` is executed against an input string.
56+
*/
57+
private DataFlow::LocalSourceNode regToReg(DataFlow::Node start, TypeTracker t) {
58+
t.start() and
59+
start = result and
60+
result.asExpr().getExpr() instanceof Ast::RegExpLiteral
61+
or
62+
exists(TypeTracker t2 | result = regToReg(start, t2).track(t2, t))
63+
or
64+
exists(TypeTracker t2 |
65+
t2 = t.continue() and
66+
exists(DataFlow::CallNode call |
67+
call = API::getTopLevelMember("Regexp").getAMethodCall(["compile", "new"]) and
68+
strToReg(start, t2).flowsTo(call.getArgument(0)) and
69+
result = call
70+
)
71+
)
8672
}
8773

74+
/** Gests a node that references a regular expression. */
8875
private DataFlow::LocalSourceNode trackRegexpType(TypeTracker t) {
8976
t.start() and
9077
(
@@ -95,9 +82,28 @@ private DataFlow::LocalSourceNode trackRegexpType(TypeTracker t) {
9582
exists(TypeTracker t2 | result = trackRegexpType(t2).track(t2, t))
9683
}
9784

85+
/** Gests a node that references a regular expression. */
9886
DataFlow::Node trackRegexpType() { trackRegexpType(TypeTracker::end()).flowsTo(result) }
9987

88+
/** Gets a the value for the regular expression that is evaluated at `re`. */
10089
cached
10190
DataFlow::Node regExpSource(DataFlow::Node re) {
102-
exists(RegExpConfiguration c | c.hasFlow(result, re))
91+
exists(DataFlow::LocalSourceNode end | end = strToReg(result, TypeTracker::end()) |
92+
end.flowsTo(re) and
93+
re instanceof RE::RegExpInterpretation::Range and
94+
not exists(DataFlow::CallNode mce | mce.getMethodName() = ["match", "match?"] |
95+
// receiver of https://ruby-doc.org/core-2.4.0/String.html#method-i-match
96+
re = mce.getReceiver() and
97+
mce.getArgument(0) = trackRegexpType()
98+
or
99+
// first argument of https://ruby-doc.org/core-2.4.0/Regexp.html#method-i-match
100+
re = mce.getArgument(0) and
101+
mce.getReceiver() = trackRegexpType()
102+
)
103+
)
104+
or
105+
exists(DataFlow::LocalSourceNode end | end = regToReg(result, TypeTracker::end()) |
106+
end.flowsTo(re) and
107+
re = any(RegexExecution exec).getRegex()
108+
)
103109
}

ruby/ql/test/query-tests/security/cwe-020/IncompleteHostnameRegExp/IncompleteHostnameRegExp.expected

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
| tst-IncompleteHostnameRegExp.rb:20:14:20:31 | ^test.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:20:13:20:26 | "#{...}$" | here |
1717
| tst-IncompleteHostnameRegExp.rb:22:24:22:40 | test.example.com$ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:23:13:23:29 | ...[...] | here |
1818
| tst-IncompleteHostnameRegExp.rb:28:24:28:40 | test.example.com$ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:63:20:63:36 | ...[...] | here |
19-
| tst-IncompleteHostnameRegExp.rb:30:27:30:43 | test.example.com$ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:66:20:66:36 | ...[...] | here |
2019
| tst-IncompleteHostnameRegExp.rb:37:3:37:53 | ^(https?:)?\\/\\/((service\|www).)?example.com(?=$\|\\/) | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:37:2:37:54 | /^(https?:)?\\/\\/((service\|www).../ | here |
2120
| tst-IncompleteHostnameRegExp.rb:38:3:38:43 | ^(http\|https):\\/\\/www.example.com\\/p\\/f\\/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:38:2:38:44 | /^(http\|https):\\/\\/www.example.../ | here |
2221
| tst-IncompleteHostnameRegExp.rb:39:5:39:30 | http:\\/\\/sub.example.com\\/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.rb:39:2:39:33 | /^(http:\\/\\/sub.example.com\\/)/ | here |

ruby/ql/test/query-tests/security/cwe-020/IncompleteHostnameRegExp/tst-IncompleteHostnameRegExp.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def foo
2727

2828
convert1({ hostname: 'test.example.com$' }); # NOT OK
2929

30-
domains = [ { hostname: 'test.example.com$' } ]; # NOT OK
30+
domains = [ { hostname: 'test.example.com$' } ]; # NOT OK - but not flagged due to limitations of TypeTracking.
3131

3232

3333

0 commit comments

Comments
 (0)