Skip to content

Commit 60b0f25

Browse files
committed
Ruby: Improvements to RegExpTracking
1 parent fa0a999 commit 60b0f25

File tree

4 files changed

+165
-82
lines changed

4 files changed

+165
-82
lines changed

ruby/ql/lib/codeql/ruby/controlflow/CfgNodes.qll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -936,10 +936,10 @@ module ExprNodes {
936936
}
937937

938938
/** A control-flow node that wraps a `StringLiteral` AST expression. */
939-
class StringLiteralCfgNode extends ExprCfgNode {
940-
override string getAPrimaryQlClass() { result = "StringLiteralCfgNode" }
939+
class StringLiteralCfgNode extends StringlikeLiteralCfgNode {
940+
StringLiteralCfgNode() { e instanceof StringLiteral }
941941

942-
override StringLiteral e;
942+
override string getAPrimaryQlClass() { result = "StringLiteralCfgNode" }
943943

944944
final override StringLiteral getExpr() { result = super.getExpr() }
945945
}

ruby/ql/lib/codeql/ruby/dataflow/internal/TaintTrackingPrivate.qll

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,13 @@ private module Cached {
112112
)
113113
}
114114

115+
cached
116+
predicate summaryThroughStepTaint(
117+
DataFlow::Node arg, DataFlow::Node out, FlowSummaryImpl::Public::SummarizedCallable sc
118+
) {
119+
FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(arg, out, sc)
120+
}
121+
115122
/**
116123
* Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
117124
* (intra-procedural) step.
@@ -122,7 +129,7 @@ private module Cached {
122129
defaultAdditionalTaintStep(nodeFrom, nodeTo) or
123130
// Simple flow through library code is included in the exposed local
124131
// step relation, even though flow is technically inter-procedural
125-
FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(nodeFrom, nodeTo, _)
132+
summaryThroughStepTaint(nodeFrom, nodeTo, _)
126133
}
127134
}
128135

ruby/ql/lib/codeql/ruby/regexp/internal/RegExpTracking.qll

Lines changed: 143 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ private import codeql.ruby.typetracking.TypeTracker
2121
private import codeql.ruby.ApiGraphs
2222
private import codeql.ruby.Concepts
2323
private import codeql.ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate
24+
private import codeql.ruby.dataflow.internal.TaintTrackingPrivate as TaintTrackingPrivate
2425
private import codeql.ruby.TaintTracking
2526
private import codeql.ruby.frameworks.core.String
2627

@@ -37,43 +38,6 @@ DataFlow::LocalSourceNode strStart() {
3738
/** Gets a dataflow node for a regular expression literal. */
3839
DataFlow::LocalSourceNode regStart() { result.asExpr().getExpr() instanceof Ast::RegExpLiteral }
3940

40-
/**
41-
* Holds if the analysis should track flow from `nodeFrom` to `nodeTo` on top of the ordinary type-tracking steps.
42-
* `nodeFrom` and `nodeTo` has type `fromType` and `toType` respectively.
43-
* The types are either "string" or "regexp".
44-
*/
45-
predicate step(
46-
DataFlow::Node nodeFrom, DataFlow::LocalSourceNode nodeTo, string fromType, string toType
47-
) {
48-
fromType = toType and
49-
fromType = "string" and
50-
(
51-
// include taint flow through `String` summaries
52-
TaintTracking::localTaintStep(nodeFrom, nodeTo) and
53-
nodeFrom.(DataFlowPrivate::SummaryNode).getSummarizedCallable() instanceof
54-
String::SummarizedCallable
55-
or
56-
// string concatenations, and
57-
exists(CfgNodes::ExprNodes::OperationCfgNode op |
58-
op = nodeTo.asExpr() and
59-
op.getAnOperand() = nodeFrom.asExpr() and
60-
op.getExpr().(Ast::BinaryOperation).getOperator() = "+"
61-
)
62-
or
63-
// string interpolations
64-
nodeFrom.asExpr() =
65-
nodeTo.asExpr().(CfgNodes::ExprNodes::StringlikeLiteralCfgNode).getAComponent()
66-
)
67-
or
68-
fromType = "string" and
69-
toType = "reg" and
70-
exists(DataFlow::CallNode call |
71-
call = API::getTopLevelMember("Regexp").getAMethodCall(["compile", "new"]) and
72-
nodeFrom = call.getArgument(0) and
73-
nodeTo = call
74-
)
75-
}
76-
7741
/** Gets a node where string values that flow to the node are interpreted as regular expressions. */
7842
DataFlow::Node stringSink() {
7943
result instanceof RE::RegExpInterpretation::Range and
@@ -91,69 +55,172 @@ DataFlow::Node stringSink() {
9155
/** Gets a node where regular expressions that flow to the node are used. */
9256
DataFlow::Node regSink() { result = any(RegexExecution exec).getRegex() }
9357

94-
/** Gets a node that is reachable by type-tracking from any string or regular expression. */
95-
DataFlow::LocalSourceNode forward(TypeTracker t) {
96-
t.start() and
97-
result = [strStart(), regStart()]
98-
or
99-
exists(TypeTracker t2 | result = forward(t2).track(t2, t))
100-
or
101-
exists(TypeTracker t2 | t2 = t.continue() | step(forward(t2).getALocalUse(), result, _, _))
58+
private signature module ReachInputSig {
59+
DataFlow::LocalSourceNode start(TypeTracker t);
60+
61+
DataFlow::Node end();
62+
63+
predicate additionalStep(DataFlow::LocalSourceNode nodeFrom, DataFlow::LocalSourceNode nodeTo);
10264
}
10365

104-
/**
105-
* Gets a node that is backwards reachable from any regular expression use,
106-
* where that use is reachable by type-tracking from any string or regular expression.
107-
*/
108-
DataFlow::LocalSourceNode backwards(TypeBackTracker t) {
109-
t.start() and
110-
result.flowsTo([stringSink(), regSink()]) and
111-
result = forward(TypeTracker::end())
112-
or
113-
exists(TypeBackTracker t2 | result = backwards(t2).backtrack(t2, t))
114-
or
115-
exists(TypeBackTracker t2 | t2 = t.continue() | step(result.getALocalUse(), backwards(t2), _, _))
66+
private module Reach<ReachInputSig Input> {
67+
/** Gets a node that is forwards reachable by type-tracking. */
68+
pragma[nomagic]
69+
private DataFlow::LocalSourceNode forward(TypeTracker t) {
70+
result = Input::start(t)
71+
or
72+
exists(TypeTracker t2 | result = forward(t2).track(t2, t))
73+
or
74+
exists(TypeTracker t2 | t2 = t.continue() | Input::additionalStep(forward(t2), result))
75+
}
76+
77+
bindingset[result, tbt]
78+
pragma[inline_late]
79+
pragma[noopt]
80+
private DataFlow::LocalSourceNode forwardLateInline(TypeBackTracker tbt) {
81+
exists(TypeTracker tt |
82+
result = forward(tt) and
83+
tt = tbt.getACompatibleTypeTracker()
84+
)
85+
}
86+
87+
/** Gets a node that is backwards reachable by type-tracking. */
88+
pragma[nomagic]
89+
private DataFlow::LocalSourceNode backwards(TypeBackTracker t) {
90+
result = forwardLateInline(t) and
91+
(
92+
t.start() and
93+
result.flowsTo(Input::end())
94+
or
95+
exists(TypeBackTracker t2 | result = backwards(t2).backtrack(t2, t))
96+
or
97+
exists(TypeBackTracker t2 | t2 = t.continue() | Input::additionalStep(result, backwards(t2)))
98+
)
99+
}
100+
101+
bindingset[result, tt]
102+
pragma[inline_late]
103+
pragma[noopt]
104+
private DataFlow::LocalSourceNode backwardsInlineLate(TypeTracker tt) {
105+
exists(TypeBackTracker tbt |
106+
result = backwards(tbt) and
107+
tt = tbt.getACompatibleTypeTracker()
108+
)
109+
}
110+
111+
pragma[nomagic]
112+
predicate reached(DataFlow::LocalSourceNode n, TypeTracker t) {
113+
n = forward(t) and
114+
n = backwardsInlineLate(t)
115+
}
116+
117+
pragma[nomagic]
118+
TypeTracker stepReached(
119+
TypeTracker t, DataFlow::LocalSourceNode nodeFrom, DataFlow::LocalSourceNode nodeTo
120+
) {
121+
exists(StepSummary summary |
122+
StepSummary::step(nodeFrom, nodeTo, summary) and
123+
reached(nodeFrom, t) and
124+
reached(nodeTo, result) and
125+
result = t.append(summary)
126+
)
127+
or
128+
Input::additionalStep(nodeFrom, nodeTo) and
129+
reached(nodeFrom, pragma[only_bind_into](t)) and
130+
reached(nodeTo, pragma[only_bind_into](t)) and
131+
result = t.continue()
132+
}
133+
}
134+
135+
pragma[nomagic]
136+
private predicate regFromString(DataFlow::LocalSourceNode n, DataFlow::CallNode call) {
137+
exists(DataFlow::Node mid |
138+
n.flowsTo(mid) and
139+
call = API::getTopLevelMember("Regexp").getAMethodCall(["compile", "new"]) and
140+
mid = call.getArgument(0)
141+
)
142+
}
143+
144+
private module StringReachInput implements ReachInputSig {
145+
DataFlow::LocalSourceNode start(TypeTracker t) { result = strStart() and t.start() }
146+
147+
DataFlow::Node end() {
148+
result = stringSink() or
149+
regFromString(result, _)
150+
}
151+
152+
predicate additionalStep(DataFlow::LocalSourceNode nodeFrom, DataFlow::LocalSourceNode nodeTo) {
153+
exists(DataFlow::Node mid | nodeFrom.flowsTo(mid) |
154+
// include taint flow through `String` summaries
155+
TaintTrackingPrivate::summaryThroughStepTaint(mid, nodeTo, any(String::SummarizedCallable c))
156+
or
157+
// string concatenations, and
158+
exists(CfgNodes::ExprNodes::OperationCfgNode op |
159+
op = nodeTo.asExpr() and
160+
op.getAnOperand() = mid.asExpr() and
161+
op.getExpr().(Ast::BinaryOperation).getOperator() = "+"
162+
)
163+
or
164+
// string interpolations
165+
mid.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::StringlikeLiteralCfgNode).getAComponent()
166+
)
167+
}
116168
}
117169

170+
private module StringReach = Reach<StringReachInput>;
171+
118172
/**
119173
* Gets a node that has been tracked from the string constant `start` to some node.
120174
* This is used to figure out where `start` is evaluated as a regular expression against an input string,
121175
* or where `start` is compiled into a regular expression.
122176
*/
123177
private DataFlow::LocalSourceNode trackStrings(DataFlow::Node start, TypeTracker t) {
124-
result = backwards(_) and
125-
(
126-
t.start() and
127-
start = result and
128-
result = strStart()
129-
or
130-
exists(TypeTracker t2 | result = trackStrings(start, t2).track(t2, t))
178+
t.start() and
179+
start = result and
180+
result = strStart() and
181+
StringReach::reached(result, t)
182+
or
183+
exists(TypeTracker t2 | t = StringReach::stepReached(t2, trackStrings(start, t2), result))
184+
}
185+
186+
pragma[nomagic]
187+
private predicate regFromStringStart(DataFlow::Node start, TypeTracker t, DataFlow::CallNode nodeTo) {
188+
regFromString(trackStrings(start, t), nodeTo) and
189+
exists(t.continue())
190+
}
191+
192+
private module RegReachInput implements ReachInputSig {
193+
DataFlow::LocalSourceNode start(TypeTracker t) {
194+
result = regStart() and
195+
t.start()
131196
or
132-
// an additional step from string to string
133-
exists(TypeTracker t2 | t2 = t.continue() |
134-
step(trackStrings(start, t2).getALocalUse(), result, "string", "string")
135-
)
136-
)
197+
regFromStringStart(_, t, result)
198+
}
199+
200+
DataFlow::Node end() { result = regSink() }
201+
202+
predicate additionalStep(DataFlow::LocalSourceNode nodeFrom, DataFlow::LocalSourceNode nodeTo) {
203+
none()
204+
}
137205
}
138206

207+
private module RegReach = Reach<RegReachInput>;
208+
139209
/**
140210
* Gets a node that has been tracked from the regular expression `start` to some node.
141211
* This is used to figure out where `start` is executed against an input string.
142212
*/
143213
private DataFlow::LocalSourceNode trackRegs(DataFlow::Node start, TypeTracker t) {
144-
result = backwards(_) and
214+
RegReach::reached(result, t) and
145215
(
146216
t.start() and
147217
start = result and
148218
result = regStart()
149219
or
150-
exists(TypeTracker t2 | result = trackRegs(start, t2).track(t2, t))
151-
or
152-
// an additional step where a string is converted to a regular expression
153-
exists(TypeTracker t2 | t2 = t.continue() |
154-
step(trackStrings(start, t2).getALocalUse(), result, "string", "reg")
155-
)
220+
regFromStringStart(start, t, result)
156221
)
222+
or
223+
exists(TypeTracker t2 | t = RegReach::stepReached(t2, trackRegs(start, t2), result))
157224
}
158225

159226
/** Gets a node that references a regular expression. */

ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -613,8 +613,17 @@ class TypeBackTracker extends TTypeBackTracker {
613613
* also flow to `sink`.
614614
*/
615615
TypeTracker getACompatibleTypeTracker() {
616-
exists(boolean hasCall | result = MkTypeTracker(hasCall, content) |
617-
hasCall = false or this.hasReturn() = false
616+
exists(boolean hasCall, OptionalTypeTrackerContent c |
617+
result = MkTypeTracker(hasCall, c) and
618+
(
619+
compatibleContents(c, content)
620+
or
621+
content = noContent() and c = content
622+
)
623+
|
624+
hasCall = false
625+
or
626+
this.hasReturn() = false
618627
)
619628
}
620629
}

0 commit comments

Comments
 (0)