Skip to content

Commit 9dede31

Browse files
authored
Merge pull request github#13077 from hvitved/ruby/track-regexp-improvements
Ruby: Improvements to `RegExpTracking`
2 parents 725a0a5 + 425ebba commit 9dede31

File tree

5 files changed

+183
-90
lines changed

5 files changed

+183
-90
lines changed

python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -613,8 +613,17 @@ class TypeBackTracker extends TTypeBackTracker {
613613
* also flow to `sink`.
614614
*/
615615
TypeTracker getACompatibleTypeTracker() {
616-
exists(boolean hasCall | result = MkTypeTracker(hasCall, content) |
617-
hasCall = false or this.hasReturn() = false
616+
exists(boolean hasCall, OptionalTypeTrackerContent c |
617+
result = MkTypeTracker(hasCall, c) and
618+
(
619+
compatibleContents(c, content)
620+
or
621+
content = noContent() and c = content
622+
)
623+
|
624+
hasCall = false
625+
or
626+
this.hasReturn() = false
618627
)
619628
}
620629
}

ruby/ql/lib/codeql/ruby/controlflow/CfgNodes.qll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -936,10 +936,10 @@ module ExprNodes {
936936
}
937937

938938
/** A control-flow node that wraps a `StringLiteral` AST expression. */
939-
class StringLiteralCfgNode extends ExprCfgNode {
940-
override string getAPrimaryQlClass() { result = "StringLiteralCfgNode" }
939+
class StringLiteralCfgNode extends StringlikeLiteralCfgNode {
940+
StringLiteralCfgNode() { e instanceof StringLiteral }
941941

942-
override StringLiteral e;
942+
override string getAPrimaryQlClass() { result = "StringLiteralCfgNode" }
943943

944944
final override StringLiteral getExpr() { result = super.getExpr() }
945945
}

ruby/ql/lib/codeql/ruby/dataflow/internal/TaintTrackingPrivate.qll

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,13 @@ private module Cached {
112112
)
113113
}
114114

115+
cached
116+
predicate summaryThroughStepTaint(
117+
DataFlow::Node arg, DataFlow::Node out, FlowSummaryImpl::Public::SummarizedCallable sc
118+
) {
119+
FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(arg, out, sc)
120+
}
121+
115122
/**
116123
* Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
117124
* (intra-procedural) step.
@@ -122,7 +129,7 @@ private module Cached {
122129
defaultAdditionalTaintStep(nodeFrom, nodeTo) or
123130
// Simple flow through library code is included in the exposed local
124131
// step relation, even though flow is technically inter-procedural
125-
FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(nodeFrom, nodeTo, _)
132+
summaryThroughStepTaint(nodeFrom, nodeTo, _)
126133
}
127134
}
128135

ruby/ql/lib/codeql/ruby/regexp/internal/RegExpTracking.qll

Lines changed: 150 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ private import codeql.ruby.typetracking.TypeTracker
2121
private import codeql.ruby.ApiGraphs
2222
private import codeql.ruby.Concepts
2323
private import codeql.ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate
24+
private import codeql.ruby.dataflow.internal.TaintTrackingPrivate as TaintTrackingPrivate
2425
private import codeql.ruby.TaintTracking
2526
private import codeql.ruby.frameworks.core.String
2627

@@ -37,43 +38,6 @@ DataFlow::LocalSourceNode strStart() {
3738
/** Gets a dataflow node for a regular expression literal. */
3839
DataFlow::LocalSourceNode regStart() { result.asExpr().getExpr() instanceof Ast::RegExpLiteral }
3940

40-
/**
41-
* Holds if the analysis should track flow from `nodeFrom` to `nodeTo` on top of the ordinary type-tracking steps.
42-
* `nodeFrom` and `nodeTo` has type `fromType` and `toType` respectively.
43-
* The types are either "string" or "regexp".
44-
*/
45-
predicate step(
46-
DataFlow::Node nodeFrom, DataFlow::LocalSourceNode nodeTo, string fromType, string toType
47-
) {
48-
fromType = toType and
49-
fromType = "string" and
50-
(
51-
// include taint flow through `String` summaries
52-
TaintTracking::localTaintStep(nodeFrom, nodeTo) and
53-
nodeFrom.(DataFlowPrivate::SummaryNode).getSummarizedCallable() instanceof
54-
String::SummarizedCallable
55-
or
56-
// string concatenations, and
57-
exists(CfgNodes::ExprNodes::OperationCfgNode op |
58-
op = nodeTo.asExpr() and
59-
op.getAnOperand() = nodeFrom.asExpr() and
60-
op.getExpr().(Ast::BinaryOperation).getOperator() = "+"
61-
)
62-
or
63-
// string interpolations
64-
nodeFrom.asExpr() =
65-
nodeTo.asExpr().(CfgNodes::ExprNodes::StringlikeLiteralCfgNode).getAComponent()
66-
)
67-
or
68-
fromType = "string" and
69-
toType = "reg" and
70-
exists(DataFlow::CallNode call |
71-
call = API::getTopLevelMember("Regexp").getAMethodCall(["compile", "new"]) and
72-
nodeFrom = call.getArgument(0) and
73-
nodeTo = call
74-
)
75-
}
76-
7741
/** Gets a node where string values that flow to the node are interpreted as regular expressions. */
7842
DataFlow::Node stringSink() {
7943
result instanceof RE::RegExpInterpretation::Range and
@@ -91,70 +55,174 @@ DataFlow::Node stringSink() {
9155
/** Gets a node where regular expressions that flow to the node are used. */
9256
DataFlow::Node regSink() { result = any(RegexExecution exec).getRegex() }
9357

94-
/** Gets a node that is reachable by type-tracking from any string or regular expression. */
95-
DataFlow::LocalSourceNode forward(TypeTracker t) {
96-
t.start() and
97-
result = [strStart(), regStart()]
98-
or
99-
exists(TypeTracker t2 | result = forward(t2).track(t2, t))
100-
or
101-
exists(TypeTracker t2 | t2 = t.continue() | step(forward(t2).getALocalUse(), result, _, _))
58+
private signature module TypeTrackInputSig {
59+
DataFlow::LocalSourceNode start(TypeTracker t, DataFlow::Node start);
60+
61+
predicate end(DataFlow::Node n);
62+
63+
predicate additionalStep(DataFlow::Node nodeFrom, DataFlow::LocalSourceNode nodeTo);
10264
}
10365

10466
/**
105-
* Gets a node that is backwards reachable from any regular expression use,
106-
* where that use is reachable by type-tracking from any string or regular expression.
67+
* Provides a version of type tracking where we first prune for reachable nodes,
68+
* before doing the type tracking computation.
10769
*/
108-
DataFlow::LocalSourceNode backwards(TypeBackTracker t) {
109-
t.start() and
110-
result.flowsTo([stringSink(), regSink()]) and
111-
result = forward(TypeTracker::end())
112-
or
113-
exists(TypeBackTracker t2 | result = backwards(t2).backtrack(t2, t))
114-
or
115-
exists(TypeBackTracker t2 | t2 = t.continue() | step(result.getALocalUse(), backwards(t2), _, _))
70+
private module TypeTrack<TypeTrackInputSig Input> {
71+
private predicate additionalStep(
72+
DataFlow::LocalSourceNode nodeFrom, DataFlow::LocalSourceNode nodeTo
73+
) {
74+
Input::additionalStep(nodeFrom.getALocalUse(), nodeTo)
75+
}
76+
77+
/** Gets a node that is forwards reachable by type-tracking. */
78+
pragma[nomagic]
79+
private DataFlow::LocalSourceNode forward(TypeTracker t) {
80+
result = Input::start(t, _)
81+
or
82+
exists(TypeTracker t2 | result = forward(t2).track(t2, t))
83+
or
84+
exists(TypeTracker t2 | t2 = t.continue() | additionalStep(forward(t2), result))
85+
}
86+
87+
bindingset[result, tbt]
88+
pragma[inline_late]
89+
pragma[noopt]
90+
private DataFlow::LocalSourceNode forwardLateInline(TypeBackTracker tbt) {
91+
exists(TypeTracker tt |
92+
result = forward(tt) and
93+
tt = tbt.getACompatibleTypeTracker()
94+
)
95+
}
96+
97+
/** Gets a node that is backwards reachable by type-tracking. */
98+
pragma[nomagic]
99+
private DataFlow::LocalSourceNode backwards(TypeBackTracker t) {
100+
result = forwardLateInline(t) and
101+
(
102+
t.start() and
103+
Input::end(result.getALocalUse())
104+
or
105+
exists(TypeBackTracker t2 | result = backwards(t2).backtrack(t2, t))
106+
or
107+
exists(TypeBackTracker t2 | t2 = t.continue() | additionalStep(result, backwards(t2)))
108+
)
109+
}
110+
111+
bindingset[result, tt]
112+
pragma[inline_late]
113+
pragma[noopt]
114+
private DataFlow::LocalSourceNode backwardsInlineLate(TypeTracker tt) {
115+
exists(TypeBackTracker tbt |
116+
result = backwards(tbt) and
117+
tt = tbt.getACompatibleTypeTracker()
118+
)
119+
}
120+
121+
/** Holds if `n` is forwards and backwards reachable with type tracker `t`. */
122+
pragma[nomagic]
123+
private predicate reached(DataFlow::LocalSourceNode n, TypeTracker t) {
124+
n = forward(t) and
125+
n = backwardsInlineLate(t)
126+
}
127+
128+
pragma[nomagic]
129+
private TypeTracker stepReached(
130+
TypeTracker t, DataFlow::LocalSourceNode nodeFrom, DataFlow::LocalSourceNode nodeTo
131+
) {
132+
exists(StepSummary summary |
133+
StepSummary::step(nodeFrom, nodeTo, summary) and
134+
reached(nodeFrom, t) and
135+
reached(nodeTo, result) and
136+
result = t.append(summary)
137+
)
138+
or
139+
additionalStep(nodeFrom, nodeTo) and
140+
reached(nodeFrom, pragma[only_bind_into](t)) and
141+
reached(nodeTo, pragma[only_bind_into](t)) and
142+
result = t.continue()
143+
}
144+
145+
/** Gets a node that has been tracked from the start node `start`. */
146+
DataFlow::LocalSourceNode track(DataFlow::Node start, TypeTracker t) {
147+
t.start() and
148+
result = Input::start(t, start) and
149+
reached(result, t)
150+
or
151+
exists(TypeTracker t2 | t = stepReached(t2, track(start, t2), result))
152+
}
153+
}
154+
155+
/** Holds if `inputStr` is compiled to a regular expression that is returned at `call`. */
156+
pragma[nomagic]
157+
private predicate regFromString(DataFlow::LocalSourceNode inputStr, DataFlow::CallNode call) {
158+
exists(DataFlow::Node mid |
159+
inputStr.flowsTo(mid) and
160+
call = API::getTopLevelMember("Regexp").getAMethodCall(["compile", "new"]) and
161+
mid = call.getArgument(0)
162+
)
163+
}
164+
165+
private module StringTypeTrackInput implements TypeTrackInputSig {
166+
DataFlow::LocalSourceNode start(TypeTracker t, DataFlow::Node start) {
167+
start = strStart() and t.start() and result = start
168+
}
169+
170+
predicate end(DataFlow::Node n) {
171+
n = stringSink() or
172+
regFromString(n, _)
173+
}
174+
175+
predicate additionalStep(DataFlow::Node nodeFrom, DataFlow::LocalSourceNode nodeTo) {
176+
// include taint flow through `String` summaries
177+
TaintTrackingPrivate::summaryThroughStepTaint(nodeFrom, nodeTo,
178+
any(String::SummarizedCallable c))
179+
or
180+
// string concatenations, and
181+
exists(CfgNodes::ExprNodes::OperationCfgNode op |
182+
op = nodeTo.asExpr() and
183+
op.getAnOperand() = nodeFrom.asExpr() and
184+
op.getExpr().(Ast::BinaryOperation).getOperator() = "+"
185+
)
186+
or
187+
// string interpolations
188+
nodeFrom.asExpr() =
189+
nodeTo.asExpr().(CfgNodes::ExprNodes::StringlikeLiteralCfgNode).getAComponent()
190+
}
116191
}
117192

118193
/**
119194
* Gets a node that has been tracked from the string constant `start` to some node.
120195
* This is used to figure out where `start` is evaluated as a regular expression against an input string,
121196
* or where `start` is compiled into a regular expression.
122197
*/
123-
private DataFlow::LocalSourceNode trackStrings(DataFlow::Node start, TypeTracker t) {
124-
result = backwards(_) and
125-
(
198+
private predicate trackStrings = TypeTrack<StringTypeTrackInput>::track/2;
199+
200+
/** Holds if `strConst` flows to a regex compilation (tracked by `t`), where the resulting regular expression is stored in `reg`. */
201+
pragma[nomagic]
202+
private predicate regFromStringStart(DataFlow::Node strConst, TypeTracker t, DataFlow::CallNode reg) {
203+
regFromString(trackStrings(strConst, t), reg) and
204+
exists(t.continue())
205+
}
206+
207+
private module RegTypeTrackInput implements TypeTrackInputSig {
208+
DataFlow::LocalSourceNode start(TypeTracker t, DataFlow::Node start) {
209+
start = regStart() and
126210
t.start() and
127-
start = result and
128-
result = strStart()
211+
result = start
129212
or
130-
exists(TypeTracker t2 | result = trackStrings(start, t2).track(t2, t))
131-
or
132-
// an additional step from string to string
133-
exists(TypeTracker t2 | t2 = t.continue() |
134-
step(trackStrings(start, t2).getALocalUse(), result, "string", "string")
135-
)
136-
)
213+
regFromStringStart(start, t, result)
214+
}
215+
216+
predicate end(DataFlow::Node n) { n = regSink() }
217+
218+
predicate additionalStep(DataFlow::Node nodeFrom, DataFlow::LocalSourceNode nodeTo) { none() }
137219
}
138220

139221
/**
140222
* Gets a node that has been tracked from the regular expression `start` to some node.
141223
* This is used to figure out where `start` is executed against an input string.
142224
*/
143-
private DataFlow::LocalSourceNode trackRegs(DataFlow::Node start, TypeTracker t) {
144-
result = backwards(_) and
145-
(
146-
t.start() and
147-
start = result and
148-
result = regStart()
149-
or
150-
exists(TypeTracker t2 | result = trackRegs(start, t2).track(t2, t))
151-
or
152-
// an additional step where a string is converted to a regular expression
153-
exists(TypeTracker t2 | t2 = t.continue() |
154-
step(trackStrings(start, t2).getALocalUse(), result, "string", "reg")
155-
)
156-
)
157-
}
225+
private predicate trackRegs = TypeTrack<RegTypeTrackInput>::track/2;
158226

159227
/** Gets a node that references a regular expression. */
160228
private DataFlow::LocalSourceNode trackRegexpType(TypeTracker t) {

ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -613,8 +613,17 @@ class TypeBackTracker extends TTypeBackTracker {
613613
* also flow to `sink`.
614614
*/
615615
TypeTracker getACompatibleTypeTracker() {
616-
exists(boolean hasCall | result = MkTypeTracker(hasCall, content) |
617-
hasCall = false or this.hasReturn() = false
616+
exists(boolean hasCall, OptionalTypeTrackerContent c |
617+
result = MkTypeTracker(hasCall, c) and
618+
(
619+
compatibleContents(c, content)
620+
or
621+
content = noContent() and c = content
622+
)
623+
|
624+
hasCall = false
625+
or
626+
this.hasReturn() = false
618627
)
619628
}
620629
}

0 commit comments

Comments
 (0)