Skip to content

Commit 1c08592

Browse files
authored
Merge pull request #329 from github/hvitved/dataflow/synth-return
Data flow: Add a synthetic return node
2 parents c50a6c1 + c540615 commit 1c08592

File tree

8 files changed

+154
-49
lines changed

8 files changed

+154
-49
lines changed

ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,7 @@ DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) {
436436
* Holds if `e` is an `ExprNode` that may be returned by a call to `c`.
437437
*/
438438
predicate exprNodeReturnedFrom(DataFlow::ExprNode e, Callable c) {
439-
exists(ReturnNode r |
439+
exists(ReturningNode r |
440440
r.getEnclosingCallable().asCallable() = c and
441441
(
442442
r.(ExplicitReturnNode).getReturningNode().getReturnedValueNode() = e.asExpr() or

ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll

Lines changed: 118 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -46,18 +46,19 @@ module LocalFlow {
4646
)
4747
}
4848

49+
/** Gets the SSA definition node corresponding to parameter `p`. */
50+
SsaDefinitionNode getParameterDefNode(NamedParameter p) {
51+
exists(BasicBlock bb, int i |
52+
bb.getNode(i).getNode() = p.getDefiningAccess() and
53+
result.getDefinition().definesAt(_, bb, i)
54+
)
55+
}
56+
4957
/**
5058
* Holds if there is a local flow step from `nodeFrom` to `nodeTo` involving
51-
* SSA definition `def.
59+
* SSA definition `def`.
5260
*/
5361
predicate localSsaFlowStep(Ssa::Definition def, Node nodeFrom, Node nodeTo) {
54-
// Flow from parameter into SSA definition
55-
exists(BasicBlock bb, int i |
56-
bb.getNode(i).getNode() =
57-
nodeFrom.(ParameterNode).getParameter().(NamedParameter).getDefiningAccess() and
58-
nodeTo.(SsaDefinitionNode).getDefinition().definesAt(_, bb, i)
59-
)
60-
or
6162
// Flow from assignment into SSA definition
6263
def.(Ssa::WriteDefinition).assigns(nodeFrom.asExpr()) and
6364
nodeTo.(SsaDefinitionNode).getDefinition() = def
@@ -114,6 +115,12 @@ private module Cached {
114115
newtype TNode =
115116
TExprNode(CfgNodes::ExprCfgNode n) or
116117
TReturningNode(CfgNodes::ReturningCfgNode n) or
118+
TSynthReturnNode(CfgScope scope, ReturnKind kind) {
119+
exists(ReturningNode ret |
120+
ret.(NodeImpl).getCfgScope() = scope and
121+
ret.getKind() = kind
122+
)
123+
} or
117124
TSsaDefinitionNode(Ssa::Definition def) or
118125
TNormalParameterNode(Parameter p) { not p instanceof BlockParameter } or
119126
TSelfParameterNode(MethodBase m) or
@@ -139,21 +146,14 @@ private module Cached {
139146
class TParameterNode =
140147
TNormalParameterNode or TBlockParameterNode or TSelfParameterNode or TSummaryParameterNode;
141148

142-
/**
143-
* This is the local flow predicate that is used as a building block in global
144-
* data flow. It excludes SSA flow through instance fields, as flow through fields
145-
* is handled by the global data-flow library, but includes various other steps
146-
* that are only relevant for global flow.
147-
*/
148-
cached
149-
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
150-
exists(Ssa::Definition def | LocalFlow::localSsaFlowStep(def, nodeFrom, nodeTo))
151-
or
152-
nodeTo.(ParameterNode).getParameter().(OptionalParameter).getDefaultValue() =
153-
nodeFrom.asExpr().getExpr()
149+
private predicate defaultValueFlow(NamedParameter p, ExprNode e) {
150+
p.(OptionalParameter).getDefaultValue() = e.getExprNode().getExpr()
154151
or
155-
nodeTo.(ParameterNode).getParameter().(KeywordParameter).getDefaultValue() =
156-
nodeFrom.asExpr().getExpr()
152+
p.(KeywordParameter).getDefaultValue() = e.getExprNode().getExpr()
153+
}
154+
155+
private predicate localFlowStepCommon(Node nodeFrom, Node nodeTo) {
156+
LocalFlow::localSsaFlowStep(_, nodeFrom, nodeTo)
157157
or
158158
nodeFrom.(SelfParameterNode).getMethod() = nodeTo.asExpr().getExpr().getEnclosingCallable() and
159159
nodeTo.asExpr().getExpr() instanceof Self
@@ -186,12 +186,66 @@ private module Cached {
186186
) and
187187
nodeFrom.asExpr() = for.getValue()
188188
)
189+
}
190+
191+
/**
192+
* This is the local flow predicate that is used as a building block in global
193+
* data flow.
194+
*/
195+
cached
196+
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
197+
localFlowStepCommon(nodeFrom, nodeTo)
198+
or
199+
defaultValueFlow(nodeTo.(ParameterNode).getParameter(), nodeFrom)
200+
or
201+
nodeTo = LocalFlow::getParameterDefNode(nodeFrom.(ParameterNode).getParameter())
202+
or
203+
nodeTo.(SynthReturnNode).getAnInput() = nodeFrom
189204
or
190205
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, true)
191206
}
192207

208+
/** This is the local flow predicate that is exposed. */
193209
cached
194-
predicate isLocalSourceNode(Node n) { not simpleLocalFlowStep+(any(ExprNode e), n) }
210+
predicate localFlowStepImpl(Node nodeFrom, Node nodeTo) {
211+
localFlowStepCommon(nodeFrom, nodeTo)
212+
or
213+
defaultValueFlow(nodeTo.(ParameterNode).getParameter(), nodeFrom)
214+
or
215+
nodeTo = LocalFlow::getParameterDefNode(nodeFrom.(ParameterNode).getParameter())
216+
or
217+
// Simple flow through library code is included in the exposed local
218+
// step relation, even though flow is technically inter-procedural
219+
FlowSummaryImpl::Private::Steps::summaryThroughStep(nodeFrom, nodeTo, true)
220+
}
221+
222+
/** This is the local flow predicate that is used in type tracking. */
223+
cached
224+
predicate localFlowStepTypeTracker(Node nodeFrom, Node nodeTo) {
225+
localFlowStepCommon(nodeFrom, nodeTo)
226+
or
227+
exists(NamedParameter p |
228+
defaultValueFlow(p, nodeFrom) and
229+
nodeTo = LocalFlow::getParameterDefNode(p)
230+
)
231+
}
232+
233+
cached
234+
predicate isLocalSourceNode(Node n) {
235+
n instanceof ParameterNode
236+
or
237+
// This case should not be needed once we have proper use-use flow
238+
// for `self`. At that point, the `self`s returned by `trackInstance`
239+
// in `DataFlowDispatch.qll` should refer to the post-update node,
240+
// and we can remove this case.
241+
n instanceof SelfArgumentNode
242+
or
243+
not localFlowStepTypeTracker+(any(Node e |
244+
e instanceof ExprNode
245+
or
246+
e instanceof ParameterNode
247+
), n)
248+
}
195249

196250
cached
197251
newtype TContent = TTodoContent() // stub
@@ -208,6 +262,8 @@ predicate nodeIsHidden(Node n) {
208262
n instanceof SummaryNode
209263
or
210264
n instanceof SummaryParameterNode
265+
or
266+
n instanceof SynthReturnNode
211267
}
212268

213269
/** An SSA definition, viewed as a node in a data flow graph. */
@@ -234,7 +290,7 @@ class SsaDefinitionNode extends NodeImpl, TSsaDefinitionNode {
234290
* `ControlFlow::Node`s.
235291
*/
236292
class ReturningStatementNode extends NodeImpl, TReturningNode {
237-
private CfgNodes::ReturningCfgNode n;
293+
CfgNodes::ReturningCfgNode n;
238294

239295
ReturningStatementNode() { this = TReturningNode(n) }
240296

@@ -436,6 +492,12 @@ private module ArgumentNodes {
436492

437493
import ArgumentNodes
438494

495+
/** A data-flow node that represents a value syntactically returned by a callable. */
496+
abstract class ReturningNode extends Node {
497+
/** Gets the kind of this return node. */
498+
abstract ReturnKind getKind();
499+
}
500+
439501
/** A data-flow node that represents a value returned by a callable. */
440502
abstract class ReturnNode extends Node {
441503
/** Gets the kind of this return node. */
@@ -463,11 +525,9 @@ private module ReturnNodes {
463525
* A data-flow node that represents an expression returned by a callable,
464526
* either using an explict `return` statement or as the expression of a method body.
465527
*/
466-
class ExplicitReturnNode extends ReturnNode, ReturningStatementNode {
467-
private CfgNodes::ReturningCfgNode n;
468-
528+
class ExplicitReturnNode extends ReturningNode, ReturningStatementNode {
469529
ExplicitReturnNode() {
470-
isValid(this.getReturningNode()) and
530+
isValid(n) and
471531
n.getASuccessor().(CfgNodes::AnnotatedExitNode).isNormal() and
472532
n.getScope() instanceof Callable
473533
}
@@ -479,7 +539,7 @@ private module ReturnNodes {
479539
}
480540
}
481541

482-
class ExprReturnNode extends ReturnNode, ExprNode {
542+
class ExprReturnNode extends ReturningNode, ExprNode {
483543
ExprReturnNode() {
484544
this.getExprNode().getASuccessor().(CfgNodes::AnnotatedExitNode).isNormal() and
485545
this.(NodeImpl).getCfgScope() instanceof Callable
@@ -488,6 +548,34 @@ private module ReturnNodes {
488548
override ReturnKind getKind() { result instanceof NormalReturnKind }
489549
}
490550

551+
/**
552+
* A synthetic data-flow node for joining flow from different syntactic
553+
* returns into a single node.
554+
*
555+
* This node only exists to avoid computing the product of a large fan-in
556+
* with a large fan-out.
557+
*/
558+
class SynthReturnNode extends NodeImpl, ReturnNode, TSynthReturnNode {
559+
private CfgScope scope;
560+
private ReturnKind kind;
561+
562+
SynthReturnNode() { this = TSynthReturnNode(scope, kind) }
563+
564+
/** Gets a syntactic return node that flows into this synthetic node. */
565+
ReturningNode getAnInput() {
566+
result.(NodeImpl).getCfgScope() = scope and
567+
result.getKind() = kind
568+
}
569+
570+
override ReturnKind getKind() { result = kind }
571+
572+
override CfgScope getCfgScope() { result = scope }
573+
574+
override Location getLocationImpl() { result = scope.getLocation() }
575+
576+
override string toStringImpl() { result = "return " + kind + " in " + scope }
577+
}
578+
491579
private class SummaryReturnNode extends SummaryNode, ReturnNode {
492580
private ReturnKind rk;
493581

@@ -631,7 +719,7 @@ private import PostUpdateNodes
631719

632720
/** A node that performs a type cast. */
633721
class CastNode extends Node {
634-
CastNode() { none() }
722+
CastNode() { this instanceof ReturningNode }
635723
}
636724

637725
class DataFlowExpr = CfgNodes::ExprCfgNode;

ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ predicate hasLocalSource(Node sink, Node source) {
120120
or
121121
exists(Node mid |
122122
hasLocalSource(mid, source) and
123-
simpleLocalFlowStep(mid, sink)
123+
localFlowStepTypeTracker(mid, sink)
124124
)
125125
}
126126

@@ -136,13 +136,7 @@ ParameterNode parameterNode(Parameter p) { result.getParameter() = p }
136136
* Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
137137
* (intra-procedural) step.
138138
*/
139-
predicate localFlowStep(Node nodeFrom, Node nodeTo) {
140-
simpleLocalFlowStep(nodeFrom, nodeTo)
141-
or
142-
// Simple flow through library code is included in the exposed local
143-
// step relation, even though flow is technically inter-procedural
144-
FlowSummaryImpl::Private::Steps::summaryThroughStep(nodeFrom, nodeTo, true)
145-
}
139+
predicate localFlowStep = localFlowStepImpl/2;
146140

147141
/**
148142
* Holds if data flows from `source` to `sink` in zero or more local

ql/lib/codeql/ruby/typetracking/TypeTrackerSpecific.qll

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ class Node = DataFlowPublic::Node;
1111

1212
class TypeTrackingNode = DataFlowPublic::LocalSourceNode;
1313

14-
predicate simpleLocalFlowStep = DataFlowPrivate::simpleLocalFlowStep/2;
14+
predicate simpleLocalFlowStep = DataFlowPrivate::localFlowStepTypeTracker/2;
1515

1616
predicate jumpStep = DataFlowPrivate::jumpStep/2;
1717

@@ -30,12 +30,21 @@ string getPossibleContentName() { result = getSetterCallAttributeName(_) }
3030
* recursion (or, at best, terrible performance), since identifying calls to library
3131
* methods is done using API graphs (which uses type tracking).
3232
*/
33-
predicate callStep(DataFlowPrivate::ArgumentNode nodeFrom, DataFlowPublic::ParameterNode nodeTo) {
33+
predicate callStep(Node nodeFrom, Node nodeTo) {
3434
exists(ExprNodes::CallCfgNode call, CFG::CfgScope callable, int i |
3535
DataFlowDispatch::getTarget(call) = callable and
36-
nodeFrom.sourceArgumentOf(call, i) and
36+
nodeFrom.(DataFlowPrivate::ArgumentNode).sourceArgumentOf(call, i) and
3737
nodeTo.(DataFlowPrivate::ParameterNodeImpl).isSourceParameterOf(callable, i)
3838
)
39+
or
40+
// In normal data-flow, this will be a local flow step. But for type tracking
41+
// we model it as a call step, in order to avoid computing a potential
42+
// self-cross product of all calls to a function that returns one of its parameters
43+
// (only to later filter that flow out using `TypeTracker::append`).
44+
nodeTo =
45+
DataFlowPrivate::LocalFlow::getParameterDefNode(nodeFrom
46+
.(DataFlowPublic::ParameterNode)
47+
.getParameter())
3948
}
4049

4150
/**
@@ -45,11 +54,18 @@ predicate callStep(DataFlowPrivate::ArgumentNode nodeFrom, DataFlowPublic::Param
4554
* recursion (or, at best, terrible performance), since identifying calls to library
4655
* methods is done using API graphs (which uses type tracking).
4756
*/
48-
predicate returnStep(DataFlowPrivate::ReturnNode nodeFrom, Node nodeTo) {
57+
predicate returnStep(Node nodeFrom, Node nodeTo) {
4958
exists(ExprNodes::CallCfgNode call |
59+
nodeFrom instanceof DataFlowPrivate::ReturnNode and
5060
nodeFrom.(DataFlowPrivate::NodeImpl).getCfgScope() = DataFlowDispatch::getTarget(call) and
5161
nodeTo.asExpr().getNode() = call.getNode()
5262
)
63+
or
64+
// In normal data-flow, this will be a local flow step. But for type tracking
65+
// we model it as a returning flow step, in order to avoid computing a potential
66+
// self-cross product of all calls to a function that returns one of its parameters
67+
// (only to later filter that flow out using `TypeTracker::append`).
68+
nodeTo.(DataFlowPrivate::SynthReturnNode).getAnInput() = nodeFrom
5369
}
5470

5571
/**
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
import ruby
22
import codeql.ruby.dataflow.internal.DataFlowPrivate
33

4-
select any(ReturnNode node)
4+
select any(ReturningNode node)

ql/test/query-tests/security/cwe-601/UrlRedirect.expected

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@ edges
33
| UrlRedirect.rb:14:17:14:22 | call to params : | UrlRedirect.rb:14:17:14:43 | call to fetch |
44
| UrlRedirect.rb:19:17:19:22 | call to params : | UrlRedirect.rb:19:17:19:37 | call to to_unsafe_hash |
55
| UrlRedirect.rb:24:31:24:36 | call to params : | UrlRedirect.rb:24:17:24:37 | call to filter_params |
6-
| UrlRedirect.rb:24:31:24:36 | call to params : | UrlRedirect.rb:56:21:56:32 | input_params : |
76
| UrlRedirect.rb:34:20:34:25 | call to params : | UrlRedirect.rb:34:17:34:37 | "#{...}/foo" |
8-
| UrlRedirect.rb:56:21:56:32 | input_params : | UrlRedirect.rb:57:5:57:29 | call to permit : |
97
nodes
108
| UrlRedirect.rb:4:17:4:22 | call to params | semmle.label | call to params |
119
| UrlRedirect.rb:9:17:9:22 | call to params : | semmle.label | call to params : |
@@ -18,10 +16,7 @@ nodes
1816
| UrlRedirect.rb:24:31:24:36 | call to params : | semmle.label | call to params : |
1917
| UrlRedirect.rb:34:17:34:37 | "#{...}/foo" | semmle.label | "#{...}/foo" |
2018
| UrlRedirect.rb:34:20:34:25 | call to params : | semmle.label | call to params : |
21-
| UrlRedirect.rb:56:21:56:32 | input_params : | semmle.label | input_params : |
22-
| UrlRedirect.rb:57:5:57:29 | call to permit : | semmle.label | call to permit : |
2319
subpaths
24-
| UrlRedirect.rb:24:31:24:36 | call to params : | UrlRedirect.rb:56:21:56:32 | input_params : | UrlRedirect.rb:57:5:57:29 | call to permit : | UrlRedirect.rb:24:17:24:37 | call to filter_params : |
2520
#select
2621
| UrlRedirect.rb:4:17:4:22 | call to params | UrlRedirect.rb:4:17:4:22 | call to params | UrlRedirect.rb:4:17:4:22 | call to params | Untrusted URL redirection due to $@. | UrlRedirect.rb:4:17:4:22 | call to params | a user-provided value |
2722
| UrlRedirect.rb:9:17:9:28 | ...[...] | UrlRedirect.rb:9:17:9:22 | call to params : | UrlRedirect.rb:9:17:9:28 | ...[...] | Untrusted URL redirection due to $@. | UrlRedirect.rb:9:17:9:22 | call to params | a user-provided value |

ql/test/query-tests/security/cwe-798/HardcodedCredentials.expected

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ edges
66
| HardcodedCredentials.rb:21:12:21:37 | "4fQuzXef4f2yow8KWvIJTA==" : | HardcodedCredentials.rb:23:19:23:20 | pw : |
77
| HardcodedCredentials.rb:23:19:23:20 | pw : | HardcodedCredentials.rb:1:23:1:30 | password |
88
| HardcodedCredentials.rb:38:40:38:85 | "kdW/xVhiv6y1fQQNevDpUaq+2rfPK..." : | HardcodedCredentials.rb:31:18:31:23 | passwd |
9+
| HardcodedCredentials.rb:43:29:43:43 | "[email protected]" : | HardcodedCredentials.rb:43:18:43:25 | username |
10+
| HardcodedCredentials.rb:43:57:43:70 | "abcdef123456" : | HardcodedCredentials.rb:43:46:43:53 | password |
911
nodes
1012
| HardcodedCredentials.rb:1:23:1:30 | password | semmle.label | password |
1113
| HardcodedCredentials.rb:4:20:4:65 | "xwjVWdfzfRlbcgKkbSfG/xSrUeHYq..." | semmle.label | "xwjVWdfzfRlbcgKkbSfG/xSrUeHYq..." |
@@ -19,6 +21,10 @@ nodes
1921
| HardcodedCredentials.rb:23:19:23:20 | pw : | semmle.label | pw : |
2022
| HardcodedCredentials.rb:31:18:31:23 | passwd | semmle.label | passwd |
2123
| HardcodedCredentials.rb:38:40:38:85 | "kdW/xVhiv6y1fQQNevDpUaq+2rfPK..." : | semmle.label | "kdW/xVhiv6y1fQQNevDpUaq+2rfPK..." : |
24+
| HardcodedCredentials.rb:43:18:43:25 | username | semmle.label | username |
25+
| HardcodedCredentials.rb:43:29:43:43 | "[email protected]" : | semmle.label | "[email protected]" : |
26+
| HardcodedCredentials.rb:43:46:43:53 | password | semmle.label | password |
27+
| HardcodedCredentials.rb:43:57:43:70 | "abcdef123456" : | semmle.label | "abcdef123456" : |
2228
subpaths
2329
#select
2430
| HardcodedCredentials.rb:4:20:4:65 | "xwjVWdfzfRlbcgKkbSfG/xSrUeHYq..." | HardcodedCredentials.rb:4:20:4:65 | "xwjVWdfzfRlbcgKkbSfG/xSrUeHYq..." | HardcodedCredentials.rb:4:20:4:65 | "xwjVWdfzfRlbcgKkbSfG/xSrUeHYq..." | Use of $@. | HardcodedCredentials.rb:4:20:4:65 | "xwjVWdfzfRlbcgKkbSfG/xSrUeHYq..." | hardcoded credentials |
@@ -29,3 +35,5 @@ subpaths
2935
| HardcodedCredentials.rb:20:11:20:76 | "3jOe7sXKX6Tx52qHWUVqh2t9LNsE+..." | HardcodedCredentials.rb:20:11:20:76 | "3jOe7sXKX6Tx52qHWUVqh2t9LNsE+..." : | HardcodedCredentials.rb:1:23:1:30 | password | Use of $@. | HardcodedCredentials.rb:20:11:20:76 | "3jOe7sXKX6Tx52qHWUVqh2t9LNsE+..." | hardcoded credentials |
3036
| HardcodedCredentials.rb:21:12:21:37 | "4fQuzXef4f2yow8KWvIJTA==" | HardcodedCredentials.rb:21:12:21:37 | "4fQuzXef4f2yow8KWvIJTA==" : | HardcodedCredentials.rb:1:23:1:30 | password | Use of $@. | HardcodedCredentials.rb:21:12:21:37 | "4fQuzXef4f2yow8KWvIJTA==" | hardcoded credentials |
3137
| HardcodedCredentials.rb:38:40:38:85 | "kdW/xVhiv6y1fQQNevDpUaq+2rfPK..." | HardcodedCredentials.rb:38:40:38:85 | "kdW/xVhiv6y1fQQNevDpUaq+2rfPK..." : | HardcodedCredentials.rb:31:18:31:23 | passwd | Use of $@. | HardcodedCredentials.rb:38:40:38:85 | "kdW/xVhiv6y1fQQNevDpUaq+2rfPK..." | hardcoded credentials |
38+
| HardcodedCredentials.rb:43:29:43:43 | "[email protected]" | HardcodedCredentials.rb:43:29:43:43 | "[email protected]" : | HardcodedCredentials.rb:43:18:43:25 | username | Use of $@. | HardcodedCredentials.rb:43:29:43:43 | "[email protected]" | hardcoded credentials |
39+
| HardcodedCredentials.rb:43:57:43:70 | "abcdef123456" | HardcodedCredentials.rb:43:57:43:70 | "abcdef123456" : | HardcodedCredentials.rb:43:46:43:53 | password | Use of $@. | HardcodedCredentials.rb:43:57:43:70 | "abcdef123456" | hardcoded credentials |

ql/test/query-tests/security/cwe-798/HardcodedCredentials.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,7 @@ def include?(passwd)
3939

4040
# Call to unrelated method with same name (should not be flagged)
4141
"foobar".include?("foo")
42+
43+
def default_cred(username = "[email protected]", password = "abcdef123456")
44+
username
45+
end

0 commit comments

Comments
 (0)