Skip to content

Commit 5fff9fa

Browse files
committed
More precise flow into splat parameters
We now precisely track flow from positional arguments to splat parameters, provided that splat arguments are not used and there are no positional parameters after the splat parameter. For example, in this case: def f(x, y, *z); end f(a, b, c, d) we get flow from `c` to `z[0]` and `d` to `z[1]`. We get false flow if there are positional parameters after the splat parameter. For example in this case: def g(x, y, *z, w); end g(a, b, c, d) we get flow from `d` to `z[0]` instead of `w`. We also track flow in this case def f(a, *b) sink b[0] end f(1, *[taint, 2])
1 parent a58aa17 commit 5fff9fa

File tree

6 files changed

+786
-3
lines changed

6 files changed

+786
-3
lines changed

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,7 @@ private module Cached {
440440
THashSplatArgumentPosition() or
441441
TSplatAllArgumentPosition() or
442442
TSplatArgumentPosition(int pos) { exists(Call c | c.getArgument(pos) instanceof SplatExpr) } or
443+
TSynthSplatArgumentPosition() or
443444
TAnyArgumentPosition() or
444445
TAnyKeywordArgumentPosition()
445446

@@ -473,6 +474,7 @@ private module Cached {
473474
exists(Parameter p | p.getPosition() = pos and p instanceof SplatParameter)
474475
} or
475476
TSynthSplatParameterPosition() or
477+
TSynthArgSplatParameterPosition() or
476478
TAnyParameterPosition() or
477479
TAnyKeywordParameterPosition()
478480
}
@@ -1295,6 +1297,9 @@ class ParameterPosition extends TParameterPosition {
12951297

12961298
predicate isSynthSplat() { this = TSynthSplatParameterPosition() }
12971299

1300+
// A fake position to indicate that this parameter node holds content from a synth arg splat node
1301+
predicate isSynthArgSplat() { this = TSynthArgSplatParameterPosition() }
1302+
12981303
predicate isSplatAll() { this = TSplatAllParameterPosition() }
12991304

13001305
predicate isSplat(int n) { this = TSplatParameterPosition(n) }
@@ -1332,6 +1337,8 @@ class ParameterPosition extends TParameterPosition {
13321337
or
13331338
this.isSynthSplat() and result = "synthetic *"
13341339
or
1340+
this.isSynthArgSplat() and result = "synthetic * (from *args)"
1341+
or
13351342
exists(int pos | this.isSplat(pos) and result = "* (position " + pos + ")")
13361343
}
13371344
}
@@ -1369,6 +1376,8 @@ class ArgumentPosition extends TArgumentPosition {
13691376

13701377
predicate isSplat(int n) { this = TSplatArgumentPosition(n) }
13711378

1379+
predicate isSynthSplat() { this = TSynthSplatArgumentPosition() }
1380+
13721381
/** Gets a textual representation of this position. */
13731382
string toString() {
13741383
this.isSelf() and result = "self"
@@ -1387,6 +1396,8 @@ class ArgumentPosition extends TArgumentPosition {
13871396
or
13881397
this.isSplatAll() and result = "*"
13891398
or
1399+
this.isSynthSplat() and result = "synthetic *"
1400+
or
13901401
exists(int pos | this.isSplat(pos) and result = "* (position " + pos + ")")
13911402
}
13921403
}
@@ -1418,6 +1429,8 @@ predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
14181429
or
14191430
ppos.isSplatAll() and apos.isSplatAll()
14201431
or
1432+
ppos.isSplatAll() and apos.isSynthSplat()
1433+
or
14211434
ppos.isSynthSplat() and apos.isSplatAll()
14221435
or
14231436
// Exact splat match
@@ -1430,6 +1443,13 @@ predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
14301443
ppos.isAnyNamed() and apos.isKeyword(_)
14311444
or
14321445
apos.isAnyNamed() and ppos.isKeyword(_)
1446+
or
1447+
ppos.isSynthSplat() and apos.isSplatAll()
1448+
or
1449+
// Exact splat match
1450+
exists(int n | apos.isSplat(n) and ppos.isSplat(n))
1451+
or
1452+
apos.isSynthSplat() and ppos.isSynthArgSplat()
14331453
}
14341454

14351455
/**

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll

Lines changed: 142 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,15 @@ private module Cached {
312312
exists(c.asCallable()) and // exclude library callables
313313
isParameterNode(_, c, any(ParameterPosition p | p.isPositional(_)))
314314
} or
315+
TSynthSplatArgParameterNode(DataFlowCallable c) {
316+
exists(c.asCallable()) and // exclude library callables
317+
isParameterNode(_, c, any(ParameterPosition p | p.isSplat(_)))
318+
} or
319+
TSynthSplatParameterElementNode(DataFlowCallable c, int n) {
320+
exists(c.asCallable()) and // exclude library callables
321+
exists(ArgumentPosition p | p.isPositional(n)) and
322+
isParameterNode(_, c, any(ParameterPosition p | p.isSplat(_)))
323+
} or
315324
TExprPostUpdateNode(CfgNodes::ExprCfgNode n) {
316325
// filter out nodes that clearly don't need post-update nodes
317326
isNonConstantExpr(n) and
@@ -326,11 +335,17 @@ private module Cached {
326335
exists(Argument arg | arg.isArgumentOf(c, any(ArgumentPosition pos | pos.isKeyword(_))))
327336
or
328337
c.getAnArgument() instanceof CfgNodes::ExprNodes::PairCfgNode
338+
} or
339+
TSynthSplatArgumentNode(CfgNodes::ExprNodes::CallCfgNode c) {
340+
not exists(Argument arg, ArgumentPosition pos | pos.isSplat(_) or pos.isSplatAll() |
341+
arg.isArgumentOf(c, pos)
342+
)
329343
}
330344

331345
class TSourceParameterNode =
332346
TNormalParameterNode or TBlockParameterNode or TSelfParameterNode or
333-
TSynthHashSplatParameterNode or TSynthSplatParameterNode;
347+
TSynthHashSplatParameterNode or TSynthSplatParameterNode or TSynthSplatArgParameterNode or
348+
TSynthSplatParameterElementNode;
334349

335350
cached
336351
Location getLocation(NodeImpl n) { result = n.getLocationImpl() }
@@ -528,6 +543,12 @@ predicate nodeIsHidden(Node n) {
528543
n instanceof SynthHashSplatArgumentNode
529544
or
530545
n instanceof SynthSplatParameterNode
546+
or
547+
n instanceof SynthSplatArgumentNode
548+
or
549+
n instanceof SynthSplatArgParameterNode
550+
or
551+
n instanceof SynthSplatParameterElementNode
531552
}
532553

533554
/** An SSA definition, viewed as a node in a data flow graph. */
@@ -835,6 +856,50 @@ private module ParameterNodes {
835856
final override string toStringImpl() { result = "synthetic *args" }
836857
}
837858

859+
/**
860+
* A node that holds all positional arguments passed in a call to `c`.
861+
* This is a mirror of the `SynthSplatArgumentNode` on the callable side.
862+
* See `SynthSplatArgumentNode` for more information.
863+
*/
864+
class SynthSplatArgParameterNode extends ParameterNodeImpl, TSynthSplatArgParameterNode {
865+
private DataFlowCallable callable;
866+
867+
SynthSplatArgParameterNode() { this = TSynthSplatArgParameterNode(callable) }
868+
869+
final override Parameter getParameter() { none() }
870+
871+
final override predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) {
872+
c = callable and pos.isSynthArgSplat()
873+
}
874+
875+
final override CfgScope getCfgScope() { result = callable.asCallable() }
876+
877+
final override DataFlowCallable getEnclosingCallable() { result = callable }
878+
879+
final override Location getLocationImpl() { result = callable.getLocation() }
880+
881+
final override string toStringImpl() { result = "synthetic *args" }
882+
}
883+
884+
/**
885+
* A node that holds the content of a specific positional argument.
886+
* See `SynthSplatArgumentNode` for more information.
887+
*/
888+
class SynthSplatParameterElementNode extends TSynthSplatParameterElementNode {
889+
private DataFlowCallable callable;
890+
private int pos;
891+
892+
SynthSplatParameterElementNode() { this = TSynthSplatParameterElementNode(callable, pos) }
893+
894+
int getPosition() { result = pos }
895+
896+
DataFlowCallable getCallable() { result = callable }
897+
898+
string toString() { result = "synthetic *args[" + pos + "]" }
899+
900+
Location getLocation() { result = callable.getLocation() }
901+
}
902+
838903
/** A parameter for a library callable with a flow summary. */
839904
class SummaryParameterNode extends ParameterNodeImpl, FlowSummaryNode {
840905
private ParameterPosition pos_;
@@ -980,6 +1045,57 @@ private module ArgumentNodes {
9801045

9811046
override string toStringImpl() { result = "**" }
9821047
}
1048+
1049+
/**
1050+
* A data-flow node that represents all arguments passed to the call.
1051+
* We use this to model data flow via splat parameters.
1052+
* Consider this example:
1053+
*
1054+
* ```rb
1055+
* def foo(x, y, *z)
1056+
* end
1057+
*
1058+
* foo(1, 2, 3, 4)
1059+
* ```
1060+
*
1061+
* 1. We want `3` to flow to `z[0]` and `4` to flow to `z[1]`. We model this by first storing all arguments
1062+
* in a synthetic argument node `SynthSplatArgumentNode` (see `storeStepCommon`).
1063+
* 2. We match this to an analogous parameter node `SynthSplatArgParameterNode` on the callee side
1064+
* (see `parameterMatch`).
1065+
* 3. For each content element stored in the `SynthSplatArgParameterNode`, we add a read step to a separate
1066+
* `SynthSplatParameterElementNode`, which is parameterised by the element index (see `readStep`).
1067+
* 4. Finally, we add store steps from these `SynthSplatParameterElementNode`s to the real splat parameter node
1068+
* (see `storeStep`).
1069+
* We only add store steps for elements that will not flow to the earlier positional parameters.
1070+
* In practice that means we ignore elements at index `<= N`, where `N` is the index of the splat parameter.
1071+
* For the remaining elements we subtract `N` from their index and store them in the splat parameter.
1072+
*/
1073+
class SynthSplatArgumentNode extends ArgumentNode, TSynthSplatArgumentNode {
1074+
CfgNodes::ExprNodes::CallCfgNode c;
1075+
1076+
SynthSplatArgumentNode() { this = TSynthSplatArgumentNode(c) }
1077+
1078+
override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
1079+
this.sourceArgumentOf(call.asCall(), pos)
1080+
}
1081+
1082+
override predicate sourceArgumentOf(CfgNodes::ExprNodes::CallCfgNode call, ArgumentPosition pos) {
1083+
call = c and
1084+
pos.isSynthSplat()
1085+
}
1086+
}
1087+
1088+
private class SynthSplatArgumentNodeImpl extends NodeImpl, TSynthSplatArgumentNode {
1089+
CfgNodes::ExprNodes::CallCfgNode c;
1090+
1091+
SynthSplatArgumentNodeImpl() { this = TSynthSplatArgumentNode(c) }
1092+
1093+
override CfgScope getCfgScope() { result = c.getExpr().getCfgScope() }
1094+
1095+
override Location getLocationImpl() { result = c.getLocation() }
1096+
1097+
override string toStringImpl() { result = "*" }
1098+
}
9831099
}
9841100

9851101
import ArgumentNodes
@@ -1215,6 +1331,22 @@ predicate storeStepCommon(Node node1, ContentSet c, Node node2) {
12151331
c.isSingleton(TKnownElementContent(cv))
12161332
)
12171333
)
1334+
or
1335+
exists(CfgNodes::ExprNodes::CallCfgNode call, ArgumentPosition pos |
1336+
node2 = TSynthSplatArgumentNode(call) and
1337+
node1.asExpr().(Argument).isArgumentOf(call, pos)
1338+
|
1339+
exists(int n | pos.isPositional(n) and c = getPositionalContent(n))
1340+
)
1341+
or
1342+
// Store from SynthSplatParameterElementNode[n] into SplatParameterNode[m]
1343+
// where m = n - <position of SplatParameterNode>
1344+
exists(SynthSplatParameterElementNode elemNode, NormalParameterNode splatNode, int splatPos |
1345+
elemNode = node1 and splatNode = node2
1346+
|
1347+
splatNode.isParameterOf(elemNode.getCallable(), any(ParameterPosition p | p.isSplat(splatPos))) and
1348+
c = getPositionalContent(elemNode.getPosition() - splatPos)
1349+
)
12181350
}
12191351

12201352
/**
@@ -1284,6 +1416,15 @@ predicate readStep(Node node1, ContentSet c, Node node2) {
12841416
or
12851417
FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), c,
12861418
node2.(FlowSummaryNode).getSummaryNode())
1419+
or
1420+
// Read from SynthSplatArgParameterNode into SynthSplatParameterElementNode
1421+
exists(SynthSplatArgParameterNode fromNode, SynthSplatParameterElementNode toNode, int pos |
1422+
node1 = fromNode and node2 = toNode
1423+
|
1424+
fromNode.isParameterOf(toNode.getCallable(), _) and
1425+
c = getPositionalContent(pos) and
1426+
toNode.getPosition() = pos
1427+
)
12871428
}
12881429

12891430
/**

0 commit comments

Comments
 (0)