Skip to content

Commit 72356d1

Browse files
committed
Ruby: track flow from *args to positional params
This models flow in the following case: def foo(x, y) sink x # 1 sink y # 2 end args = [source 1, source 2] foo(*args) We do this by introducing a SynthSplatParameterNode which accepts content from the splat argument, if one is given at the callsite. From this node we add read steps to each positional parameter.
1 parent 01ff690 commit 72356d1

File tree

6 files changed

+270
-25
lines changed

6 files changed

+270
-25
lines changed

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,7 @@ private module Cached {
439439
} or
440440
THashSplatArgumentPosition() or
441441
TSplatAllArgumentPosition() or
442+
TSplatArgumentPosition(int pos) { exists(Call c | c.getArgument(pos) instanceof SplatExpr) } or
442443
TAnyArgumentPosition() or
443444
TAnyKeywordArgumentPosition()
444445

@@ -468,6 +469,10 @@ private module Cached {
468469
// synthetic parameter position.
469470
TSynthHashSplatParameterPosition() or
470471
TSplatAllParameterPosition() or
472+
TSplatParameterPosition(int pos) {
473+
exists(Parameter p | p.getPosition() = pos and p instanceof SplatParameter)
474+
} or
475+
TSynthSplatParameterPosition() or
471476
TAnyParameterPosition() or
472477
TAnyKeywordParameterPosition()
473478
}
@@ -1288,8 +1293,12 @@ class ParameterPosition extends TParameterPosition {
12881293

12891294
predicate isSynthHashSplat() { this = TSynthHashSplatParameterPosition() }
12901295

1296+
predicate isSynthSplat() { this = TSynthSplatParameterPosition() }
1297+
12911298
predicate isSplatAll() { this = TSplatAllParameterPosition() }
12921299

1300+
predicate isSplat(int n) { this = TSplatParameterPosition(n) }
1301+
12931302
/**
12941303
* Holds if this position represents any parameter, except `self` parameters. This
12951304
* includes both positional, named, and block parameters.
@@ -1320,6 +1329,10 @@ class ParameterPosition extends TParameterPosition {
13201329
this.isAny() and result = "any"
13211330
or
13221331
this.isAnyNamed() and result = "any-named"
1332+
or
1333+
this.isSynthSplat() and result = "synthetic *"
1334+
or
1335+
exists(int pos | this.isSplat(pos) and result = "* (position " + pos + ")")
13231336
}
13241337
}
13251338

@@ -1354,6 +1367,8 @@ class ArgumentPosition extends TArgumentPosition {
13541367

13551368
predicate isSplatAll() { this = TSplatAllArgumentPosition() }
13561369

1370+
predicate isSplat(int n) { this = TSplatArgumentPosition(n) }
1371+
13571372
/** Gets a textual representation of this position. */
13581373
string toString() {
13591374
this.isSelf() and result = "self"
@@ -1371,6 +1386,8 @@ class ArgumentPosition extends TArgumentPosition {
13711386
this.isHashSplat() and result = "**"
13721387
or
13731388
this.isSplatAll() and result = "*"
1389+
or
1390+
exists(int pos | this.isSplat(pos) and result = "* (position " + pos + ")")
13741391
}
13751392
}
13761393

@@ -1408,6 +1425,11 @@ predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
14081425
ppos.isAnyNamed() and apos.isKeyword(_)
14091426
or
14101427
apos.isAnyNamed() and ppos.isKeyword(_)
1428+
or
1429+
ppos.isSynthSplat() and apos.isSplatAll()
1430+
or
1431+
// Exact splat match
1432+
exists(int n | apos.isSplat(n) and ppos.isSplat(n))
14111433
}
14121434

14131435
/**

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ private class Argument extends CfgNodes::ExprCfgNode {
245245
not this.getExpr() instanceof BlockArgument and
246246
not this.getExpr().(Pair).getKey().getConstantValue().isSymbol(_) and
247247
not this.getExpr() instanceof HashSplatExpr and
248+
not this.getExpr() instanceof SplatExpr and
248249
arg.isPositional(i)
249250
)
250251
or
@@ -261,8 +262,15 @@ private class Argument extends CfgNodes::ExprCfgNode {
261262
arg.isHashSplat()
262263
or
263264
this = call.getArgument(0) and
265+
not exists(call.getArgument(1)) and
264266
this.getExpr() instanceof SplatExpr and
265267
arg.isSplatAll()
268+
or
269+
exists(int pos | pos > 0 or exists(call.getArgument(pos + 1)) |
270+
this = call.getArgument(pos) and
271+
this.getExpr() instanceof SplatExpr and
272+
arg.isSplat(pos)
273+
)
266274
}
267275

268276
/** Holds if this expression is the `i`th argument of `c`. */
@@ -300,6 +308,10 @@ private module Cached {
300308
TSynthHashSplatParameterNode(DataFlowCallable c) {
301309
isParameterNode(_, c, any(ParameterPosition p | p.isKeyword(_)))
302310
} or
311+
TSynthSplatParameterNode(DataFlowCallable c) {
312+
exists(c.asCallable()) and // exclude library callables
313+
isParameterNode(_, c, any(ParameterPosition p | p.isPositional(_)))
314+
} or
303315
TExprPostUpdateNode(CfgNodes::ExprCfgNode n) {
304316
// filter out nodes that clearly don't need post-update nodes
305317
isNonConstantExpr(n) and
@@ -318,7 +330,7 @@ private module Cached {
318330

319331
class TSourceParameterNode =
320332
TNormalParameterNode or TBlockParameterNode or TSelfParameterNode or
321-
TSynthHashSplatParameterNode;
333+
TSynthHashSplatParameterNode or TSynthSplatParameterNode;
322334

323335
cached
324336
Location getLocation(NodeImpl n) { result = n.getLocationImpl() }
@@ -514,6 +526,8 @@ predicate nodeIsHidden(Node n) {
514526
n instanceof SynthHashSplatParameterNode
515527
or
516528
n instanceof SynthHashSplatArgumentNode
529+
or
530+
n instanceof SynthSplatParameterNode
517531
}
518532

519533
/** An SSA definition, viewed as a node in a data flow graph. */
@@ -610,7 +624,13 @@ private module ParameterNodes {
610624
pos.isHashSplat()
611625
or
612626
parameter = callable.getParameter(0).(SplatParameter) and
627+
not exists(callable.getParameter(1)) and
613628
pos.isSplatAll()
629+
or
630+
exists(int n | n > 0 |
631+
parameter = callable.getParameter(n).(SplatParameter) and
632+
pos.isSplat(n)
633+
)
614634
)
615635
}
616636

@@ -749,6 +769,66 @@ private module ParameterNodes {
749769
final override string toStringImpl() { result = "**kwargs" }
750770
}
751771

772+
/**
773+
* A synthetic data-flow node to allow flow to positional parameters from a splat argument.
774+
*
775+
* For example, in the following code:
776+
*
777+
* ```rb
778+
* def foo(x, y); end
779+
*
780+
* foo(*[a, b])
781+
* ```
782+
*
783+
* We want `a` to flow to `x` and `b` to flow to `y`. We do this by constructing
784+
* a `SynthSplatParameterNode` for the method `foo`, and matching the splat argument to this
785+
* parameter node via `parameterMatch/2`. We then add read steps from this node to parameters
786+
* `x` and `y`, for content at indices 0 and 1 respectively (see `readStep`).
787+
*
788+
* We don't yet correctly handle cases where the splat argument is not the first argument, e.g. in
789+
* ```rb
790+
* foo(a, *[b])
791+
* ```
792+
*/
793+
class SynthSplatParameterNode extends ParameterNodeImpl, TSynthSplatParameterNode {
794+
private DataFlowCallable callable;
795+
796+
SynthSplatParameterNode() { this = TSynthSplatParameterNode(callable) }
797+
798+
/**
799+
* Gets a parameter which will contain the value given by `c`, assuming
800+
* that the method was called with a single splat argument.
801+
* For example, if the synth splat parameter is for the following method
802+
*
803+
* ```rb
804+
* def foo(x, y, a:, *rest)
805+
* end
806+
* ```
807+
*
808+
* Then `getAParameter(element 0) = x` and `getAParameter(element 1) = y`.
809+
*/
810+
ParameterNode getAParameter(ContentSet c) {
811+
exists(int n |
812+
isParameterNode(result, callable, (any(ParameterPosition p | p.isPositional(n)))) and
813+
c = getPositionalContent(n)
814+
)
815+
}
816+
817+
final override Parameter getParameter() { none() }
818+
819+
final override predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) {
820+
c = callable and pos.isSynthSplat()
821+
}
822+
823+
final override CfgScope getCfgScope() { result = callable.asCallable() }
824+
825+
final override DataFlowCallable getEnclosingCallable() { result = callable }
826+
827+
final override Location getLocationImpl() { result = callable.getLocation() }
828+
829+
final override string toStringImpl() { result = "synthetic *args" }
830+
}
831+
752832
/** A parameter for a library callable with a flow summary. */
753833
class SummaryParameterNode extends ParameterNodeImpl, FlowSummaryNode {
754834
private ParameterPosition pos_;
@@ -1099,6 +1179,13 @@ private ContentSet getKeywordContent(string name) {
10991179
)
11001180
}
11011181

1182+
private ContentSet getPositionalContent(int n) {
1183+
exists(ConstantValue::ConstantIntegerValue i |
1184+
result.isSingleton(TKnownElementContent(i)) and
1185+
i.isInt(n)
1186+
)
1187+
}
1188+
11021189
/**
11031190
* Subset of `storeStep` that should be shared with type-tracking.
11041191
*/
@@ -1187,6 +1274,8 @@ predicate readStep(Node node1, ContentSet c, Node node2) {
11871274
or
11881275
node2 = node1.(SynthHashSplatParameterNode).getAKeywordParameter(c)
11891276
or
1277+
node2 = node1.(SynthSplatParameterNode).getAParameter(c)
1278+
or
11901279
FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), c,
11911280
node2.(FlowSummaryNode).getSummaryNode())
11921281
}

ruby/ql/test/library-tests/dataflow/local/TaintStep.expected

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2796,6 +2796,7 @@
27962796
| UseUseExplosion.rb:21:3675:21:3680 | call to use | UseUseExplosion.rb:21:3670:21:3680 | else ... |
27972797
| UseUseExplosion.rb:21:3686:21:3696 | else ... | UseUseExplosion.rb:21:9:21:3700 | if ... |
27982798
| UseUseExplosion.rb:21:3691:21:3696 | call to use | UseUseExplosion.rb:21:3686:21:3696 | else ... |
2799+
| UseUseExplosion.rb:24:5:25:7 | synthetic *args | UseUseExplosion.rb:24:13:24:13 | i |
27992800
| UseUseExplosion.rb:24:5:25:7 | use | UseUseExplosion.rb:1:1:26:3 | C |
28002801
| file://:0:0:0:0 | [summary param] position 0 in & | file://:0:0:0:0 | [summary] read: Argument[0].Element[any] in & |
28012802
| file://:0:0:0:0 | [summary param] position 0 in + | file://:0:0:0:0 | [summary] read: Argument[0].Element[any] in + |
@@ -2840,6 +2841,7 @@
28402841
| file://:0:0:0:0 | [summary] read: Argument[0].Element[any] in Hash[] | file://:0:0:0:0 | [summary] read: Argument[0].Element[any].Element[1] in Hash[] |
28412842
| local_dataflow.rb:1:1:7:3 | self (foo) | local_dataflow.rb:3:8:3:10 | self |
28422843
| local_dataflow.rb:1:1:7:3 | self in foo | local_dataflow.rb:1:1:7:3 | self (foo) |
2844+
| local_dataflow.rb:1:1:7:3 | synthetic *args | local_dataflow.rb:1:9:1:9 | a |
28432845
| local_dataflow.rb:1:1:150:3 | self (local_dataflow.rb) | local_dataflow.rb:49:1:53:3 | self |
28442846
| local_dataflow.rb:1:9:1:9 | a | local_dataflow.rb:1:9:1:9 | a |
28452847
| local_dataflow.rb:1:9:1:9 | a | local_dataflow.rb:2:7:2:7 | a |
@@ -2874,6 +2876,7 @@
28742876
| local_dataflow.rb:10:5:13:3 | __synth__0__1 | local_dataflow.rb:10:5:13:3 | __synth__0__1 |
28752877
| local_dataflow.rb:10:5:13:3 | __synth__0__1 | local_dataflow.rb:10:9:10:9 | x |
28762878
| local_dataflow.rb:10:5:13:3 | call to each | local_dataflow.rb:10:1:13:3 | ... = ... |
2879+
| local_dataflow.rb:10:5:13:3 | synthetic *args | local_dataflow.rb:10:5:13:3 | __synth__0__1 |
28772880
| local_dataflow.rb:10:9:10:9 | x | local_dataflow.rb:12:5:12:5 | x |
28782881
| local_dataflow.rb:10:14:10:18 | [post] array | local_dataflow.rb:15:10:15:14 | array |
28792882
| local_dataflow.rb:10:14:10:18 | array | local_dataflow.rb:15:10:15:14 | array |
@@ -2883,13 +2886,15 @@
28832886
| local_dataflow.rb:15:1:17:3 | __synth__0__1 | local_dataflow.rb:15:1:17:3 | __synth__0__1 |
28842887
| local_dataflow.rb:15:1:17:3 | __synth__0__1 | local_dataflow.rb:15:1:17:3 | __synth__0__1 |
28852888
| local_dataflow.rb:15:1:17:3 | __synth__0__1 | local_dataflow.rb:15:5:15:5 | x |
2889+
| local_dataflow.rb:15:1:17:3 | synthetic *args | local_dataflow.rb:15:1:17:3 | __synth__0__1 |
28862890
| local_dataflow.rb:15:10:15:14 | [post] array | local_dataflow.rb:19:10:19:14 | array |
28872891
| local_dataflow.rb:15:10:15:14 | array | local_dataflow.rb:19:10:19:14 | array |
28882892
| local_dataflow.rb:16:9:16:10 | 10 | local_dataflow.rb:16:3:16:10 | break |
28892893
| local_dataflow.rb:19:1:21:3 | __synth__0__1 | local_dataflow.rb:19:1:21:3 | ... = ... |
28902894
| local_dataflow.rb:19:1:21:3 | __synth__0__1 | local_dataflow.rb:19:1:21:3 | __synth__0__1 |
28912895
| local_dataflow.rb:19:1:21:3 | __synth__0__1 | local_dataflow.rb:19:1:21:3 | __synth__0__1 |
28922896
| local_dataflow.rb:19:1:21:3 | __synth__0__1 | local_dataflow.rb:19:5:19:5 | x |
2897+
| local_dataflow.rb:19:1:21:3 | synthetic *args | local_dataflow.rb:19:1:21:3 | __synth__0__1 |
28932898
| local_dataflow.rb:19:5:19:5 | x | local_dataflow.rb:20:6:20:6 | x |
28942899
| local_dataflow.rb:20:6:20:6 | x | local_dataflow.rb:20:6:20:10 | ... > ... |
28952900
| local_dataflow.rb:20:10:20:10 | 1 | local_dataflow.rb:20:6:20:10 | ... > ... |
@@ -2901,11 +2906,13 @@
29012906
| local_dataflow.rb:30:14:30:20 | "class" | local_dataflow.rb:30:5:30:24 | C |
29022907
| local_dataflow.rb:32:5:32:25 | bar | local_dataflow.rb:32:1:32:1 | x |
29032908
| local_dataflow.rb:32:5:32:25 | bar | local_dataflow.rb:32:1:32:25 | ... = ... |
2909+
| local_dataflow.rb:34:1:39:3 | synthetic *args | local_dataflow.rb:34:7:34:7 | x |
29042910
| local_dataflow.rb:34:7:34:7 | x | local_dataflow.rb:34:7:34:7 | x |
29052911
| local_dataflow.rb:34:7:34:7 | x | local_dataflow.rb:35:6:35:6 | x |
29062912
| local_dataflow.rb:35:6:35:6 | x | local_dataflow.rb:35:6:35:11 | ... == ... |
29072913
| local_dataflow.rb:35:11:35:11 | 4 | local_dataflow.rb:35:6:35:11 | ... == ... |
29082914
| local_dataflow.rb:36:13:36:13 | 7 | local_dataflow.rb:36:6:36:13 | return |
2915+
| local_dataflow.rb:41:1:47:3 | synthetic *args | local_dataflow.rb:41:7:41:7 | x |
29092916
| local_dataflow.rb:41:7:41:7 | x | local_dataflow.rb:41:7:41:7 | x |
29102917
| local_dataflow.rb:41:7:41:7 | x | local_dataflow.rb:42:6:42:6 | x |
29112918
| local_dataflow.rb:42:6:42:6 | x | local_dataflow.rb:42:6:42:11 | ... == ... |
@@ -2924,8 +2931,10 @@
29242931
| local_dataflow.rb:51:20:51:20 | x | local_dataflow.rb:51:20:51:24 | ... < ... |
29252932
| local_dataflow.rb:51:24:51:24 | 9 | local_dataflow.rb:51:20:51:24 | ... < ... |
29262933
| local_dataflow.rb:55:5:55:13 | Array | local_dataflow.rb:55:5:55:13 | call to [] |
2934+
| local_dataflow.rb:57:1:58:3 | synthetic *args | local_dataflow.rb:57:9:57:9 | x |
29272935
| local_dataflow.rb:60:1:90:3 | self (test_case) | local_dataflow.rb:78:12:78:20 | self |
29282936
| local_dataflow.rb:60:1:90:3 | self in test_case | local_dataflow.rb:60:1:90:3 | self (test_case) |
2937+
| local_dataflow.rb:60:1:90:3 | synthetic *args | local_dataflow.rb:60:15:60:15 | x |
29292938
| local_dataflow.rb:60:15:60:15 | x | local_dataflow.rb:60:15:60:15 | x |
29302939
| local_dataflow.rb:60:15:60:15 | x | local_dataflow.rb:61:12:61:12 | x |
29312940
| local_dataflow.rb:61:7:68:5 | SSA phi read(x) | local_dataflow.rb:69:12:69:12 | x |
@@ -3098,6 +3107,7 @@
30983107
| local_dataflow.rb:118:3:118:11 | call to source | local_dataflow.rb:118:3:118:31 | call to tap |
30993108
| local_dataflow.rb:118:3:118:11 | self | local_dataflow.rb:119:3:119:31 | self |
31003109
| local_dataflow.rb:118:17:118:31 | <captured entry> self | local_dataflow.rb:118:23:118:29 | self |
3110+
| local_dataflow.rb:118:17:118:31 | synthetic *args | local_dataflow.rb:118:20:118:20 | x |
31013111
| local_dataflow.rb:118:20:118:20 | x | local_dataflow.rb:118:20:118:20 | x |
31023112
| local_dataflow.rb:118:20:118:20 | x | local_dataflow.rb:118:28:118:28 | x |
31033113
| local_dataflow.rb:119:3:119:31 | [post] self | local_dataflow.rb:119:8:119:16 | self |
@@ -3112,8 +3122,10 @@
31123122
| local_dataflow.rb:123:8:123:20 | call to dup | local_dataflow.rb:123:8:123:45 | call to tap |
31133123
| local_dataflow.rb:123:8:123:45 | call to tap | local_dataflow.rb:123:8:123:49 | call to dup |
31143124
| local_dataflow.rb:123:26:123:45 | <captured entry> self | local_dataflow.rb:123:32:123:43 | self |
3125+
| local_dataflow.rb:123:26:123:45 | synthetic *args | local_dataflow.rb:123:29:123:29 | x |
31153126
| local_dataflow.rb:126:1:128:3 | self (use) | local_dataflow.rb:127:3:127:8 | self |
31163127
| local_dataflow.rb:126:1:128:3 | self in use | local_dataflow.rb:126:1:128:3 | self (use) |
3128+
| local_dataflow.rb:126:1:128:3 | synthetic *args | local_dataflow.rb:126:9:126:9 | x |
31173129
| local_dataflow.rb:130:1:150:3 | self (use_use_madness) | local_dataflow.rb:132:6:132:11 | self |
31183130
| local_dataflow.rb:130:1:150:3 | self in use_use_madness | local_dataflow.rb:130:1:150:3 | self (use_use_madness) |
31193131
| local_dataflow.rb:131:3:131:3 | x | local_dataflow.rb:132:10:132:10 | x |

0 commit comments

Comments
 (0)