Skip to content

Commit 01c0d4b

Browse files
committed
Ruby: Support more flow through keyword arguments
1 parent 38ede25 commit 01c0d4b

File tree

3 files changed

+112
-23
lines changed

3 files changed

+112
-23
lines changed

ruby/ql/consistency-queries/DataFlowConsistency.ql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@ private class MyConsistencyConfiguration extends ConsistencyConfiguration {
1010
or
1111
n instanceof SummaryNode
1212
or
13-
n instanceof HashSplatArgumentsNode
13+
n instanceof SynthHashSplatArgumentsNode
1414
}
1515
}

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll

Lines changed: 97 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ private module Cached {
227227
} or
228228
TSelfParameterNode(MethodBase m) or
229229
TBlockParameterNode(MethodBase m) or
230+
TSynthHashSplatParameterNode(MethodBase m) { m.getAParameter() instanceof KeywordParameter } or
230231
TExprPostUpdateNode(CfgNodes::ExprCfgNode n) {
231232
n instanceof Argument or
232233
n = any(CfgNodes::ExprNodes::InstanceVariableAccessCfgNode v).getReceiver()
@@ -240,12 +241,13 @@ private module Cached {
240241
TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, ParameterPosition pos) {
241242
FlowSummaryImpl::Private::summaryParameterNodeRange(c, pos)
242243
} or
243-
THashSplatArgumentsNode(CfgNodes::ExprNodes::CallCfgNode c) {
244+
TSynthHashSplatArgumentsNode(CfgNodes::ExprNodes::CallCfgNode c) {
244245
exists(Argument arg | arg.isArgumentOf(c, any(ArgumentPosition pos | pos.isKeyword(_))))
245246
}
246247

247248
class TParameterNode =
248-
TNormalParameterNode or TBlockParameterNode or TSelfParameterNode or TSummaryParameterNode;
249+
TNormalParameterNode or TBlockParameterNode or TSelfParameterNode or
250+
TSynthHashSplatParameterNode or TSummaryParameterNode;
249251

250252
private predicate defaultValueFlow(NamedParameter p, ExprNode e) {
251253
p.(OptionalParameter).getDefaultValue() = e.getExprNode().getExpr()
@@ -328,18 +330,21 @@ private module Cached {
328330

329331
cached
330332
predicate isLocalSourceNode(Node n) {
331-
n instanceof ParameterNode
332-
or
333-
n instanceof PostUpdateNodes::ExprPostUpdateNode
334-
or
335-
// Nodes that can't be reached from another entry definition or expression.
336-
not reachedFromExprOrEntrySsaDef(n)
337-
or
338-
// Ensure all entry SSA definitions are local sources -- for parameters, this
339-
// is needed by type tracking. Note that when the parameter has a default value,
340-
// it will be reachable from an expression (the default value) and therefore
341-
// won't be caught by the rule above.
342-
entrySsaDefinition(n)
333+
not n instanceof SynthHashSplatParameterNode and
334+
(
335+
n instanceof ParameterNode
336+
or
337+
n instanceof PostUpdateNodes::ExprPostUpdateNode
338+
or
339+
// Nodes that can't be reached from another entry definition or expression.
340+
not reachedFromExprOrEntrySsaDef(n)
341+
or
342+
// Ensure all entry SSA definitions are local sources -- for parameters, this
343+
// is needed by type tracking. Note that when the parameter has a default value,
344+
// it will be reachable from an expression (the default value) and therefore
345+
// won't be caught by the rule above.
346+
entrySsaDefinition(n)
347+
)
343348
}
344349

345350
cached
@@ -415,7 +420,9 @@ predicate nodeIsHidden(Node n) {
415420
or
416421
n instanceof SynthReturnNode
417422
or
418-
n instanceof HashSplatArgumentsNode
423+
n instanceof SynthHashSplatParameterNode
424+
or
425+
n instanceof SynthHashSplatArgumentsNode
419426
}
420427

421428
/** An SSA definition, viewed as a node in a data flow graph. */
@@ -570,6 +577,74 @@ private module ParameterNodes {
570577
}
571578
}
572579

580+
/**
581+
* For all methods containing keyword parameters, we construct a synthesized
582+
* (hidden) parameter node to contain all keyword arguments. This allows us
583+
* to handle cases like
584+
*
585+
* ```rb
586+
* def foo(p1:, p2:)
587+
* sink(p1)
588+
* sink(p2)
589+
* end
590+
*
591+
* args = {:p1 => taint(1), :p2 => taint(2) }
592+
* foo(**args)
593+
* ```
594+
*
595+
* by adding read steps out of the synthesized parameter node to the relevant
596+
* keyword parameters.
597+
*
598+
* Note that this will introduce a bit of redundancy in cases like
599+
*
600+
* ```rb
601+
* foo(p1: taint(1), p2: taint(2))
602+
* ```
603+
*
604+
* where direct keyword matching is possible, since we construct a synthesized hash
605+
* splat argument (`SynthHashSplatArgumentsNode`) at the call site, which means that
606+
* `taint(1)` will flow into `p1` both via normal keyword matching and via the synthesized
607+
* nodes (and similarly for `p2`). However, this redunancy is OK since
608+
* (a) it means that type-tracking through keyword arguments also works in most cases,
609+
* (b) read/store steps can be avoided when direct keyword matching is possible, and
610+
* hence access path limits are not a concern, and
611+
* (c) since the synthesized nodes are hidden, the reported data-flow paths will be
612+
* collapsed anyway.
613+
*/
614+
class SynthHashSplatParameterNode extends ParameterNodeImpl, TSynthHashSplatParameterNode {
615+
private MethodBase method;
616+
617+
SynthHashSplatParameterNode() { this = TSynthHashSplatParameterNode(method) }
618+
619+
final Callable getMethod() { result = method }
620+
621+
/**
622+
* Gets a keyword parameter that will be the result of reading `c` out of this
623+
* synthesized node.
624+
*/
625+
NormalParameterNode getAKeywordParameter(ContentSet c) {
626+
exists(KeywordParameter p |
627+
p = result.getParameter() and
628+
p = method.getAParameter()
629+
|
630+
c = getKeywordContent(p.getName()) or
631+
c.isSingleton(TUnknownElementContent())
632+
)
633+
}
634+
635+
override Parameter getParameter() { none() }
636+
637+
override predicate isSourceParameterOf(Callable c, ParameterPosition pos) {
638+
c = method and pos.isHashSplat()
639+
}
640+
641+
override CfgScope getCfgScope() { result = method }
642+
643+
override Location getLocationImpl() { result = method.getLocation() }
644+
645+
override string toStringImpl() { result = "**kwargs" }
646+
}
647+
573648
/** A parameter for a library callable with a flow summary. */
574649
class SummaryParameterNode extends ParameterNodeImpl, TSummaryParameterNode {
575650
private FlowSummaryImpl::Public::SummarizedCallable sc;
@@ -689,10 +764,10 @@ private module ArgumentNodes {
689764
* part of the method signature, such that those cannot end up in the hash-splat
690765
* parameter.
691766
*/
692-
class HashSplatArgumentsNode extends ArgumentNode, THashSplatArgumentsNode {
767+
class SynthHashSplatArgumentsNode extends ArgumentNode, TSynthHashSplatArgumentsNode {
693768
CfgNodes::ExprNodes::CallCfgNode c;
694769

695-
HashSplatArgumentsNode() { this = THashSplatArgumentsNode(c) }
770+
SynthHashSplatArgumentsNode() { this = TSynthHashSplatArgumentsNode(c) }
696771

697772
override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
698773
this.sourceArgumentOf(call.asCall(), pos)
@@ -704,10 +779,10 @@ private module ArgumentNodes {
704779
}
705780
}
706781

707-
private class HashSplatArgumentsNodeImpl extends NodeImpl, THashSplatArgumentsNode {
782+
private class SynthHashSplatArgumentsNodeImpl extends NodeImpl, TSynthHashSplatArgumentsNode {
708783
CfgNodes::ExprNodes::CallCfgNode c;
709784

710-
HashSplatArgumentsNodeImpl() { this = THashSplatArgumentsNode(c) }
785+
SynthHashSplatArgumentsNodeImpl() { this = TSynthHashSplatArgumentsNode(c) }
711786

712787
override CfgScope getCfgScope() { result = c.getExpr().getCfgScope() }
713788

@@ -929,7 +1004,7 @@ predicate storeStep(Node node1, ContentSet c, Node node2) {
9291004
or
9301005
// Wrap all keyword arguments in a synthesized hash-splat argument node
9311006
exists(CfgNodes::ExprNodes::CallCfgNode call, ArgumentPosition keywordPos, string name |
932-
node2 = THashSplatArgumentsNode(call) and
1007+
node2 = TSynthHashSplatArgumentsNode(call) and
9331008
node1.asExpr().(Argument).isArgumentOf(call, keywordPos) and
9341009
keywordPos.isKeyword(name) and
9351010
c = getKeywordContent(name)
@@ -962,6 +1037,8 @@ predicate readStep(Node node1, ContentSet c, Node node2) {
9621037
))
9631038
)
9641039
or
1040+
node2 = node1.(SynthHashSplatParameterNode).getAKeywordParameter(c)
1041+
or
9651042
FlowSummaryImpl::Private::Steps::summaryReadStep(node1, c, node2)
9661043
}
9671044

ruby/ql/test/library-tests/dataflow/params/params-flow.expected

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
failures
2-
| params_flow.rb:17:13:17:82 | # $ hasValueFlow=3 $ hasValueFlow=6 $ hasValueFlow=8 $ hasValueFlow=16 | Missing result:hasValueFlow=16 |
3-
| params_flow.rb:26:13:26:66 | # $ hasValueFlow=9 $ hasValueFlow=13 $ hasValueFlow=14 | Missing result:hasValueFlow=14 |
42
edges
53
| params_flow.rb:9:16:9:17 | p1 : | params_flow.rb:10:10:10:11 | p1 |
64
| params_flow.rb:9:20:9:21 | p2 : | params_flow.rb:11:10:11:11 | p2 |
@@ -28,10 +26,16 @@ edges
2826
| params_flow.rb:35:12:35:20 | call to taint : | params_flow.rb:25:12:25:13 | p1 : |
2927
| params_flow.rb:35:23:35:28 | ** ... [element :p3] : | params_flow.rb:25:17:25:24 | **kwargs [element :p3] : |
3028
| params_flow.rb:35:25:35:28 | args [element :p3] : | params_flow.rb:35:23:35:28 | ** ... [element :p3] : |
29+
| params_flow.rb:37:16:37:24 | call to taint : | params_flow.rb:38:10:38:13 | args [element :p1] : |
3130
| params_flow.rb:37:34:37:42 | call to taint : | params_flow.rb:38:10:38:13 | args [element :p2] : |
31+
| params_flow.rb:38:8:38:13 | ** ... [element :p1] : | params_flow.rb:25:12:25:13 | p1 : |
3232
| params_flow.rb:38:8:38:13 | ** ... [element :p2] : | params_flow.rb:25:17:25:24 | **kwargs [element :p2] : |
33+
| params_flow.rb:38:10:38:13 | args [element :p1] : | params_flow.rb:38:8:38:13 | ** ... [element :p1] : |
3334
| params_flow.rb:38:10:38:13 | args [element :p2] : | params_flow.rb:38:8:38:13 | ** ... [element :p2] : |
35+
| params_flow.rb:40:16:40:24 | call to taint : | params_flow.rb:41:26:41:29 | args [element :p1] : |
3436
| params_flow.rb:41:13:41:21 | call to taint : | params_flow.rb:16:18:16:19 | p2 : |
37+
| params_flow.rb:41:24:41:29 | ** ... [element :p1] : | params_flow.rb:16:13:16:14 | p1 : |
38+
| params_flow.rb:41:26:41:29 | args [element :p1] : | params_flow.rb:41:24:41:29 | ** ... [element :p1] : |
3539
nodes
3640
| params_flow.rb:9:16:9:17 | p1 : | semmle.label | p1 : |
3741
| params_flow.rb:9:20:9:21 | p2 : | semmle.label | p2 : |
@@ -66,23 +70,31 @@ nodes
6670
| params_flow.rb:35:12:35:20 | call to taint : | semmle.label | call to taint : |
6771
| params_flow.rb:35:23:35:28 | ** ... [element :p3] : | semmle.label | ** ... [element :p3] : |
6872
| params_flow.rb:35:25:35:28 | args [element :p3] : | semmle.label | args [element :p3] : |
73+
| params_flow.rb:37:16:37:24 | call to taint : | semmle.label | call to taint : |
6974
| params_flow.rb:37:34:37:42 | call to taint : | semmle.label | call to taint : |
75+
| params_flow.rb:38:8:38:13 | ** ... [element :p1] : | semmle.label | ** ... [element :p1] : |
7076
| params_flow.rb:38:8:38:13 | ** ... [element :p2] : | semmle.label | ** ... [element :p2] : |
77+
| params_flow.rb:38:10:38:13 | args [element :p1] : | semmle.label | args [element :p1] : |
7178
| params_flow.rb:38:10:38:13 | args [element :p2] : | semmle.label | args [element :p2] : |
79+
| params_flow.rb:40:16:40:24 | call to taint : | semmle.label | call to taint : |
7280
| params_flow.rb:41:13:41:21 | call to taint : | semmle.label | call to taint : |
81+
| params_flow.rb:41:24:41:29 | ** ... [element :p1] : | semmle.label | ** ... [element :p1] : |
82+
| params_flow.rb:41:26:41:29 | args [element :p1] : | semmle.label | args [element :p1] : |
7383
subpaths
7484
#select
7585
| params_flow.rb:10:10:10:11 | p1 | params_flow.rb:14:12:14:19 | call to taint : | params_flow.rb:10:10:10:11 | p1 | $@ | params_flow.rb:14:12:14:19 | call to taint : | call to taint : |
7686
| params_flow.rb:11:10:11:11 | p2 | params_flow.rb:14:22:14:29 | call to taint : | params_flow.rb:11:10:11:11 | p2 | $@ | params_flow.rb:14:22:14:29 | call to taint : | call to taint : |
7787
| params_flow.rb:17:10:17:11 | p1 | params_flow.rb:21:13:21:20 | call to taint : | params_flow.rb:17:10:17:11 | p1 | $@ | params_flow.rb:21:13:21:20 | call to taint : | call to taint : |
7888
| params_flow.rb:17:10:17:11 | p1 | params_flow.rb:22:27:22:34 | call to taint : | params_flow.rb:17:10:17:11 | p1 | $@ | params_flow.rb:22:27:22:34 | call to taint : | call to taint : |
7989
| params_flow.rb:17:10:17:11 | p1 | params_flow.rb:23:33:23:40 | call to taint : | params_flow.rb:17:10:17:11 | p1 | $@ | params_flow.rb:23:33:23:40 | call to taint : | call to taint : |
90+
| params_flow.rb:17:10:17:11 | p1 | params_flow.rb:40:16:40:24 | call to taint : | params_flow.rb:17:10:17:11 | p1 | $@ | params_flow.rb:40:16:40:24 | call to taint : | call to taint : |
8091
| params_flow.rb:18:10:18:11 | p2 | params_flow.rb:21:27:21:34 | call to taint : | params_flow.rb:18:10:18:11 | p2 | $@ | params_flow.rb:21:27:21:34 | call to taint : | call to taint : |
8192
| params_flow.rb:18:10:18:11 | p2 | params_flow.rb:22:13:22:20 | call to taint : | params_flow.rb:18:10:18:11 | p2 | $@ | params_flow.rb:22:13:22:20 | call to taint : | call to taint : |
8293
| params_flow.rb:18:10:18:11 | p2 | params_flow.rb:23:16:23:23 | call to taint : | params_flow.rb:18:10:18:11 | p2 | $@ | params_flow.rb:23:16:23:23 | call to taint : | call to taint : |
8394
| params_flow.rb:18:10:18:11 | p2 | params_flow.rb:41:13:41:21 | call to taint : | params_flow.rb:18:10:18:11 | p2 | $@ | params_flow.rb:41:13:41:21 | call to taint : | call to taint : |
8495
| params_flow.rb:26:10:26:11 | p1 | params_flow.rb:33:12:33:19 | call to taint : | params_flow.rb:26:10:26:11 | p1 | $@ | params_flow.rb:33:12:33:19 | call to taint : | call to taint : |
8596
| params_flow.rb:26:10:26:11 | p1 | params_flow.rb:35:12:35:20 | call to taint : | params_flow.rb:26:10:26:11 | p1 | $@ | params_flow.rb:35:12:35:20 | call to taint : | call to taint : |
97+
| params_flow.rb:26:10:26:11 | p1 | params_flow.rb:37:16:37:24 | call to taint : | params_flow.rb:26:10:26:11 | p1 | $@ | params_flow.rb:37:16:37:24 | call to taint : | call to taint : |
8698
| params_flow.rb:28:10:28:22 | ( ... ) | params_flow.rb:33:26:33:34 | call to taint : | params_flow.rb:28:10:28:22 | ( ... ) | $@ | params_flow.rb:33:26:33:34 | call to taint : | call to taint : |
8799
| params_flow.rb:28:10:28:22 | ( ... ) | params_flow.rb:37:34:37:42 | call to taint : | params_flow.rb:28:10:28:22 | ( ... ) | $@ | params_flow.rb:37:34:37:42 | call to taint : | call to taint : |
88100
| params_flow.rb:29:10:29:22 | ( ... ) | params_flow.rb:33:41:33:49 | call to taint : | params_flow.rb:29:10:29:22 | ( ... ) | $@ | params_flow.rb:33:41:33:49 | call to taint : | call to taint : |

0 commit comments

Comments
 (0)