Skip to content

Commit 1f8a291

Browse files
authored
Merge pull request #7198 from hvitved/ruby/dataflow/arrays
Ruby: Flow through arrays/enumerables
2 parents 5ba70ff + 55492ef commit 1f8a291

File tree

28 files changed

+2986
-108
lines changed

28 files changed

+2986
-108
lines changed

ruby/ql/lib/codeql/ruby/ApiGraphs.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ module API {
9898
/**
9999
* Gets a `new` call to the function represented by this API component.
100100
*/
101-
DataFlow::Node getAnInstantiation() { result = this.getInstance().getAnImmediateUse() }
101+
DataFlow::ExprNode getAnInstantiation() { result = this.getInstance().getAnImmediateUse() }
102102

103103
/**
104104
* Gets a node representing a subclass of the class represented by this node.

ruby/ql/lib/codeql/ruby/dataflow/FlowSummary.qll

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import ruby
44
import codeql.ruby.DataFlow
55
private import internal.FlowSummaryImpl as Impl
66
private import internal.DataFlowDispatch
7+
private import internal.DataFlowPrivate
78

89
// import all instances below
910
private module Summaries {
@@ -22,12 +23,34 @@ module SummaryComponent {
2223

2324
predicate content = SC::content/1;
2425

25-
/** Gets a summary component that represents a qualifier. */
26-
SummaryComponent qualifier() { result = argument(any(ParameterPosition pos | pos.isSelf())) }
26+
/** Gets a summary component that represents a receiver. */
27+
SummaryComponent receiver() { result = argument(any(ParameterPosition pos | pos.isSelf())) }
2728

2829
/** Gets a summary component that represents a block argument. */
2930
SummaryComponent block() { result = argument(any(ParameterPosition pos | pos.isBlock())) }
3031

32+
/** Gets a summary component that represents an element in an array at an unknown index. */
33+
SummaryComponent arrayElementUnknown() { result = SC::content(TUnknownArrayElementContent()) }
34+
35+
/** Gets a summary component that represents an element in an array at a known index. */
36+
bindingset[i]
37+
SummaryComponent arrayElementKnown(int i) {
38+
result = SC::content(TKnownArrayElementContent(i))
39+
or
40+
// `i` may be out of range
41+
not exists(TKnownArrayElementContent(i)) and
42+
result = arrayElementUnknown()
43+
}
44+
45+
/**
46+
* Gets a summary component that represents an element in an array at either an unknown
47+
* index or known index. This predicate should never be used in the output specification
48+
* of a flow summary; use `arrayElementUnknown()` instead.
49+
*/
50+
SummaryComponent arrayElementAny() {
51+
result in [arrayElementUnknown(), SC::content(TKnownArrayElementContent(_))]
52+
}
53+
3154
/** Gets a summary component that represents the return value of a call. */
3255
SummaryComponent return() { result = SC::return(any(NormalReturnKind rk)) }
3356
}
@@ -44,8 +67,8 @@ module SummaryComponentStack {
4467

4568
predicate argument = SCS::argument/1;
4669

47-
/** Gets a singleton stack representing a qualifier. */
48-
SummaryComponentStack qualifier() { result = singleton(SummaryComponent::qualifier()) }
70+
/** Gets a singleton stack representing a receiver. */
71+
SummaryComponentStack receiver() { result = singleton(SummaryComponent::receiver()) }
4972

5073
/** Gets a singleton stack representing a block argument. */
5174
SummaryComponentStack block() { result = singleton(SummaryComponent::block()) }
@@ -108,6 +131,17 @@ abstract class SummarizedCallable extends LibraryCallable {
108131
predicate clearsContent(ParameterPosition pos, DataFlow::Content content) { none() }
109132
}
110133

134+
/**
135+
* A callable with a flow summary, identified by a unique string, where all
136+
* calls to a method with the same name are considered relevant.
137+
*/
138+
abstract class SimpleSummarizedCallable extends SummarizedCallable {
139+
bindingset[this]
140+
SimpleSummarizedCallable() { any() }
141+
142+
final override MethodCall getACall() { result.getMethodName() = this }
143+
}
144+
111145
private class SummarizedCallableAdapter extends Impl::Public::SummarizedCallable {
112146
private SummarizedCallable sc;
113147

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ private module Cached {
250250
TPositionalParameterPosition(int pos) {
251251
pos = any(Parameter p).getPosition()
252252
or
253-
pos in [0 .. 10] // TODO: remove once `Argument[_]` summaries are replaced with `Argument[i..]`
253+
pos in [0 .. 100] // TODO: remove once `Argument[_]` summaries are replaced with `Argument[i..]`
254254
or
255255
FlowSummaryImplSpecific::ParsePositions::isParsedArgumentPosition(_, pos)
256256
} or

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -294,9 +294,13 @@ private module Cached {
294294
}
295295

296296
cached
297-
newtype TContent = TTodoContent() // stub
297+
newtype TContent =
298+
TKnownArrayElementContent(int i) { i in [0 .. 10] } or
299+
TUnknownArrayElementContent()
298300
}
299301

302+
class TArrayElementContent = TKnownArrayElementContent or TUnknownArrayElementContent;
303+
300304
import Cached
301305

302306
/** Holds if `n` should be hidden from path explanations. */
@@ -741,8 +745,6 @@ predicate readStep(Node node1, Content c, Node node2) {
741745
* in `x.f = newValue`.
742746
*/
743747
predicate clearsContent(Node n, Content c) {
744-
storeStep(_, c, n)
745-
or
746748
FlowSummaryImpl::Private::Steps::summaryClearsContent(n, c)
747749
}
748750

@@ -886,4 +888,6 @@ predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preserves
886888
* One example would be to allow flow like `p.foo = p.bar;`, which is disallowed
887889
* by default as a heuristic.
888890
*/
889-
predicate allowParameterReturnInSelf(ParameterNode p) { none() }
891+
predicate allowParameterReturnInSelf(ParameterNode p) {
892+
FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(p)
893+
}

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -45,19 +45,19 @@ class Node extends TNode {
4545
}
4646

4747
/** A data-flow node corresponding to a call in the control-flow graph. */
48-
class CallNode extends LocalSourceNode {
48+
class CallNode extends LocalSourceNode, ExprNode {
4949
private CfgNodes::ExprNodes::CallCfgNode node;
5050

51-
CallNode() { node = this.asExpr() }
51+
CallNode() { node = this.getExprNode() }
5252

5353
/** Gets the data-flow node corresponding to the receiver of the call corresponding to this data-flow node */
54-
Node getReceiver() { result.asExpr() = node.getReceiver() }
54+
ExprNode getReceiver() { result.getExprNode() = node.getReceiver() }
5555

5656
/** Gets the data-flow node corresponding to the `n`th argument of the call corresponding to this data-flow node */
57-
Node getArgument(int n) { result.asExpr() = node.getArgument(n) }
57+
ExprNode getArgument(int n) { result.getExprNode() = node.getArgument(n) }
5858

5959
/** Gets the data-flow node corresponding to the named argument of the call corresponding to this data-flow node */
60-
Node getKeywordArgument(string name) { result.asExpr() = node.getKeywordArgument(name) }
60+
ExprNode getKeywordArgument(string name) { result.getExprNode() = node.getKeywordArgument(name) }
6161

6262
/** Gets the name of the the method called by the method call (if any) corresponding to this data-flow node */
6363
string getMethodName() { result = node.getExpr().(MethodCall).getMethodName() }
@@ -161,10 +161,7 @@ predicate localExprFlow(CfgNodes::ExprCfgNode e1, CfgNodes::ExprCfgNode e2) {
161161
localFlow(exprNode(e1), exprNode(e2))
162162
}
163163

164-
/**
165-
* A reference contained in an object. This is either a field, a property,
166-
* or an element in a collection.
167-
*/
164+
/** A reference contained in an object. */
168165
class Content extends TContent {
169166
/** Gets a textual representation of this content. */
170167
string toString() { none() }
@@ -173,6 +170,31 @@ class Content extends TContent {
173170
Location getLocation() { none() }
174171
}
175172

173+
/** Provides different sub classes of `Content`. */
174+
module Content {
175+
/** An element in an array. */
176+
class ArrayElementContent extends Content, TArrayElementContent { }
177+
178+
/** An element in an array at a known index. */
179+
class KnownArrayElementContent extends ArrayElementContent, TKnownArrayElementContent {
180+
private int i;
181+
182+
KnownArrayElementContent() { this = TKnownArrayElementContent(i) }
183+
184+
/** Gets the index in the array. */
185+
int getIndex() { result = i }
186+
187+
override string toString() { result = "array element " + i }
188+
}
189+
190+
/** An element in an array at an unknown index. */
191+
class UnknownArrayElementContent extends ArrayElementContent, TUnknownArrayElementContent {
192+
UnknownArrayElementContent() { this = TUnknownArrayElementContent() }
193+
194+
override string toString() { result = "array element" }
195+
}
196+
}
197+
176198
/**
177199
* A guard that validates some expression.
178200
*

ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImplSpecific.qll

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,33 @@ predicate summaryElement(DataFlowCallable c, string input, string output, string
5858
* This covers all the Ruby-specific components of a flow summary, and
5959
* is currently restricted to `"BlockArgument"`.
6060
*/
61+
bindingset[c]
6162
SummaryComponent interpretComponentSpecific(string c) {
63+
c = "Receiver" and
64+
result = FlowSummary::SummaryComponent::receiver()
65+
or
6266
c = "BlockArgument" and
6367
result = FlowSummary::SummaryComponent::block()
6468
or
6569
c = "Argument[_]" and
6670
result = FlowSummary::SummaryComponent::argument(any(ParameterPosition pos | pos.isPositional(_)))
71+
or
72+
c = "ArrayElement" and
73+
result = FlowSummary::SummaryComponent::arrayElementAny()
74+
or
75+
c = "ArrayElement[?]" and
76+
result = FlowSummary::SummaryComponent::arrayElementUnknown()
77+
or
78+
exists(int i |
79+
c.regexpCapture("ArrayElement\\[([0-9]+)\\]", 1).toInt() = i and
80+
result = FlowSummary::SummaryComponent::arrayElementKnown(i)
81+
)
82+
or
83+
exists(int i1, int i2 |
84+
c.regexpCapture("ArrayElement\\[([-0-9]+)\\.\\.([0-9]+)\\]", 1).toInt() = i1 and
85+
c.regexpCapture("ArrayElement\\[([-0-9]+)\\.\\.([0-9]+)\\]", 2).toInt() = i2 and
86+
result = FlowSummary::SummaryComponent::arrayElementKnown([i1 .. i2])
87+
)
6788
}
6889

6990
/** Gets the textual representation of a summary component in the format used for flow summaries. */

ruby/ql/lib/codeql/ruby/dataflow/internal/TaintTrackingPrivate.qll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
private import ruby
2+
private import DataFlowPrivate
23
private import TaintTrackingPublic
34
private import codeql.ruby.CFG
45
private import codeql.ruby.DataFlow
@@ -34,8 +35,10 @@ predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nod
3435
nodeFrom.asExpr() =
3536
nodeTo.asExpr().(CfgNodes::ExprNodes::StringlikeLiteralCfgNode).getAComponent()
3637
or
37-
// element reference from nodeFrom
38-
nodeFrom.asExpr() = nodeTo.asExpr().(CfgNodes::ExprNodes::ElementReferenceCfgNode).getReceiver()
39-
or
4038
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, false)
39+
or
40+
// Although flow through arrays is modelled precisely using stores/reads, we still
41+
// allow flow out of a _tainted_ array. This is needed in order to support taint-
42+
// tracking configurations where the source is an array.
43+
readStep(nodeFrom, any(DataFlow::Content::ArrayElementContent c), nodeTo)
4144
}

0 commit comments

Comments
 (0)