Skip to content

Commit 725d76e

Browse files
committed
Ruby: Implement ContentSet
1 parent c4fbc61 commit 725d76e

File tree

7 files changed

+106
-80
lines changed

7 files changed

+106
-80
lines changed

ruby/ql/lib/codeql/ruby/dataflow/FlowSummary.qll

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ module SummaryComponent {
3232
SummaryComponent block() { result = argument(any(ParameterPosition pos | pos.isBlock())) }
3333

3434
/** Gets a summary component that represents an element in an array at an unknown index. */
35-
SummaryComponent arrayElementUnknown() { result = SC::content(TUnknownArrayElementContent()) }
35+
SummaryComponent arrayElementUnknown() {
36+
result = SC::content(TSingletonContent(TUnknownArrayElementContent()))
37+
}
3638

3739
/**
3840
* Gets a summary component that represents an element in an array at a known index.
@@ -42,7 +44,7 @@ module SummaryComponent {
4244
*/
4345
bindingset[i]
4446
SummaryComponent arrayElementKnown(int i) {
45-
result = SC::content(TKnownArrayElementContent(i))
47+
result = SC::content(TSingletonContent(TKnownArrayElementContent(i)))
4648
or
4749
// `i` may be out of range
4850
i >= 0 and
@@ -134,7 +136,7 @@ abstract class SummarizedCallable extends LibraryCallable {
134136
* arguments at position `pos` to this callable.
135137
*/
136138
pragma[nomagic]
137-
predicate clearsContent(ParameterPosition pos, DataFlow::Content content) { none() }
139+
predicate clearsContent(ParameterPosition pos, DataFlow::ContentSet content) { none() }
138140
}
139141

140142
/**
@@ -161,7 +163,7 @@ private class SummarizedCallableAdapter extends Impl::Public::SummarizedCallable
161163
sc.propagatesFlow(input, output, preservesValue)
162164
}
163165

164-
final override predicate clearsContent(ParameterPosition pos, DataFlow::Content content) {
166+
final override predicate clearsContent(ParameterPosition pos, DataFlow::ContentSet content) {
165167
sc.clearsContent(pos, content)
166168
}
167169
}

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -316,11 +316,15 @@ private module Cached {
316316
entrySsaDefinition(n)
317317
}
318318

319+
cached
320+
newtype TContentSet =
321+
TSingletonContent(Content c) or
322+
TAnyArrayElementContent()
323+
319324
cached
320325
newtype TContent =
321326
TKnownArrayElementContent(int i) { i in [0 .. 10] } or
322-
TUnknownArrayElementContent() or
323-
TAnyArrayElementContent()
327+
TUnknownArrayElementContent()
324328

325329
/**
326330
* Holds if `e` is an `ExprNode` that may be returned by a call to `c`.
@@ -776,26 +780,20 @@ predicate jumpStep(Node pred, Node succ) {
776780
succ.asExpr().getExpr().(ConstantReadAccess).getValue() = pred.asExpr().getExpr()
777781
}
778782

779-
predicate storeStep(Node node1, Content c, Node node2) {
783+
predicate storeStep(Node node1, ContentSet c, Node node2) {
780784
FlowSummaryImpl::Private::Steps::summaryStoreStep(node1, c, node2)
781785
}
782786

783-
predicate readStep(Node node1, Content c, Node node2) {
784-
exists(Content c0 | FlowSummaryImpl::Private::Steps::summaryReadStep(node1, c0, node2) |
785-
if c0 = TAnyArrayElementContent()
786-
then
787-
c instanceof TUnknownArrayElementContent or
788-
c instanceof TKnownArrayElementContent
789-
else c = c0
790-
)
787+
predicate readStep(Node node1, ContentSet c, Node node2) {
788+
FlowSummaryImpl::Private::Steps::summaryReadStep(node1, c, node2)
791789
}
792790

793791
/**
794792
* Holds if values stored inside content `c` are cleared at node `n`. For example,
795793
* any value stored inside `f` is cleared at the pre-update node associated with `x`
796794
* in `x.f = newValue`.
797795
*/
798-
predicate clearsContent(Node n, Content c) {
796+
predicate clearsContent(Node n, ContentSet c) {
799797
FlowSummaryImpl::Private::Steps::summaryClearsContent(n, c)
800798
}
801799

@@ -875,7 +873,7 @@ int accessPathLimit() { result = 5 }
875873
* Holds if access paths with `c` at their head always should be tracked at high
876874
* precision. This disables adaptive access path precision for such access paths.
877875
*/
878-
predicate forceHighPrecision(Content c) { none() }
876+
predicate forceHighPrecision(Content c) { c instanceof Content::ArrayElementContent }
879877

880878
/** The unit type. */
881879
private newtype TUnit = TMkUnit()

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -194,14 +194,46 @@ module Content {
194194
class UnknownArrayElementContent extends ArrayElementContent, TUnknownArrayElementContent {
195195
override string toString() { result = "array element" }
196196
}
197+
}
197198

198-
/**
199-
* Used internally only, to represent the union of `KnownArrayElementContent`
200-
* and `UnknownArrayElementContent`, to avoid combinatorial explosions in
201-
* `SummaryComponentStack`s in flow summaries.
202-
*/
203-
private class AnyArrayElementContent extends Content, TAnyArrayElementContent {
204-
override string toString() { result = "any array element" }
199+
/**
200+
* An entity that represents a set of `Content`s.
201+
*
202+
* The set may be interpreted differently depending on whether it is
203+
* stored into (`getAStoreContent`) or read from (`getAReadContent`).
204+
*/
205+
class ContentSet extends TContentSet {
206+
/** Holds if this content set is the singleton `{c}`. */
207+
predicate isSingleton(Content c) { this = TSingletonContent(c) }
208+
209+
/** Holds if this content set represent all `ArrayElementContent`s. */
210+
predicate isAnyArrayElement() { this = TAnyArrayElementContent() }
211+
212+
/** Gets a textual representation of this content set. */
213+
string toString() {
214+
exists(Content c |
215+
this.isSingleton(c) and
216+
result = c.toString()
217+
)
218+
or
219+
this.isAnyArrayElement() and
220+
result = "any array element"
221+
}
222+
223+
/** Gets a content that may be stored into when storing into this set. */
224+
Content getAStoreContent() {
225+
this.isSingleton(result)
226+
or
227+
this.isAnyArrayElement() and
228+
result = TUnknownArrayElementContent()
229+
}
230+
231+
/** Gets a content that may be read from when reading from this set. */
232+
Content getAReadContent() {
233+
this.isSingleton(result)
234+
or
235+
this.isAnyArrayElement() and
236+
result instanceof Content::ArrayElementContent
205237
}
206238
}
207239

ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImplSpecific.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ Node summaryNode(SummarizedCallable c, SummaryNodeState state) { result = TSumma
2121
SummaryCall summaryDataFlowCall(Node receiver) { receiver = result.getReceiver() }
2222

2323
/** Gets the type of content `c`. */
24-
DataFlowType getContentType(Content c) { any() }
24+
DataFlowType getContentType(ContentSet c) { any() }
2525

2626
/** Gets the return type of kind `rk` for callable `c`. */
2727
bindingset[c, rk]

ruby/ql/lib/codeql/ruby/dataflow/internal/TaintTrackingPrivate.qll

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ predicate defaultTaintSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
2222
* of `c` at sinks and inputs to additional taint steps.
2323
*/
2424
bindingset[node]
25-
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::Content c) { none() }
25+
predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet c) { none() }
2626

2727
private CfgNodes::ExprNodes::VariableWriteAccessCfgNode variablesInPattern(
2828
CfgNodes::ExprNodes::CasePatternCfgNode p
@@ -95,10 +95,14 @@ private module Cached {
9595
or
9696
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, false)
9797
or
98-
// Although flow through arrays is modelled precisely using stores/reads, we still
99-
// allow flow out of a _tainted_ array. This is needed in order to support taint-
100-
// tracking configurations where the source is an array.
101-
readStep(nodeFrom, any(DataFlow::Content::ArrayElementContent c), nodeTo)
98+
// Although flow through collections is modelled precisely using stores/reads, we still
99+
// allow flow out of a _tainted_ collection. This is needed in order to support taint-
100+
// tracking configurations where the source is a collection.
101+
exists(DataFlow::ContentSet c | readStep(nodeFrom, c, nodeTo) |
102+
c.isSingleton(any(DataFlow::Content::ArrayElementContent aec))
103+
or
104+
c.isAnyArrayElement()
105+
)
102106
}
103107

104108
/**

0 commit comments

Comments
 (0)