Skip to content

Commit d61f645

Browse files
authored
Merge pull request github#8942 from hvitved/ruby/dataflow/hashes
Ruby: Data-flow through hashes
2 parents 6781a76 + 1ae8087 commit d61f645

File tree

17 files changed

+3025
-210
lines changed

17 files changed

+3025
-210
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: majorAnalysis
3+
---
4+
Added data-flow support for [hashes](https://docs.ruby-lang.org/en/3.1/Hash.html).

ruby/ql/lib/codeql/ruby/dataflow/FlowSummary.qll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,24 @@ module SummaryComponent {
5757
*/
5858
SummaryComponent elementAny() { result = SC::content(TAnyElementContent()) }
5959

60+
/**
61+
* Gets a summary component that represents an element in a collection at known
62+
* integer index `lower` or above.
63+
*/
64+
SummaryComponent elementLowerBound(int lower) {
65+
result = SC::content(TElementLowerBoundContent(lower))
66+
}
67+
68+
/** Gets a summary component that represents a value in a pair at an unknown key. */
69+
SummaryComponent pairValueUnknown() {
70+
result = SC::content(TSingletonContent(TUnknownPairValueContent()))
71+
}
72+
73+
/** Gets a summary component that represents a value in a pair at a known key. */
74+
SummaryComponent pairValueKnown(ConstantValue key) {
75+
result = SC::content(TSingletonContent(TKnownPairValueContent(key)))
76+
}
77+
6078
/** Gets a summary component that represents the return value of a call. */
6179
SummaryComponent return() { result = SC::return(any(NormalReturnKind rk)) }
6280
}

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ private import DataFlowPublic
66
private import DataFlowDispatch
77
private import SsaImpl as SsaImpl
88
private import FlowSummaryImpl as FlowSummaryImpl
9+
private import FlowSummaryImplSpecific as FlowSummaryImplSpecific
910

1011
/** Gets the callable in which this node occurs. */
1112
DataFlowCallable nodeGetEnclosingCallable(NodeImpl n) { result = n.getEnclosingCallable() }
@@ -177,7 +178,7 @@ private class Argument extends CfgNodes::ExprCfgNode {
177178
exists(int i |
178179
this = call.getArgument(i) and
179180
not this.getExpr() instanceof BlockArgument and
180-
not exists(this.getExpr().(Pair).getKey().getConstantValue().getSymbol()) and
181+
not this.getExpr().(Pair).getKey().getConstantValue().isSymbol(_) and
181182
arg.isPositional(i)
182183
)
183184
or
@@ -334,16 +335,21 @@ private module Cached {
334335
cached
335336
newtype TContentSet =
336337
TSingletonContent(Content c) or
337-
TAnyElementContent()
338+
TAnyElementContent() or
339+
TElementLowerBoundContent(int lower) {
340+
FlowSummaryImplSpecific::ParsePositions::isParsedElementLowerBoundPosition(_, lower)
341+
}
338342

339343
cached
340344
newtype TContent =
341345
TKnownElementContent(ConstantValue cv) {
342346
not cv.isInt(_) or
343347
cv.getInt() in [0 .. 10]
344348
} or
345-
TFieldContent(string name) { name = any(InstanceVariable v).getName() } or
346-
TUnknownElementContent()
349+
TUnknownElementContent() or
350+
TKnownPairValueContent(ConstantValue cv) or
351+
TUnknownPairValueContent() or
352+
TFieldContent(string name) { name = any(InstanceVariable v).getName() }
347353

348354
/**
349355
* Holds if `e` is an `ExprNode` that may be returned by a call to `c`.
@@ -362,6 +368,8 @@ private module Cached {
362368

363369
class TElementContent = TKnownElementContent or TUnknownElementContent;
364370

371+
class TPairValueContent = TKnownPairValueContent or TUnknownPairValueContent;
372+
365373
import Cached
366374

367375
/** Holds if `n` should be hidden from path explanations. */
@@ -818,6 +826,25 @@ predicate storeStep(Node node1, ContentSet c, Node node2) {
818826
).getReceiver()
819827
or
820828
FlowSummaryImpl::Private::Steps::summaryStoreStep(node1, c, node2)
829+
or
830+
// Needed for pairs passed into method calls where the key is not a symbol,
831+
// that is, where it is not a keyword argument.
832+
node2.asExpr() =
833+
any(CfgNodes::ExprNodes::PairCfgNode pair |
834+
exists(CfgNodes::ExprCfgNode key |
835+
key = pair.getKey() and
836+
pair.getValue() = node1.asExpr()
837+
|
838+
exists(ConstantValue cv |
839+
cv = key.getConstantValue() and
840+
not cv.isSymbol(_) and // handled as a keyword argument
841+
c.isSingleton(TKnownPairValueContent(cv))
842+
)
843+
or
844+
not exists(key.getConstantValue()) and
845+
c.isSingleton(TUnknownPairValueContent())
846+
)
847+
)
821848
}
822849

823850
/**

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,35 @@ module Content {
242242
not exists(TKnownElementContent(cv)) and
243243
result = TUnknownElementContent()
244244
}
245+
246+
/**
247+
* Gets the constant value of `e`, which corresponds to a valid known
248+
* element index. Unlike calling simply `e.getConstantValue()`, this
249+
* excludes negative array indices.
250+
*/
251+
ConstantValue getKnownElementIndex(Expr e) {
252+
result = getElementContent(e.getConstantValue()).(KnownElementContent).getIndex()
253+
}
254+
255+
/** A value in a pair with a known or unknown key. */
256+
class PairValueContent extends Content, TPairValueContent { }
257+
258+
/** A value in a pair with a known key. */
259+
class KnownPairValueContent extends PairValueContent, TKnownPairValueContent {
260+
private ConstantValue key;
261+
262+
KnownPairValueContent() { this = TKnownPairValueContent(key) }
263+
264+
/** Gets the index in the collection. */
265+
ConstantValue getIndex() { result = key }
266+
267+
override string toString() { result = "pair " + key }
268+
}
269+
270+
/** A value in a pair with an unknown key. */
271+
class UnknownPairValueContent extends PairValueContent, TUnknownPairValueContent {
272+
override string toString() { result = "pair" }
273+
}
245274
}
246275

247276
/**
@@ -257,6 +286,12 @@ class ContentSet extends TContentSet {
257286
/** Holds if this content set represents all `ElementContent`s. */
258287
predicate isAnyElement() { this = TAnyElementContent() }
259288

289+
/**
290+
* Holds if this content set represents all `KnownElementContent`s where
291+
* the index is an integer greater than or equal to `lower`.
292+
*/
293+
predicate isElementLowerBound(int lower) { this = TElementLowerBoundContent(lower) }
294+
260295
/** Gets a textual representation of this content set. */
261296
string toString() {
262297
exists(Content c |
@@ -265,7 +300,12 @@ class ContentSet extends TContentSet {
265300
)
266301
or
267302
this.isAnyElement() and
268-
result = "any array element"
303+
result = "any element"
304+
or
305+
exists(int lower |
306+
this.isElementLowerBound(lower) and
307+
result = lower + ".."
308+
)
269309
}
270310

271311
/** Gets a content that may be stored into when storing into this set. */
@@ -274,6 +314,9 @@ class ContentSet extends TContentSet {
274314
or
275315
this.isAnyElement() and
276316
result = TUnknownElementContent()
317+
or
318+
this.isElementLowerBound(_) and
319+
result = TUnknownElementContent()
277320
}
278321

279322
/** Gets a content that may be read from when reading from this set. */
@@ -282,6 +325,12 @@ class ContentSet extends TContentSet {
282325
or
283326
this.isAnyElement() and
284327
result instanceof Content::ElementContent
328+
or
329+
exists(int lower, int i |
330+
this.isElementLowerBound(lower) and
331+
result.(Content::KnownElementContent).getIndex().isInt(i) and
332+
i >= lower
333+
)
285334
}
286335
}
287336

ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImplSpecific.qll

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,11 @@ private SummaryComponent interpretElementArg(string arg) {
6767
arg = "any" and
6868
result = FlowSummary::SummaryComponent::elementAny()
6969
or
70+
exists(int lower |
71+
ParsePositions::isParsedElementLowerBoundPosition(arg, lower) and
72+
result = FlowSummary::SummaryComponent::elementLowerBound(lower)
73+
)
74+
or
7075
exists(ConstantValue cv | result = FlowSummary::SummaryComponent::elementKnown(cv) |
7176
cv.isInt(AccessPath::parseInt(arg))
7277
or
@@ -103,6 +108,16 @@ SummaryComponent interpretComponentSpecific(AccessPathToken c) {
103108
interpretElementArg(c.getAnArgument("WithoutElement")) and
104109
result = FlowSummary::SummaryComponent::withoutContent(cs)
105110
)
111+
or
112+
exists(string arg | arg = c.getAnArgument("PairValue") |
113+
arg = "?" and
114+
result = FlowSummary::SummaryComponent::pairValueUnknown()
115+
or
116+
exists(ConstantValue cv |
117+
result = FlowSummary::SummaryComponent::pairValueKnown(cv) and
118+
cv.serialize() = arg
119+
)
120+
)
106121
}
107122

108123
/** Gets the textual representation of a summary component in the format used for flow summaries. */
@@ -217,6 +232,13 @@ module ParsePositions {
217232
)
218233
}
219234

235+
private predicate isElementBody(string body) {
236+
exists(AccessPathToken tok |
237+
tok.getName() = "Element" and
238+
body = tok.getAnArgument()
239+
)
240+
}
241+
220242
predicate isParsedParameterPosition(string c, int i) {
221243
isParamBody(c) and
222244
i = AccessPath::parseInt(c)
@@ -241,6 +263,11 @@ module ParsePositions {
241263
isArgBody(c) and
242264
c = paramName + ":"
243265
}
266+
267+
predicate isParsedElementLowerBoundPosition(string c, int lower) {
268+
isElementBody(c) and
269+
lower = AccessPath::parseLowerBound(c)
270+
}
244271
}
245272

246273
/** Gets the argument position obtained by parsing `X` in `Parameter[X]`. */

ruby/ql/lib/codeql/ruby/frameworks/Core.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import core.Object::Object
1010
import core.Kernel::Kernel
1111
import core.Module
1212
import core.Array
13+
import core.Hash
1314
import core.String
1415
import core.Regexp
1516
import core.IO

0 commit comments

Comments
 (0)