Skip to content

Commit cbfd0b3

Browse files
committed
PS: Add element content flow for reads and writes into arrays.
1 parent 1f558a0 commit cbfd0b3

File tree

4 files changed

+183
-25
lines changed

4 files changed

+183
-25
lines changed

powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowPrivate.qll

Lines changed: 88 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,8 @@ private module Cached {
128128
n = member.getBase() and
129129
not member.isStatic()
130130
)
131+
or
132+
n = any(CfgNodes::ExprNodes::IndexCfgNode index).getBase()
131133
}
132134

133135
cached
@@ -219,23 +221,51 @@ private module Cached {
219221
}
220222

221223
cached
222-
newtype TContentSet = TSingletonContent(Content c)
224+
newtype TContentSet =
225+
TSingletonContent(Content c) or
226+
TAnyElementContent() or
227+
TKnownOrUnknownElementContent(Content::KnownElementContent c)
228+
229+
private predicate trackKnownValue(ConstantValue cv) {
230+
exists(cv.asString())
231+
or
232+
cv.asInt() = [0 .. 10]
233+
}
223234

224235
cached
225236
newtype TContent =
226237
TFieldContent(string name) {
227238
name = any(PropertyMember member).getName()
228239
or
229240
name = any(MemberExpr me).getMemberName()
230-
}
241+
} or
242+
TKnownElementContent(ConstantValue cv) { trackKnownValue(cv) } or
243+
TUnknownElementContent()
231244

232245
cached
233-
newtype TContentApprox = TNonElementContentApprox(Content c)
246+
newtype TContentApprox =
247+
TNonElementContentApprox(Content c) { not c instanceof Content::ElementContent } or
248+
TUnknownElementContentApprox() or
249+
TKnownIntegerElementContentApprox() or
250+
TKnownElementContentApprox(string approx) { approx = approxKnownElementIndex(_) }
234251

235252
cached
236253
newtype TDataFlowType = TUnknownDataFlowType()
237254
}
238255

256+
class TElementContent = TKnownElementContent or TUnknownElementContent;
257+
258+
/** Gets a string for approximating known element indices. */
259+
private string approxKnownElementIndex(ConstantValue cv) {
260+
not exists(cv.asInt()) and
261+
exists(string s | s = cv.serialize() |
262+
s.length() < 2 and
263+
result = s
264+
or
265+
result = s.prefix(2)
266+
)
267+
}
268+
239269
import Cached
240270

241271
/** Holds if `n` should be hidden from path explanations. */
@@ -477,26 +507,54 @@ predicate jumpStep(Node pred, Node succ) {
477507
* content `c`.
478508
*/
479509
predicate storeStep(Node node1, ContentSet c, Node node2) {
480-
node2.(PostUpdateNode).getPreUpdateNode().asExpr() =
481-
any(CfgNodes::ExprNodes::MemberCfgNode var |
482-
exists(CfgNodes::StmtNodes::AssignStmtCfgNode assign |
483-
var = assign.getLeftHandSide() and
484-
node1.asStmt() = assign.getRightHandSide()
485-
|
486-
c.isSingleton(any(Content::FieldContent ct | ct.getName() = var.getMemberName()))
487-
)
488-
).getBase()
510+
exists(CfgNodes::ExprNodes::MemberCfgWriteAccessNode var, Content::FieldContent fc |
511+
node2.(PostUpdateNode).getPreUpdateNode().asExpr() = var.getBase() and
512+
node1.asStmt() = var.getAssignStmt().getRightHandSide() and
513+
fc.getName() = var.getMemberName() and
514+
c.isSingleton(fc)
515+
)
516+
or
517+
exists(
518+
CfgNodes::ExprNodes::IndexCfgWriteNode var, Content::KnownElementContent ec, int index,
519+
CfgNodes::ExprCfgNode e
520+
|
521+
node2.(PostUpdateNode).getPreUpdateNode().asExpr() = var.getBase() and
522+
node1.asStmt() = var.getAssignStmt().getRightHandSide() and
523+
c.isKnownOrUnknownElement(ec) and
524+
index = ec.getIndex().asInt() and
525+
e = var.getIndex()
526+
|
527+
index = e.getValue().asInt()
528+
or
529+
not exists(e.getValue().asInt())
530+
)
489531
}
490532

491533
/**
492534
* Holds if there is a read step of content `c` from `node1` to `node2`.
493535
*/
494536
predicate readStep(Node node1, ContentSet c, Node node2) {
495-
node2.asExpr() =
496-
any(CfgNodes::ExprNodes::MemberCfgReadAccessNode var |
497-
node1.asExpr() = var.getBase() and
498-
c.isSingleton(any(Content::FieldContent ct | ct.getName() = var.getMemberName()))
499-
)
537+
exists(CfgNodes::ExprNodes::MemberCfgReadAccessNode var, Content::FieldContent fc |
538+
node2.asExpr() = var and
539+
node1.asExpr() = var.getBase() and
540+
fc.getName() = var.getMemberName() and
541+
c.isSingleton(fc)
542+
)
543+
or
544+
exists(
545+
CfgNodes::ExprNodes::IndexCfgReadNode var, Content::KnownElementContent ec, int index,
546+
CfgNodes::ExprCfgNode e
547+
|
548+
node2.asExpr() = var and
549+
node1.asExpr() = var.getBase() and
550+
c.isKnownOrUnknownElement(ec) and
551+
index = ec.getIndex().asInt() and
552+
e = var.getIndex()
553+
|
554+
index = e.getValue().asInt()
555+
or
556+
not exists(e.getValue().asInt())
557+
)
500558
}
501559

502560
/**
@@ -584,7 +642,7 @@ class DataFlowExpr = CfgNodes::ExprCfgNode;
584642
* Holds if access paths with `c` at their head always should be tracked at high
585643
* precision. This disables adaptive access path precision for such access paths.
586644
*/
587-
predicate forceHighPrecision(Content c) { none() }
645+
predicate forceHighPrecision(Content c) { c instanceof Content::ElementContent }
588646

589647
class NodeRegion instanceof Unit {
590648
string toString() { result = "NodeRegion" }
@@ -653,7 +711,18 @@ class ContentApprox extends TContentApprox {
653711
}
654712

655713
/** Gets an approximated value for content `c`. */
656-
ContentApprox getContentApprox(Content c) { result = TNonElementContentApprox(c) }
714+
ContentApprox getContentApprox(Content c) {
715+
c instanceof Content::UnknownElementContent and
716+
result = TUnknownElementContentApprox()
717+
or
718+
exists(c.(Content::KnownElementContent).getIndex().asInt()) and
719+
result = TKnownIntegerElementContentApprox()
720+
or
721+
result =
722+
TKnownElementContentApprox(approxKnownElementIndex(c.(Content::KnownElementContent).getIndex()))
723+
or
724+
result = TNonElementContentApprox(c)
725+
}
657726

658727
/**
659728
* A unit class for adding additional jump steps.

powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowPublic.qll

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,26 @@ class Content extends TContent {
169169

170170
/** Provides different sub classes of `Content`. */
171171
module Content {
172+
/** An element in a collection, for example an element in an array or in a hash. */
173+
class ElementContent extends Content, TElementContent { }
174+
175+
/** An element in a collection at a known index. */
176+
class KnownElementContent extends ElementContent, TKnownElementContent {
177+
private ConstantValue cv;
178+
179+
KnownElementContent() { this = TKnownElementContent(cv) }
180+
181+
/** Gets the index in the collection. */
182+
ConstantValue getIndex() { result = cv }
183+
184+
override string toString() { result = "element " + cv }
185+
}
186+
187+
/** An element in a collection at an unknown index. */
188+
class UnknownElementContent extends ElementContent, TUnknownElementContent {
189+
override string toString() { result = "element" }
190+
}
191+
172192
/** A field of an object. */
173193
class FieldContent extends Content, TFieldContent {
174194
private string name;
@@ -192,19 +212,66 @@ class ContentSet extends TContentSet {
192212
/** Holds if this content set is the singleton `{c}`. */
193213
predicate isSingleton(Content c) { this = TSingletonContent(c) }
194214

215+
/** Holds if this content set represents all `ElementContent`s. */
216+
predicate isAnyElement() { this = TAnyElementContent() }
217+
218+
/**
219+
* Holds if this content set represents a specific known element index, or an
220+
* unknown element index.
221+
*/
222+
predicate isKnownOrUnknownElement(Content::KnownElementContent c) {
223+
this = TKnownOrUnknownElementContent(c)
224+
}
225+
195226
/** Gets a textual representation of this content set. */
196227
string toString() {
197228
exists(Content c |
198229
this.isSingleton(c) and
199230
result = c.toString()
200231
)
232+
or
233+
this.isAnyElement() and
234+
result = "any element"
235+
or
236+
exists(Content::KnownElementContent c |
237+
this.isKnownOrUnknownElement(c) and
238+
result = c + " or unknown"
239+
)
240+
}
241+
242+
Content getAStoreContent() {
243+
this.isSingleton(result)
244+
or
245+
// For reverse stores, `a[unknown][0] = x`, it is important that the read-step
246+
// from `a` to `a[unknown]` (which can read any element), gets translated into
247+
// a reverse store step that stores only into `?`
248+
this.isAnyElement() and
249+
result = TUnknownElementContent()
250+
or
251+
// For reverse stores, `a[1][0] = x`, it is important that the read-step
252+
// from `a` to `a[1]` (which can read both elements stored at exactly index `1`
253+
// and elements stored at unknown index), gets translated into a reverse store
254+
// step that stores only into `1`
255+
this.isKnownOrUnknownElement(result)
201256
}
202257

203-
/** Gets a content that may be stored into when storing into this set. */
204-
Content getAStoreContent() { this.isSingleton(result) }
258+
pragma[nomagic]
259+
private Content getAnElementReadContent() {
260+
exists(Content::KnownElementContent c | this.isKnownOrUnknownElement(c) |
261+
result = c or
262+
result = TUnknownElementContent()
263+
)
264+
}
205265

206266
/** Gets a content that may be read from when reading from this set. */
207-
Content getAReadContent() { this.isSingleton(result) }
267+
Content getAReadContent() {
268+
this.isSingleton(result)
269+
or
270+
this.isAnyElement() and
271+
result instanceof Content::ElementContent
272+
or
273+
result = this.getAnElementReadContent()
274+
}
208275
}
209276

210277
/**

powershell/ql/lib/semmle/code/powershell/dataflow/internal/TaintTrackingPrivate.qll

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,19 @@ private module Cached {
3030
*/
3131
cached
3232
predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, string model) {
33-
none()
33+
// Although flow through collections is modeled precisely using stores/reads, we still
34+
// allow flow out of a _tainted_ collection. This is needed in order to support taint-
35+
// tracking configurations where the source is a collection.
36+
exists(DataFlow::ContentSet c | readStep(nodeFrom, c, nodeTo) |
37+
c.isSingleton(any(DataFlow::Content::ElementContent ec))
38+
or
39+
c.isKnownOrUnknownElement(_)
40+
// or
41+
// TODO: We don't generate this one from readSteps yet, but we will as
42+
// soon as we start on models-as-data.
43+
// c.isAnyElement()
44+
) and
45+
model = ""
3446
}
3547

3648
/**

powershell/ql/lib/semmle/code/powershell/typetracking/internal/TypeTrackingImpl.qll

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,21 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
5353
class ContentFilter = TypeTrackingInput::ContentFilter;
5454

5555
ContentFilter getFilterFromWithoutContentStep(Content content) {
56-
none() // TODO
56+
(
57+
content.isAnyElement()
58+
or
59+
content.isSingleton(any(DataFlow::Content::UnknownElementContent c))
60+
) and
61+
result = MkElementFilter()
5762
}
5863

5964
ContentFilter getFilterFromWithContentStep(Content content) {
60-
none() // TODO
65+
(
66+
content.isAnyElement()
67+
or
68+
content.isSingleton(any(DataFlow::Content::ElementContent c))
69+
) and
70+
result = MkElementFilter()
6171
}
6272

6373
// Summaries and their stacks

0 commit comments

Comments
 (0)