Skip to content

Commit 8107fba

Browse files
authored
Merge pull request github#3456 from hvitved/dataflow/precise-field-types
Data flow: Track precise types during field flow
2 parents 4126d5b + ad56f17 commit 8107fba

32 files changed

+5435
-4589
lines changed

cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl.qll

Lines changed: 254 additions & 220 deletions
Large diffs are not rendered by default.

cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl2.qll

Lines changed: 254 additions & 220 deletions
Large diffs are not rendered by default.

cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl3.qll

Lines changed: 254 additions & 220 deletions
Large diffs are not rendered by default.

cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl4.qll

Lines changed: 254 additions & 220 deletions
Large diffs are not rendered by default.

cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImplCommon.qll

Lines changed: 135 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -198,123 +198,130 @@ private module Cached {
198198
/**
199199
* The final flow-through calculation:
200200
*
201-
* - Input access paths are abstracted with a `ContentOption` parameter
202-
* that represents the head of the access path. `TContentNone()` means that
203-
* the access path is unrestricted.
201+
* - Calculated flow is either value-preserving (`read = TReadStepTypesNone()`)
202+
* or summarized as a single read step with before and after types recorded
203+
* in the `ReadStepTypesOption` parameter.
204204
* - Types are checked using the `compatibleTypes()` relation.
205205
*/
206206
private module Final {
207207
/**
208208
* Holds if `p` can flow to `node` in the same callable using only
209-
* value-preserving steps, not taking call contexts into account.
209+
* value-preserving steps and possibly a single read step, not taking
210+
* call contexts into account.
210211
*
211-
* `contentIn` describes the content of `p` that can flow to `node`
212-
* (if any).
212+
* If a read step was taken, then `read` captures the `Content`, the
213+
* container type, and the content type.
213214
*/
214-
predicate parameterValueFlow(ParameterNode p, Node node, ContentOption contentIn) {
215-
parameterValueFlow0(p, node, contentIn) and
215+
predicate parameterValueFlow(ParameterNode p, Node node, ReadStepTypesOption read) {
216+
parameterValueFlow0(p, node, read) and
216217
if node instanceof CastingNode
217218
then
218219
// normal flow through
219-
contentIn = TContentNone() and
220+
read = TReadStepTypesNone() and
220221
compatibleTypes(getErasedNodeTypeBound(p), getErasedNodeTypeBound(node))
221222
or
222223
// getter
223-
exists(Content fIn |
224-
contentIn.getContent() = fIn and
225-
compatibleTypes(fIn.getType(), getErasedNodeTypeBound(node))
226-
)
224+
compatibleTypes(read.getContentType(), getErasedNodeTypeBound(node))
227225
else any()
228226
}
229227

230228
pragma[nomagic]
231-
private predicate parameterValueFlow0(ParameterNode p, Node node, ContentOption contentIn) {
229+
private predicate parameterValueFlow0(ParameterNode p, Node node, ReadStepTypesOption read) {
232230
p = node and
233231
Cand::cand(p, _) and
234-
contentIn = TContentNone()
232+
read = TReadStepTypesNone()
235233
or
236234
// local flow
237235
exists(Node mid |
238-
parameterValueFlow(p, mid, contentIn) and
236+
parameterValueFlow(p, mid, read) and
239237
LocalFlowBigStep::localFlowBigStep(mid, node)
240238
)
241239
or
242240
// read
243-
exists(Node mid, Content f |
244-
parameterValueFlow(p, mid, TContentNone()) and
245-
readStep(mid, f, node) and
246-
contentIn.getContent() = f and
241+
exists(Node mid |
242+
parameterValueFlow(p, mid, TReadStepTypesNone()) and
243+
readStepWithTypes(mid, read.getContainerType(), read.getContent(), node,
244+
read.getContentType()) and
247245
Cand::parameterValueFlowReturnCand(p, _, true) and
248-
compatibleTypes(getErasedNodeTypeBound(p), f.getContainerType())
246+
compatibleTypes(getErasedNodeTypeBound(p), read.getContainerType())
249247
)
250248
or
251249
// flow through: no prior read
252250
exists(ArgumentNode arg |
253-
parameterValueFlowArg(p, arg, TContentNone()) and
254-
argumentValueFlowsThrough(arg, contentIn, node)
251+
parameterValueFlowArg(p, arg, TReadStepTypesNone()) and
252+
argumentValueFlowsThrough(arg, read, node)
255253
)
256254
or
257255
// flow through: no read inside method
258256
exists(ArgumentNode arg |
259-
parameterValueFlowArg(p, arg, contentIn) and
260-
argumentValueFlowsThrough(arg, TContentNone(), node)
257+
parameterValueFlowArg(p, arg, read) and
258+
argumentValueFlowsThrough(arg, TReadStepTypesNone(), node)
261259
)
262260
}
263261

264262
pragma[nomagic]
265263
private predicate parameterValueFlowArg(
266-
ParameterNode p, ArgumentNode arg, ContentOption contentIn
264+
ParameterNode p, ArgumentNode arg, ReadStepTypesOption read
267265
) {
268-
parameterValueFlow(p, arg, contentIn) and
266+
parameterValueFlow(p, arg, read) and
269267
Cand::argumentValueFlowsThroughCand(arg, _, _)
270268
}
271269

272270
pragma[nomagic]
273271
private predicate argumentValueFlowsThrough0(
274-
DataFlowCall call, ArgumentNode arg, ReturnKind kind, ContentOption contentIn
272+
DataFlowCall call, ArgumentNode arg, ReturnKind kind, ReadStepTypesOption read
275273
) {
276274
exists(ParameterNode param | viableParamArg(call, param, arg) |
277-
parameterValueFlowReturn(param, kind, contentIn)
275+
parameterValueFlowReturn(param, kind, read)
278276
)
279277
}
280278

281279
/**
282-
* Holds if `arg` flows to `out` through a call using only value-preserving steps,
283-
* not taking call contexts into account.
280+
* Holds if `arg` flows to `out` through a call using only
281+
* value-preserving steps and possibly a single read step, not taking
282+
* call contexts into account.
284283
*
285-
* `contentIn` describes the content of `arg` that can flow to `out` (if any).
284+
* If a read step was taken, then `read` captures the `Content`, the
285+
* container type, and the content type.
286286
*/
287287
pragma[nomagic]
288-
predicate argumentValueFlowsThrough(ArgumentNode arg, ContentOption contentIn, Node out) {
288+
predicate argumentValueFlowsThrough(ArgumentNode arg, ReadStepTypesOption read, Node out) {
289289
exists(DataFlowCall call, ReturnKind kind |
290-
argumentValueFlowsThrough0(call, arg, kind, contentIn) and
290+
argumentValueFlowsThrough0(call, arg, kind, read) and
291291
out = getAnOutNode(call, kind)
292292
|
293293
// normal flow through
294-
contentIn = TContentNone() and
294+
read = TReadStepTypesNone() and
295295
compatibleTypes(getErasedNodeTypeBound(arg), getErasedNodeTypeBound(out))
296296
or
297297
// getter
298-
exists(Content fIn |
299-
contentIn.getContent() = fIn and
300-
compatibleTypes(getErasedNodeTypeBound(arg), fIn.getContainerType()) and
301-
compatibleTypes(fIn.getType(), getErasedNodeTypeBound(out))
302-
)
298+
compatibleTypes(getErasedNodeTypeBound(arg), read.getContainerType()) and
299+
compatibleTypes(read.getContentType(), getErasedNodeTypeBound(out))
303300
)
304301
}
305302

303+
/**
304+
* Holds if `arg` flows to `out` through a call using only
305+
* value-preserving steps and a single read step, not taking call
306+
* contexts into account, thus representing a getter-step.
307+
*/
308+
predicate getterStep(ArgumentNode arg, Content c, Node out) {
309+
argumentValueFlowsThrough(arg, TReadStepTypesSome(_, c, _), out)
310+
}
311+
306312
/**
307313
* Holds if `p` can flow to a return node of kind `kind` in the same
308-
* callable using only value-preserving steps.
314+
* callable using only value-preserving steps and possibly a single read
315+
* step.
309316
*
310-
* `contentIn` describes the content of `p` that can flow to the return
311-
* node (if any).
317+
* If a read step was taken, then `read` captures the `Content`, the
318+
* container type, and the content type.
312319
*/
313320
private predicate parameterValueFlowReturn(
314-
ParameterNode p, ReturnKind kind, ContentOption contentIn
321+
ParameterNode p, ReturnKind kind, ReadStepTypesOption read
315322
) {
316323
exists(ReturnNode ret |
317-
parameterValueFlow(p, ret, contentIn) and
324+
parameterValueFlow(p, ret, read) and
318325
kind = ret.getKind()
319326
)
320327
}
@@ -329,7 +336,27 @@ private module Cached {
329336
*/
330337
cached
331338
predicate parameterValueFlowsToPreUpdate(ParameterNode p, PostUpdateNode n) {
332-
parameterValueFlow(p, n.getPreUpdateNode(), TContentNone())
339+
parameterValueFlow(p, n.getPreUpdateNode(), TReadStepTypesNone())
340+
}
341+
342+
private predicate store(
343+
Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType
344+
) {
345+
storeStep(node1, c, node2) and
346+
readStep(_, c, _) and
347+
contentType = getErasedNodeTypeBound(node1) and
348+
containerType = getErasedNodeTypeBound(node2)
349+
or
350+
exists(Node n1, Node n2 |
351+
n1 = node1.(PostUpdateNode).getPreUpdateNode() and
352+
n2 = node2.(PostUpdateNode).getPreUpdateNode()
353+
|
354+
argumentValueFlowsThrough(n2, TReadStepTypesSome(containerType, c, contentType), n1)
355+
or
356+
readStep(n2, c, n1) and
357+
contentType = getErasedNodeTypeBound(n1) and
358+
containerType = getErasedNodeTypeBound(n2)
359+
)
333360
}
334361

335362
/**
@@ -340,17 +367,8 @@ private module Cached {
340367
* been stored into, in order to handle cases like `x.f1.f2 = y`.
341368
*/
342369
cached
343-
predicate store(Node node1, Content f, Node node2) {
344-
storeStep(node1, f, node2) and readStep(_, f, _)
345-
or
346-
exists(Node n1, Node n2 |
347-
n1 = node1.(PostUpdateNode).getPreUpdateNode() and
348-
n2 = node2.(PostUpdateNode).getPreUpdateNode()
349-
|
350-
argumentValueFlowsThrough(n2, TContentSome(f), n1)
351-
or
352-
readStep(n2, f, n1)
353-
)
370+
predicate store(Node node1, TypedContent tc, Node node2, DataFlowType contentType) {
371+
store(node1, tc.getContent(), node2, contentType, tc.getContainerType())
354372
}
355373

356374
import FlowThrough
@@ -397,10 +415,13 @@ private module Cached {
397415
TBooleanNone() or
398416
TBooleanSome(boolean b) { b = true or b = false }
399417

418+
cached
419+
newtype TTypedContent = MkTypedContent(Content c, DataFlowType t) { store(_, c, _, _, t) }
420+
400421
cached
401422
newtype TAccessPathFront =
402423
TFrontNil(DataFlowType t) or
403-
TFrontHead(Content f)
424+
TFrontHead(TypedContent tc)
404425

405426
cached
406427
newtype TAccessPathFrontOption =
@@ -415,25 +436,38 @@ class CastingNode extends Node {
415436
CastingNode() {
416437
this instanceof ParameterNode or
417438
this instanceof CastNode or
418-
this instanceof OutNodeExt
439+
this instanceof OutNodeExt or
440+
// For reads, `x.f`, we want to check that the tracked type after the read (which
441+
// is obtained by popping the head of the access path stack) is compatible with
442+
// the type of `x.f`.
443+
readStep(_, _, this)
419444
}
420445
}
421446

422-
newtype TContentOption =
423-
TContentNone() or
424-
TContentSome(Content f)
447+
private predicate readStepWithTypes(
448+
Node n1, DataFlowType container, Content c, Node n2, DataFlowType content
449+
) {
450+
readStep(n1, c, n2) and
451+
container = getErasedNodeTypeBound(n1) and
452+
content = getErasedNodeTypeBound(n2)
453+
}
425454

426-
private class ContentOption extends TContentOption {
427-
Content getContent() { this = TContentSome(result) }
455+
private newtype TReadStepTypesOption =
456+
TReadStepTypesNone() or
457+
TReadStepTypesSome(DataFlowType container, Content c, DataFlowType content) {
458+
readStepWithTypes(_, container, c, _, content)
459+
}
428460

429-
predicate hasContent() { exists(this.getContent()) }
461+
private class ReadStepTypesOption extends TReadStepTypesOption {
462+
predicate isSome() { this instanceof TReadStepTypesSome }
430463

431-
string toString() {
432-
result = this.getContent().toString()
433-
or
434-
not this.hasContent() and
435-
result = "<none>"
436-
}
464+
DataFlowType getContainerType() { this = TReadStepTypesSome(result, _, _) }
465+
466+
Content getContent() { this = TReadStepTypesSome(_, result, _) }
467+
468+
DataFlowType getContentType() { this = TReadStepTypesSome(_, _, result) }
469+
470+
string toString() { if this.isSome() then result = "Some(..)" else result = "None()" }
437471
}
438472

439473
/**
@@ -692,6 +726,23 @@ class BooleanOption extends TBooleanOption {
692726
}
693727
}
694728

729+
/** Content tagged with the type of a containing object. */
730+
class TypedContent extends MkTypedContent {
731+
private Content c;
732+
private DataFlowType t;
733+
734+
TypedContent() { this = MkTypedContent(c, t) }
735+
736+
/** Gets the content. */
737+
Content getContent() { result = c }
738+
739+
/** Gets the container type. */
740+
DataFlowType getContainerType() { result = t }
741+
742+
/** Gets a textual representation of this content. */
743+
string toString() { result = c.toString() }
744+
}
745+
695746
/**
696747
* The front of an access path. This is either a head or a nil.
697748
*/
@@ -702,25 +753,29 @@ abstract class AccessPathFront extends TAccessPathFront {
702753

703754
abstract boolean toBoolNonEmpty();
704755

705-
predicate headUsesContent(Content f) { this = TFrontHead(f) }
756+
predicate headUsesContent(TypedContent tc) { this = TFrontHead(tc) }
706757
}
707758

708759
class AccessPathFrontNil extends AccessPathFront, TFrontNil {
709-
override string toString() {
710-
exists(DataFlowType t | this = TFrontNil(t) | result = ppReprType(t))
711-
}
760+
private DataFlowType t;
712761

713-
override DataFlowType getType() { this = TFrontNil(result) }
762+
AccessPathFrontNil() { this = TFrontNil(t) }
763+
764+
override string toString() { result = ppReprType(t) }
765+
766+
override DataFlowType getType() { result = t }
714767

715768
override boolean toBoolNonEmpty() { result = false }
716769
}
717770

718771
class AccessPathFrontHead extends AccessPathFront, TFrontHead {
719-
override string toString() { exists(Content f | this = TFrontHead(f) | result = f.toString()) }
772+
private TypedContent tc;
720773

721-
override DataFlowType getType() {
722-
exists(Content head | this = TFrontHead(head) | result = head.getContainerType())
723-
}
774+
AccessPathFrontHead() { this = TFrontHead(tc) }
775+
776+
override string toString() { result = tc.toString() }
777+
778+
override DataFlowType getType() { result = tc.getContainerType() }
724779

725780
override boolean toBoolNonEmpty() { result = true }
726781
}

0 commit comments

Comments
 (0)