Skip to content

Commit 9c0f877

Browse files
committed
C++: Keep old instruction -> instruction flow in simpleInstructionLocalFlowStep. This means we don't have to add general operand -> instruction to the simpleLocalFlowStep relation, which seems to add a 10% performance regression.
1 parent a0bfbda commit 9c0f877

File tree

3 files changed

+117
-58
lines changed

3 files changed

+117
-58
lines changed

cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -186,19 +186,18 @@ private class ArrayContent extends Content, TArrayContent {
186186

187187
private predicate storeStepNoChi(Node node1, Content f, PostUpdateNode node2) {
188188
exists(FieldAddressInstruction fa, StoreInstruction store |
189-
node2.asInstruction() = store and
190-
node1.asOperand() = store.getSourceValueOperand() and
189+
store = node2.asInstruction() and
191190
store.getDestinationAddress() = fa and
191+
store.getSourceValue() = node1.asInstruction() and
192192
f.(FieldContent).getField() = fa.getField()
193193
)
194194
}
195195

196196
private predicate storeStepChi(Node node1, Content f, PostUpdateNode node2) {
197-
exists(FieldAddressInstruction fa, ChiInstruction chi, StoreInstruction store |
198-
node2.asInstruction() = chi and
199-
node1.asOperand() = chi.getPartialOperand() and
200-
chi.getPartial() = store and
197+
exists(FieldAddressInstruction fa, StoreInstruction store |
198+
node1.asInstruction() = store and
201199
store.getDestinationAddress() = fa and
200+
node2.asInstruction().(ChiInstruction).getPartial() = store and
202201
f.(FieldContent).getField() = fa.getField()
203202
)
204203
}
@@ -220,10 +219,10 @@ predicate storeStep(Node node1, Content f, PostUpdateNode node2) {
220219
*/
221220
predicate readStep(Node node1, Content f, Node node2) {
222221
exists(FieldAddressInstruction fa, LoadInstruction load |
223-
node2.asInstruction() = load and
224-
node1.asOperand() = load.getSourceValueOperand() and
225222
load.getSourceAddress() = fa and
226-
fa.getField() = f.(FieldContent).getField()
223+
node1.asInstruction() = load.getSourceValueOperand().getAnyDef() and
224+
fa.getField() = f.(FieldContent).getField() and
225+
load = node2.asInstruction()
227226
)
228227
}
229228

cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll

Lines changed: 37 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,10 @@ private class ExplicitFieldStoreQualifierNode extends PartialDefinitionNode {
332332
)
333333
}
334334

335+
// By using an operand as the result of this predicate we avoid the dataflow inconsistency errors
336+
// caused by having multiple nodes sharing the same pre update node. This inconsistency error can cause
337+
// a tuple explosion in the big step dataflow relation since it can make many nodes be the entry node
338+
// into a big step.
335339
override Node getPreUpdateNode() { result.asOperand() = instr.getTotalOperand() }
336340

337341
override Expr getDefinedExpr() {
@@ -504,10 +508,11 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFr
504508
* data flow. It may have less flow than the `localFlowStep` predicate.
505509
*/
506510
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
507-
simpleInstructionLocalFlowStep(nodeFrom.asOperand(), nodeTo.asInstruction())
511+
// Instruction -> Instruction flow
512+
simpleInstructionLocalFlowStep(nodeFrom.asInstruction(), nodeTo.asInstruction())
508513
or
509-
// Flow from an instruction to its operands
510-
nodeTo.asOperand().getAnyDef() = nodeFrom.asInstruction()
514+
// Operand -> Instruction flow
515+
simpleOperandLocalFlowStep(nodeFrom.asOperand(), nodeTo.asInstruction())
511516
}
512517

513518
pragma[noinline]
@@ -519,16 +524,26 @@ private predicate getFieldSizeOfClass(Class c, Type type, int size) {
519524
)
520525
}
521526

527+
private predicate simpleOperandLocalFlowStep(Operand opFrom, Instruction iTo) {
528+
// Certain dataflow steps (for instance `PostUpdateNode.getPreUpdateNode()`) generates flow to
529+
// operands, so we include dataflow from those operands to the "result" of the instruction (i.e., to
530+
// the instruction itself).
531+
exists(PostUpdateNode post |
532+
opFrom = post.getPreUpdateNode().asOperand() and
533+
iTo.getAnOperand() = opFrom
534+
)
535+
}
536+
522537
cached
523-
private predicate simpleInstructionLocalFlowStep(Operand opFrom, Instruction iTo) {
524-
iTo.(CopyInstruction).getSourceValueOperand() = opFrom and not opFrom.isDefinitionInexact()
538+
private predicate simpleInstructionLocalFlowStep(Instruction iFrom, Instruction iTo) {
539+
iTo.(CopyInstruction).getSourceValue() = iFrom
525540
or
526-
iTo.(PhiInstruction).getAnInputOperand() = opFrom and not opFrom.isDefinitionInexact()
541+
iTo.(PhiInstruction).getAnOperand().getDef() = iFrom
527542
or
528543
// A read side effect is almost never exact since we don't know exactly how
529544
// much memory the callee will read.
530-
iTo.(ReadSideEffectInstruction).getSideEffectOperand() = opFrom and
531-
not opFrom.getAnyDef().isResultConflated()
545+
iTo.(ReadSideEffectInstruction).getSideEffectOperand().getAnyDef() = iFrom and
546+
not iFrom.isResultConflated()
532547
or
533548
// Loading a single `int` from an `int *` parameter is not an exact load since
534549
// the parameter may point to an entire array rather than a single `int`. The
@@ -541,8 +556,8 @@ private predicate simpleInstructionLocalFlowStep(Operand opFrom, Instruction iTo
541556
// reassignment of the parameter indirection, including a conditional one that
542557
// leads to a phi node.
543558
exists(InitializeIndirectionInstruction init |
544-
opFrom.getAnyDef() = init and
545-
iTo.(LoadInstruction).getSourceValueOperand() = opFrom and
559+
iFrom = init and
560+
iTo.(LoadInstruction).getSourceValueOperand().getAnyDef() = init and
546561
// Check that the types match. Otherwise we can get flow from an object to
547562
// its fields, which leads to field conflation when there's flow from other
548563
// fields to the object elsewhere.
@@ -551,13 +566,11 @@ private predicate simpleInstructionLocalFlowStep(Operand opFrom, Instruction iTo
551566
)
552567
or
553568
// Treat all conversions as flow, even conversions between different numeric types.
554-
iTo.(ConvertInstruction).getUnaryOperand() = opFrom and not opFrom.isDefinitionInexact()
569+
iTo.(ConvertInstruction).getUnary() = iFrom
555570
or
556-
iTo.(CheckedConvertOrNullInstruction).getUnaryOperand() = opFrom and
557-
not opFrom.isDefinitionInexact()
571+
iTo.(CheckedConvertOrNullInstruction).getUnary() = iFrom
558572
or
559-
iTo.(InheritanceConversionInstruction).getUnaryOperand() = opFrom and
560-
not opFrom.isDefinitionInexact()
573+
iTo.(InheritanceConversionInstruction).getUnary() = iFrom
561574
or
562575
// A chi instruction represents a point where a new value (the _partial_
563576
// operand) may overwrite an old value (the _total_ operand), but the alias
@@ -570,7 +583,7 @@ private predicate simpleInstructionLocalFlowStep(Operand opFrom, Instruction iTo
570583
//
571584
// Flow through the partial operand belongs in the taint-tracking libraries
572585
// for now.
573-
iTo.getAnOperand().(ChiTotalOperand) = opFrom
586+
iTo.getAnOperand().(ChiTotalOperand).getDef() = iFrom
574587
or
575588
// Add flow from write side-effects to non-conflated chi instructions through their
576589
// partial operands. From there, a `readStep` will find subsequent reads of that field.
@@ -585,25 +598,24 @@ private predicate simpleInstructionLocalFlowStep(Operand opFrom, Instruction iTo
585598
// Here, a `WriteSideEffectInstruction` will provide a new definition for `p->x` after the call to
586599
// `setX`, which will be melded into `p` through a chi instruction.
587600
exists(ChiInstruction chi | chi = iTo |
588-
opFrom.getAnyDef() instanceof WriteSideEffectInstruction and
589-
chi.getPartialOperand() = opFrom and
601+
chi.getPartialOperand().getDef() = iFrom.(WriteSideEffectInstruction) and
590602
not chi.isResultConflated()
591603
)
592604
or
593605
// Flow from stores to structs with a single field to a load of that field.
594-
iTo.(LoadInstruction).getSourceValueOperand() = opFrom and
606+
iTo.(LoadInstruction).getSourceValueOperand().getAnyDef() = iFrom and
595607
exists(int size, Type type, Class cTo |
596-
type = opFrom.getAnyDef().getResultType() and
608+
type = iFrom.getResultType() and
597609
cTo = iTo.getResultType() and
598610
cTo.getSize() = size and
599611
getFieldSizeOfClass(cTo, type, size)
600612
)
601613
or
602614
// Flow through modeled functions
603-
modelFlow(opFrom, iTo)
615+
modelFlow(iFrom, iTo)
604616
}
605617

606-
private predicate modelFlow(Operand opFrom, Instruction iTo) {
618+
private predicate modelFlow(Instruction iFrom, Instruction iTo) {
607619
exists(
608620
CallInstruction call, DataFlowFunction func, FunctionInput modelIn, FunctionOutput modelOut
609621
|
@@ -628,17 +640,17 @@ private predicate modelFlow(Operand opFrom, Instruction iTo) {
628640
(
629641
exists(int index |
630642
modelIn.isParameter(index) and
631-
opFrom = call.getPositionalArgumentOperand(index)
643+
iFrom = call.getPositionalArgument(index)
632644
)
633645
or
634646
exists(int index, ReadSideEffectInstruction read |
635647
modelIn.isParameterDeref(index) and
636648
read = getSideEffectFor(call, index) and
637-
opFrom = read.getSideEffectOperand()
649+
iFrom = read.getSideEffectOperand().getAnyDef()
638650
)
639651
or
640652
modelIn.isQualifierAddress() and
641-
opFrom = call.getThisArgumentOperand()
653+
iFrom = call.getThisArgument()
642654
// TODO: add read side effects for qualifiers
643655
)
644656
)

0 commit comments

Comments
 (0)