Skip to content

Commit 8caff41

Browse files
committed
C++: Throw away most of the usage of IR-computed def-use information. Instead, we rely on the shared SSA library's use-use edges.
1 parent 3a48857 commit 8caff41

File tree

2 files changed

+62
-74
lines changed

2 files changed

+62
-74
lines changed

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,11 +106,9 @@ class ReturnNode extends InstructionNode {
106106
Instruction primary;
107107

108108
ReturnNode() {
109-
exists(ReturnValueInstruction ret | instr = ret.getReturnValue() and primary = ret)
109+
exists(ReturnValueInstruction ret | instr = ret and primary = ret)
110110
or
111-
exists(ReturnIndirectionInstruction rii |
112-
instr = rii.getSideEffectOperand().getAnyDef() and primary = rii
113-
)
111+
exists(ReturnIndirectionInstruction rii | instr = rii and primary = rii)
114112
}
115113

116114
/** Gets the kind of this returned value. */

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll

Lines changed: 60 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,11 @@ class VariableNode extends Node, TVariableNode {
613613
*/
614614
InstructionNode instructionNode(Instruction instr) { result.getInstruction() = instr }
615615

616+
/**
617+
* Gets the node corresponding to `operand`.
618+
*/
619+
OperandNode operandNode(Operand operand) { result.getOperand() = operand }
620+
616621
/**
617622
* DEPRECATED: use `definitionByReferenceNodeFromArgument` instead.
618623
*
@@ -693,14 +698,32 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
693698
ReadNodeFlow::flowThrough(nodeFrom, nodeTo)
694699
or
695700
ReadNodeFlow::flowOutOf(nodeFrom, nodeTo)
701+
or
702+
// Adjacent-def-use and adjacent-use-use flow
703+
adjacentDefUseFlow(nodeFrom, nodeTo)
696704
}
697705

698-
pragma[noinline]
699-
private predicate getFieldSizeOfClass(Class c, Type type, int size) {
700-
exists(Field f |
701-
f.getDeclaringType() = c and
702-
f.getUnderlyingType() = type and
703-
type.getSize() = size
706+
private predicate adjacentDefUseFlow(Node nodeFrom, Node nodeTo) {
707+
// Flow that isn't already covered by field flow out of store/read nodes.
708+
not nodeFrom.asInstruction() = any(StoreNode pun).getStoreInstruction() and
709+
not nodeFrom.asInstruction() = any(ReadNode pun).getALoadInstruction() and
710+
(
711+
//Def-use flow
712+
Ssa::ssaFlow(nodeFrom, nodeTo)
713+
or
714+
exists(Instruction loadAddress | loadAddress = Ssa::getSourceAddressFromNode(nodeFrom) |
715+
// Use-use flow through reads
716+
exists(Node address |
717+
Ssa::addressFlowTC(address.asInstruction(), loadAddress) and
718+
Ssa::ssaFlow(address, nodeTo)
719+
)
720+
or
721+
// Use-use flow through stores.
722+
exists(Node store |
723+
Ssa::explicitWrite(_, store.asInstruction(), loadAddress) and
724+
Ssa::ssaFlow(store, nodeTo)
725+
)
726+
)
704727
)
705728
}
706729

@@ -789,40 +812,39 @@ private module StoreNodeFlow {
789812

790813
private predicate simpleOperandLocalFlowStep(Instruction iFrom, Operand opTo) {
791814
// Propagate flow from an instruction to its exact uses.
815+
// We do this for all instruction/operand pairs, except when the operand is the
816+
// side effect operand of a ReturnIndirectionInstruction, or the load operand of a LoadInstruction.
817+
// This is because we get these flows through the shared SSA library already, and including this
818+
// flow here will create multiple dataflow paths which creates a blowup in stage 3 of dataflow.
819+
(
820+
not any(ReturnIndirectionInstruction ret).getSideEffectOperand() = opTo and
821+
not any(LoadInstruction load).getSourceValueOperand() = opTo and
822+
not any(ReturnValueInstruction ret).getReturnValueOperand() = opTo
823+
) and
792824
opTo.getDef() = iFrom
793-
or
794-
opTo = any(ReadSideEffectInstruction read).getSideEffectOperand() and
795-
not iFrom.isResultConflated() and
796-
iFrom = opTo.getAnyDef()
797-
or
798-
// Loading a single `int` from an `int *` parameter is not an exact load since
799-
// the parameter may point to an entire array rather than a single `int`. The
800-
// following rule ensures that any flow going into the
801-
// `InitializeIndirectionInstruction`, even if it's for a different array
802-
// element, will propagate to a load of the first element.
803-
//
804-
// Since we're linking `InitializeIndirectionInstruction` and
805-
// `LoadInstruction` together directly, this rule will break if there's any
806-
// reassignment of the parameter indirection, including a conditional one that
807-
// leads to a phi node.
808-
exists(InitializeIndirectionInstruction init |
809-
iFrom = init and
810-
opTo.(LoadOperand).getAnyDef() = init and
811-
// Check that the types match. Otherwise we can get flow from an object to
812-
// its fields, which leads to field conflation when there's flow from other
813-
// fields to the object elsewhere.
814-
init.getParameter().getType().getUnspecifiedType().(DerivedType).getBaseType() =
815-
opTo.getType().getUnspecifiedType()
816-
)
817-
or
818-
// Flow from stores to structs with a single field to a load of that field.
819-
exists(LoadInstruction load |
820-
load.getSourceValueOperand() = opTo and
821-
opTo.getAnyDef() = iFrom and
822-
isSingleFieldClass(pragma[only_bind_out](pragma[only_bind_out](iFrom).getResultType()), opTo)
825+
}
826+
827+
pragma[noinline]
828+
private predicate getAddressType(LoadInstruction load, Type t) {
829+
exists(Instruction address |
830+
address = load.getSourceAddress() and
831+
t = address.getResultType()
823832
)
824833
}
825834

835+
/**
836+
* Like the AST dataflow library, we want to conflate the address and value of a reference. This class
837+
* represents the `LoadInstruction` that is generated from a reference dereference.
838+
*/
839+
private class ReferenceDereferenceInstruction extends LoadInstruction {
840+
ReferenceDereferenceInstruction() {
841+
exists(ReferenceType ref |
842+
getAddressType(this, ref) and
843+
this.getResultType() = ref.getBaseType()
844+
)
845+
}
846+
}
847+
826848
private predicate simpleInstructionLocalFlowStep(Operand opFrom, Instruction iTo) {
827849
iTo.(CopyInstruction).getSourceValueOperand() = opFrom
828850
or
@@ -835,40 +857,8 @@ private predicate simpleInstructionLocalFlowStep(Operand opFrom, Instruction iTo
835857
or
836858
iTo.(InheritanceConversionInstruction).getUnaryOperand() = opFrom
837859
or
838-
// A chi instruction represents a point where a new value (the _partial_
839-
// operand) may overwrite an old value (the _total_ operand), but the alias
840-
// analysis couldn't determine that it surely will overwrite every bit of it or
841-
// that it surely will overwrite no bit of it.
842-
//
843-
// By allowing flow through the total operand, we ensure that flow is not lost
844-
// due to shortcomings of the alias analysis. We may get false flow in cases
845-
// where the data is indeed overwritten.
846-
//
847-
// Flow through the partial operand belongs in the taint-tracking libraries
848-
// for now.
849-
iTo.getAnOperand().(ChiTotalOperand) = opFrom
850-
or
851-
// Add flow from write side-effects to non-conflated chi instructions through their
852-
// partial operands. From there, a `readStep` will find subsequent reads of that field.
853-
// Consider the following example:
854-
// ```
855-
// void setX(Point* p, int new_x) {
856-
// p->x = new_x;
857-
// }
858-
// ...
859-
// setX(&p, taint());
860-
// ```
861-
// Here, a `WriteSideEffectInstruction` will provide a new definition for `p->x` after the call to
862-
// `setX`, which will be melded into `p` through a chi instruction.
863-
exists(ChiInstruction chi | chi = iTo |
864-
opFrom.getAnyDef() instanceof WriteSideEffectInstruction and
865-
chi.getPartialOperand() = opFrom and
866-
not chi.isResultConflated() and
867-
// In a call such as `set_value(&x->val);` we don't want the memory representing `x` to receive
868-
// dataflow by a simple step. Instead, this is handled by field flow. If we add a simple step here
869-
// we can get field-to-object flow.
870-
not chi.isPartialUpdate()
871-
)
860+
// Conflate references and values like in AST dataflow.
861+
iTo.(ReferenceDereferenceInstruction).getSourceAddressOperand() = opFrom
872862
or
873863
// Flow through modeled functions
874864
modelFlow(opFrom, iTo)

0 commit comments

Comments
 (0)