Skip to content

Commit 7ac9c1e

Browse files
authored
Merge pull request github#10713 from MathiasVP/fix-types-in-ir-dataflow
C++: Fix `getType` for experimental IR dataflow
2 parents b3f1031 + 95e7985 commit 7ac9c1e

File tree

5 files changed

+88
-38
lines changed

5 files changed

+88
-38
lines changed

cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ private newtype TReturnKind =
137137
exists(IndirectReturnNode return, ReturnIndirectionInstruction returnInd |
138138
returnInd.hasIndex(argumentIndex) and
139139
return.getAddressOperand() = returnInd.getSourceAddressOperand() and
140-
indirectionIndex = return.getIndirectionIndex() - 1 // We subtract one because the return loads the value.
140+
indirectionIndex = return.getIndirectionIndex()
141141
)
142142
}
143143

@@ -197,7 +197,7 @@ class ReturnIndirectionNode extends IndirectReturnNode, ReturnNode {
197197
exists(int argumentIndex, ReturnIndirectionInstruction returnInd |
198198
returnInd.hasIndex(argumentIndex) and
199199
this.getAddressOperand() = returnInd.getSourceAddressOperand() and
200-
result = TIndirectReturnKind(argumentIndex, this.getIndirectionIndex() - 1) and
200+
result = TIndirectReturnKind(argumentIndex, this.getIndirectionIndex()) and
201201
hasNonInitializeParameterDef(returnInd.getIRVariable())
202202
)
203203
or
@@ -365,7 +365,7 @@ predicate jumpStep(Node n1, Node n2) {
365365
predicate storeStep(Node node1, Content c, PostFieldUpdateNode node2) {
366366
exists(int indirectionIndex1, int numberOfLoads, StoreInstruction store |
367367
nodeHasInstruction(node1, store, pragma[only_bind_into](indirectionIndex1)) and
368-
node2.getIndirectionIndex() = 0 and
368+
node2.getIndirectionIndex() = 1 and
369369
numberOfLoadsFromOperand(node2.getFieldAddress(), store.getDestinationAddressOperand(),
370370
numberOfLoads)
371371
|
@@ -465,20 +465,20 @@ predicate clearsContent(Node n, Content c) {
465465
predicate expectsContent(Node n, ContentSet c) { none() }
466466

467467
/** Gets the type of `n` used for type pruning. */
468-
IRType getNodeType(Node n) {
468+
DataFlowType getNodeType(Node n) {
469469
suppressUnusedNode(n) and
470-
result instanceof IRVoidType // stub implementation
470+
result instanceof VoidType // stub implementation
471471
}
472472

473473
/** Gets a string representation of a type returned by `getNodeType`. */
474-
string ppReprType(IRType t) { none() } // stub implementation
474+
string ppReprType(DataFlowType t) { none() } // stub implementation
475475

476476
/**
477477
* Holds if `t1` and `t2` are compatible, that is, whether data can flow from
478478
* a node of type `t1` to a node of type `t2`.
479479
*/
480480
pragma[inline]
481-
predicate compatibleTypes(IRType t1, IRType t2) {
481+
predicate compatibleTypes(DataFlowType t1, DataFlowType t2) {
482482
any() // stub implementation
483483
}
484484

@@ -502,7 +502,7 @@ class DataFlowCallable = Cpp::Declaration;
502502

503503
class DataFlowExpr = Expr;
504504

505-
class DataFlowType = IRType;
505+
class DataFlowType = Type;
506506

507507
/** A function call relevant for data flow. */
508508
class DataFlowCall extends CallInstruction {

cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll

Lines changed: 74 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,12 @@ private module Cached {
3838
TVariableNode(Variable var) or
3939
TPostFieldUpdateNode(FieldAddress operand, int indirectionIndex) {
4040
indirectionIndex =
41-
[0 .. Ssa::countIndirectionsForCppType(operand.getObjectAddress().getResultLanguageType()) -
42-
1]
41+
[1 .. Ssa::countIndirectionsForCppType(operand.getObjectAddress().getResultLanguageType())]
4342
} or
4443
TSsaPhiNode(Ssa::PhiNode phi) or
4544
TIndirectArgumentOutNode(ArgumentOperand operand, int indirectionIndex) {
4645
Ssa::isModifiableByCall(operand) and
47-
indirectionIndex = [0 .. Ssa::countIndirectionsForCppType(operand.getLanguageType()) - 1]
46+
indirectionIndex = [1 .. Ssa::countIndirectionsForCppType(operand.getLanguageType())]
4847
} or
4948
TIndirectOperand(Operand op, int indirectionIndex) {
5049
Ssa::hasIndirectOperand(op, indirectionIndex)
@@ -113,7 +112,7 @@ class Node extends TIRDataFlowNode {
113112
Declaration getFunction() { none() } // overridden in subclasses
114113

115114
/** Gets the type of this node. */
116-
IRType getType() { none() } // overridden in subclasses
115+
DataFlowType getType() { none() } // overridden in subclasses
117116

118117
/** Gets the instruction corresponding to this node, if any. */
119118
Instruction asInstruction() { result = this.(InstructionNode).getInstruction() }
@@ -273,7 +272,7 @@ class Node extends TIRDataFlowNode {
273272
/**
274273
* Gets an upper bound on the type of this node.
275274
*/
276-
IRType getTypeBound() { result = this.getType() }
275+
DataFlowType getTypeBound() { result = this.getType() }
277276

278277
/** Gets the location of this element. */
279278
cached
@@ -322,7 +321,7 @@ class InstructionNode extends Node, TInstructionNode {
322321

323322
override Declaration getFunction() { result = instr.getEnclosingFunction() }
324323

325-
override IRType getType() { result = instr.getResultIRType() }
324+
override DataFlowType getType() { result = instr.getResultType() }
326325

327326
final override Location getLocationImpl() { result = instr.getLocation() }
328327

@@ -348,13 +347,32 @@ class OperandNode extends Node, TOperandNode {
348347

349348
override Declaration getFunction() { result = op.getUse().getEnclosingFunction() }
350349

351-
override IRType getType() { result = op.getIRType() }
350+
override DataFlowType getType() { result = op.getType() }
352351

353352
final override Location getLocationImpl() { result = op.getLocation() }
354353

355354
override string toStringImpl() { result = this.getOperand().toString() }
356355
}
357356

357+
/**
358+
* Returns `t`, but stripped of the `n` outermost pointers, references, etc.
359+
*
360+
* For example, `stripPointers(int*&, 2)` is `int` and `stripPointers(int*, 0)` is `int*`.
361+
*/
362+
private Type stripPointers(Type t, int n) {
363+
result = t and n = 0
364+
or
365+
result = stripPointers(t.(PointerType).getBaseType(), n - 1)
366+
or
367+
result = stripPointers(t.(ArrayType).getBaseType(), n - 1)
368+
or
369+
result = stripPointers(t.(ReferenceType).getBaseType(), n - 1)
370+
or
371+
result = stripPointers(t.(PointerToMemberType).getBaseType(), n - 1)
372+
or
373+
result = stripPointers(t.(FunctionPointerIshType).getBaseType(), n - 1)
374+
}
375+
358376
/**
359377
* INTERNAL: do not use.
360378
*
@@ -370,19 +388,15 @@ class PostFieldUpdateNode extends TPostFieldUpdateNode, PartialDefinitionNode {
370388

371389
override Declaration getEnclosingCallable() { result = this.getFunction() }
372390

373-
override IRType getType() { result = fieldAddress.getIRType() }
374-
375391
FieldAddress getFieldAddress() { result = fieldAddress }
376392

377393
Field getUpdatedField() { result = fieldAddress.getField() }
378394

379395
int getIndirectionIndex() { result = indirectionIndex }
380396

381397
override Node getPreUpdateNode() {
382-
// + 1 because we're storing into an lvalue, and the original node should be the rvalue of
383-
// the same address.
384398
hasOperandAndIndex(result, pragma[only_bind_into](fieldAddress).getObjectAddressOperand(),
385-
indirectionIndex + 1)
399+
indirectionIndex)
386400
}
387401

388402
override Expr getDefinedExpr() {
@@ -411,7 +425,7 @@ class SsaPhiNode extends Node, TSsaPhiNode {
411425

412426
override Declaration getFunction() { result = phi.getBasicBlock().getEnclosingFunction() }
413427

414-
override IRType getType() { result instanceof IRVoidType }
428+
override DataFlowType getType() { result = this.getAnInput().getType() }
415429

416430
final override Location getLocationImpl() { result = phi.getBasicBlock().getLocation() }
417431

@@ -454,8 +468,6 @@ class SideEffectOperandNode extends Node, IndirectOperand {
454468

455469
override Function getFunction() { result = call.getEnclosingFunction() }
456470

457-
override IRType getType() { result instanceof IRVoidType }
458-
459471
Expr getArgument() { result = call.getArgument(argumentIndex).getUnconvertedResultExpression() }
460472
}
461473

@@ -478,8 +490,6 @@ class IndirectParameterNode extends Node, IndirectInstruction {
478490

479491
override Function getFunction() { result = this.getInstruction().getEnclosingFunction() }
480492

481-
override IRType getType() { result instanceof IRVoidType }
482-
483493
override string toStringImpl() {
484494
result = this.getParameter().toString() + " indirection"
485495
or
@@ -504,8 +514,6 @@ class IndirectReturnNode extends IndirectOperand {
504514
Operand getAddressOperand() { result = operand }
505515

506516
override Declaration getEnclosingCallable() { result = this.getFunction() }
507-
508-
override IRType getType() { result instanceof IRVoidType }
509517
}
510518

511519
/**
@@ -536,9 +544,7 @@ class IndirectArgumentOutNode extends Node, TIndirectArgumentOutNode, PostUpdate
536544

537545
override Function getFunction() { result = this.getCallInstruction().getEnclosingFunction() }
538546

539-
override IRType getType() { result instanceof IRVoidType }
540-
541-
override Node getPreUpdateNode() { hasOperandAndIndex(result, operand, indirectionIndex + 1) }
547+
override Node getPreUpdateNode() { hasOperandAndIndex(result, operand, indirectionIndex) }
542548

543549
override string toStringImpl() {
544550
// This string should be unique enough to be helpful but common enough to
@@ -594,6 +600,38 @@ class IndirectReturnOutNode extends Node {
594600
int getIndirectionIndex() { result = indirectionIndex }
595601
}
596602

603+
private PointerType getGLValueType(Type t, int indirectionIndex) {
604+
result.getBaseType() = stripPointers(t, indirectionIndex - 1)
605+
}
606+
607+
bindingset[isGLValue]
608+
private DataFlowType getTypeImpl(Type t, int indirectionIndex, boolean isGLValue) {
609+
if isGLValue = true
610+
then
611+
result = getGLValueType(t, indirectionIndex)
612+
or
613+
// Ideally, the above case would cover all glvalue cases. However, consider the case where
614+
// the database consists only of:
615+
// ```
616+
// void test() {
617+
// int* x;
618+
// x = nullptr;
619+
// }
620+
// ```
621+
// and we want to compute the type of `*x` in the assignment `x = nullptr`. Here, `x` is an lvalue
622+
// of type int* (which morally is an int**). So when we call `getTypeImpl` it will be with the
623+
// parameters:
624+
// - t = int*
625+
// - indirectionIndex = 1 (when we want to model the dataflow node corresponding to *x)
626+
// - isGLValue = true
627+
// In this case, `getTypeImpl(t, indirectionIndex, isGLValue)` should give back `int**`. In this
628+
// case, however, `int**` does not exist in the database. So instead we return int* (which is
629+
// wrong, but at least we have a type).
630+
not exists(getGLValueType(t, indirectionIndex)) and
631+
result = stripPointers(t, indirectionIndex - 1)
632+
else result = stripPointers(t, indirectionIndex)
633+
}
634+
597635
/**
598636
* INTERNAL: Do not use.
599637
*
@@ -615,7 +653,11 @@ class IndirectOperand extends Node, TIndirectOperand {
615653

616654
override Declaration getEnclosingCallable() { result = this.getFunction() }
617655

618-
override IRType getType() { result = this.getOperand().getIRType() }
656+
override DataFlowType getType() {
657+
exists(boolean isGLValue | if operand.isGLValue() then isGLValue = true else isGLValue = false |
658+
result = getTypeImpl(operand.getType().getUnspecifiedType(), indirectionIndex, isGLValue)
659+
)
660+
}
619661

620662
final override Location getLocationImpl() { result = this.getOperand().getLocation() }
621663

@@ -645,7 +687,11 @@ class IndirectInstruction extends Node, TIndirectInstruction {
645687

646688
override Declaration getEnclosingCallable() { result = this.getFunction() }
647689

648-
override IRType getType() { result = this.getInstruction().getResultIRType() }
690+
override DataFlowType getType() {
691+
exists(boolean isGLValue | if instr.isGLValue() then isGLValue = true else isGLValue = false |
692+
result = getTypeImpl(instr.getResultType().getUnspecifiedType(), indirectionIndex, isGLValue)
693+
)
694+
}
649695

650696
final override Location getLocationImpl() { result = this.getInstruction().getLocation() }
651697

@@ -859,6 +905,8 @@ abstract class PostUpdateNode extends Node {
859905
* Gets the node before the state update.
860906
*/
861907
abstract Node getPreUpdateNode();
908+
909+
final override DataFlowType getType() { result = this.getPreUpdateNode().getType() }
862910
}
863911

864912
/**
@@ -922,7 +970,7 @@ class VariableNode extends Node, TVariableNode {
922970
result = v
923971
}
924972

925-
override IRType getType() { result.getCanonicalLanguageType().hasUnspecifiedType(v.getType(), _) }
973+
override DataFlowType getType() { result = v.getType() }
926974

927975
final override Location getLocationImpl() { result = v.getLocation() }
928976

@@ -1075,7 +1123,7 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
10751123
store.getDestinationAddressOperand() = address
10761124
)
10771125
or
1078-
Ssa::outNodeHasAddressAndIndex(nodeFrom, address, indirectionIndex - 1)
1126+
Ssa::outNodeHasAddressAndIndex(nodeFrom, address, indirectionIndex)
10791127
)
10801128
}
10811129

cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/ModelUtil.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ Node callOutput(CallInstruction call, FunctionOutput output) {
4141
// The side effect of a call on the value pointed to by an argument or qualifier
4242
exists(int index, int indirectionIndex |
4343
result.(IndirectArgumentOutNode).getArgumentIndex() = index and
44-
result.(IndirectArgumentOutNode).getIndirectionIndex() + 1 = indirectionIndex and
44+
result.(IndirectArgumentOutNode).getIndirectionIndex() = indirectionIndex and
4545
result.(IndirectArgumentOutNode).getCallInstruction() = call and
4646
output.isParameterDerefOrQualifierObject(index, indirectionIndex)
4747
)

cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ private import DataFlowUtil
1111
* corresponding `(Indirect)OperandNode`.
1212
*/
1313
predicate ignoreOperand(Operand operand) {
14-
operand = any(Instruction instr | ignoreInstruction(instr)).getAnOperand()
14+
operand = any(Instruction instr | ignoreInstruction(instr)).getAnOperand() or
15+
operand = any(Instruction instr | ignoreInstruction(instr)).getAUse() or
16+
operand instanceof MemoryOperand
1517
}
1618

1719
/**

cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/ssa0/SsaInternals.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ private module SourceVariables {
3636

3737
override string toString() { result = var.toString() }
3838

39-
override DataFlowType getType() { result = var.getIRType() }
39+
override DataFlowType getType() { result = var.getType() }
4040
}
4141

4242
class BaseCallVariable extends BaseSourceVariable, TBaseCallVariable {
@@ -48,7 +48,7 @@ private module SourceVariables {
4848

4949
override string toString() { result = call.toString() }
5050

51-
override DataFlowType getType() { result = call.getResultIRType() }
51+
override DataFlowType getType() { result = call.getResultType() }
5252
}
5353

5454
private newtype TSourceVariable =

0 commit comments

Comments
 (0)