C++: Several readability fixes:

MathiasVP · MathiasVP · commit a75f195df3d5 · 2021-10-30T10:24:06.000+01:00
1. Added lots of QLDoc explanation about the role of StoreNodeOperand.
  2. Renamed '{StoreNode,ReadNode}.getAPredecessor' to 'getInner' and
     '{StoreNode,ReadNode}.getASuccessor' to 'getOuter'.
  3. Be more explicit about which type of 'StoreNode' is used in various
     places.
diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll
@@ -187,7 +187,7 @@ predicate jumpStep(Node n1, Node n2) { none() }
  * Thus, `node2` references an object with a field `f` that contains the
  * value of `node1`.
  */
-predicate storeStep(StoreNode node1, FieldContent f, StoreNode node2) {
+predicate storeStep(StoreNodeInstr node1, FieldContent f, StoreNodeInstr node2) {
   exists(FieldAddressInstruction fai |
     node1.getInstruction() = fai and
     node2.getInstruction() = fai.getObjectAddress() and
diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll
@@ -206,21 +206,15 @@ class OperandNode extends Node, TOperandNode {
  * A `StoreNode` is a node that has been (or is about to be) the
  * source or target of a `storeStep`.
  */
-abstract class StoreNode extends Node {
-  /** Gets the underlying instruction, if any. */
-  Instruction getInstruction() { none() }
-
-  /** Gets the underlying operand, if any. */
-  Operand getOperand() { none() }
-
+abstract private class StoreNode extends Node {
   /** Holds if this node should receive flow from `addr`. */
   abstract predicate flowInto(Instruction addr);
 
   override Declaration getEnclosingCallable() { result = this.getFunction() }
 
   /** Holds if this `StoreNode` is the root of the address computation used by a store operation. */
   predicate isTerminal() {
-    not exists(this.getAPredecessor()) and
+    not exists(this.getInner()) and
     not storeStep(this, _, _)
   }
 
@@ -233,20 +227,21 @@ abstract class StoreNode extends Node {
   /**
    * Gets the `StoreNode` that computes the address used by this `StoreNode`.
    */
-  abstract StoreNode getAPredecessor();
+  abstract StoreNode getInner();
 
-  /** The inverse of `StoreNode.getAPredecessor`. */
-  final StoreNode getASuccessor() { result.getAPredecessor() = this }
+  /** The inverse of `StoreNode.getInner`. */
+  final StoreNode getOuter() { result.getInner() = this }
 }
 
-private class StoreNodeInstr extends StoreNode, TStoreNodeInstr {
+class StoreNodeInstr extends StoreNode, TStoreNodeInstr {
   Instruction instr;
 
   StoreNodeInstr() { this = TStoreNodeInstr(instr) }
 
   override predicate flowInto(Instruction addr) { this.getInstruction() = addr }
 
-  override Instruction getInstruction() { result = instr }
+  /** Gets the underlying instruction. */
+  Instruction getInstruction() { result = instr }
 
   override Function getFunction() { result = this.getInstruction().getEnclosingFunction() }
 
@@ -262,19 +257,45 @@ private class StoreNodeInstr extends StoreNode, TStoreNodeInstr {
     Ssa::explicitWrite(_, result, this.getInstruction())
   }
 
-  override StoreNode getAPredecessor() {
+  override StoreNodeInstr getInner() {
     Ssa::addressFlow(result.getInstruction(), this.getInstruction())
   }
 }
 
-private class StoreNodeOperand extends StoreNode, TStoreNodeOperand {
+/**
+ * To avoid having `PostUpdateNode`s with multiple pre-update nodes (which can cause performance
+ * problems) we attach the `PostUpdateNode`s that represent output arguments to operands instead of
+ * instructions.
+ *
+ * To see why we need this, consider the expression `b->set(new C())`. The IR of this expression looks
+ * like (simplified):
+ * ```
+ * r1(glval<unknown>) = FunctionAddress[set]            :
+ * r2(glval<unknown>) = FunctionAddress[operator new]   :
+ * r3(unsigned long)  = Constant[8]                     :
+ * r4(void *)         = Call[operator new]              : func:r2, 0:r3
+ * r5(C *)            = Convert                         : r4
+ * r6(glval<unknown>) = FunctionAddress[C]              :
+ * v1(void)           = Call[C]                         : func:r6, this:r5
+ * v2(void)           = Call[set]                       : func:r1, this:r0, 0:r5
+ * ```
+ *
+ * Notice that both the call to `C` and the call to `set` will have an argument that is the
+ * result of calling `operator new` (i.e., `r4`). If we only have `PostUpdateNode`s that are
+ * instructions, both `PostUpdateNode`s would have `r4` as their pre-update node.
+ *
+ * We avoid this issue by having a `PostUpdateNode` for each argument, and let the pre-update node of
+ * each `PostUpdateNode` be the argument _operand_, instead of the defining instruction.
+ */
+class StoreNodeOperand extends StoreNode, TStoreNodeOperand {
   ArgumentOperand operand;
 
   StoreNodeOperand() { this = TStoreNodeOperand(operand) }
 
   override predicate flowInto(Instruction addr) { this.getOperand().getDef() = addr }
 
-  override Operand getOperand() { result = operand }
+  /** Gets the underlying operand. */
+  Operand getOperand() { result = operand }
 
   override Function getFunction() { result = operand.getDef().getEnclosingFunction() }
 
@@ -288,7 +309,32 @@ private class StoreNodeOperand extends StoreNode, TStoreNodeOperand {
     Ssa::explicitWrite(_, result, operand.getDef())
   }
 
-  override StoreNode getAPredecessor() { operand.getDef() = result.getInstruction() }
+  /**
+   * The result of `StoreNodeOperand.getInner` is the `StoreNodeInstr` that represents the
+   * instruction that defines this operand. This means the graph of `getInner` looks like this:
+   * ```
+   * I---I---I
+   *  \   \   \
+   *   O   O   O
+   * ```
+   * where each `StoreNodeOperand` "hooks" into the chain computed by `StoreNodeInstr.getInner`.
+   * This means that the chain of `getInner` calls for the argument `&o.f` in an expression
+   * like `func(&o.f)` is:
+   * ```
+   * r4---r3---r2
+   *  \
+   *   0:r4
+   * ```
+   * where the IR for `func(&o.f)` looks like (simplified):
+   * ```
+   * r1(glval<unknown>) = FunctionAddress[func]        :
+   * r2(glval<O>)       = VariableAddress[o]           :
+   * r3(glval<int>)     = FieldAddress[f]              : r2
+   * r4(int *)          = CopyValue                    : r3
+   * v1(void)           = Call[func]                   : func:r1, 0:r4
+   * ```
+   */
+  override StoreNodeInstr getInner() { operand.getDef() = result.getInstruction() }
 }
 
 /**
@@ -326,22 +372,20 @@ class ReadNode extends Node, TReadNode {
    * Gets a read node with an underlying instruction that is used by this
    * underlying instruction to compute an address of a load instruction.
    */
-  final ReadNode getAPredecessor() {
-    Ssa::addressFlow(result.getInstruction(), this.getInstruction())
-  }
+  final ReadNode getInner() { Ssa::addressFlow(result.getInstruction(), this.getInstruction()) }
 
-  /** The inverse of `ReadNode.getAPredecessor`. */
-  final ReadNode getASuccessor() { result.getAPredecessor() = this }
+  /** The inverse of `ReadNode.getInner`. */
+  final ReadNode getOuter() { result.getInner() = this }
 
   /** Holds if this read node computes a value that will not be used for any future read nodes. */
   final predicate isTerminal() {
-    not exists(this.getASuccessor()) and
+    not exists(this.getOuter()) and
     not readStep(this, _, _)
   }
 
   /** Holds if this read node computes a value that has not yet been used for any read operations. */
   final predicate isInitial() {
-    not exists(this.getAPredecessor()) and
+    not exists(this.getInner()) and
     not readStep(_, _, this)
   }
 }
@@ -787,7 +831,7 @@ private module ReadNodeFlow {
   /** Holds if the read node `nodeTo` should receive flow from the read node `nodeFrom`. */
   predicate flowThrough(ReadNode nodeFrom, ReadNode nodeTo) {
     not readStep(nodeFrom, _, _) and
-    nodeFrom.getASuccessor() = nodeTo
+    nodeFrom.getOuter() = nodeTo
   }
 
   /**
@@ -800,7 +844,7 @@ private module ReadNodeFlow {
     // Use-use flow to another use of the same variable instruction
     Ssa::ssaFlow(nFrom, nodeTo)
     or
-    not exists(nFrom.getAPredecessor()) and
+    not exists(nFrom.getInner()) and
     exists(Node store |
       Ssa::explicitWrite(_, store.asInstruction(), nFrom.getInstruction()) and
       Ssa::ssaFlow(store, nodeTo)
@@ -833,15 +877,15 @@ private module StoreNodeFlow {
   predicate flowThrough(StoreNode nFrom, StoreNode nodeTo) {
     // Flow through a post update node that doesn't need a store step.
     not storeStep(nFrom, _, _) and
-    nodeTo.getASuccessor() = nFrom
+    nodeTo.getOuter() = nFrom
   }
 
   /**
    * Holds if flow should leave the store node `nodeFrom` and enter the node `nodeTo`.
    * This happens because we have traversed an entire chain of field dereferences
    * after a store operation.
    */
-  predicate flowOutOf(StoreNode nFrom, Node nodeTo) {
+  predicate flowOutOf(StoreNodeInstr nFrom, Node nodeTo) {
     nFrom.isTerminal() and
     Ssa::ssaFlow(nFrom, nodeTo)
   }
diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll
@@ -327,7 +327,7 @@ private module Cached {
     )
   }
 
-  private predicate fromStoreNode(StoreNode nodeFrom, Node nodeTo) {
+  private predicate fromStoreNode(StoreNodeInstr nodeFrom, Node nodeTo) {
     // Def-use flow from a `StoreNode`.
     exists(IRBlock bb1, int i1, IRBlock bb2, int i2, Def def, Use use |
       nodeFrom.isTerminal() and
@@ -388,10 +388,10 @@ private module Cached {
       def.hasRankInBlock(block, rnk) and
       nodeTo.hasInputAtRankInBlock(block, rnk)
     |
-      exists(StoreNode store |
-        store = nodeFrom and
-        store.isTerminal() and
-        def.getInstruction() = store.getStoreInstruction()
+      exists(StoreNodeInstr storeNode |
+        storeNode = nodeFrom and
+        storeNode.isTerminal() and
+        def.getInstruction() = storeNode.getStoreInstruction()
       )
       or
       def.getInstruction() = nodeFrom.asInstruction()
@@ -466,7 +466,7 @@ private module Cached {
       operand.getDef() = readNode.getInstruction()
     )
     or
-    exists(StoreNode storeNode, Instruction def |
+    exists(StoreNodeInstr storeNode, Instruction def |
       storeNode = nTo and
       def = operand.getDef()
     |

Original file line number	Diff line number	Diff line change
`@@ -327,7 +327,7 @@ private module Cached {`
`327`	`327`	`)`
`328`	`328`	`}`
`329`	`329`
`330`		`- private predicate fromStoreNode(StoreNode nodeFrom, Node nodeTo) {`
	`330`	`+ private predicate fromStoreNode(StoreNodeInstr nodeFrom, Node nodeTo) {`
`331`	`331`	// Def-use flow from a `StoreNode`.
`332`	`332`	`exists(IRBlock bb1, int i1, IRBlock bb2, int i2, Def def, Use use \|`
`333`	`333`	`nodeFrom.isTerminal() and`
`@@ -388,10 +388,10 @@ private module Cached {`
`388`	`388`	`def.hasRankInBlock(block, rnk) and`
`389`	`389`	`nodeTo.hasInputAtRankInBlock(block, rnk)`
`390`	`390`	`\|`
`391`		`- exists(StoreNode store \|`
`392`		`- store = nodeFrom and`
`393`		`- store.isTerminal() and`
`394`		`- def.getInstruction() = store.getStoreInstruction()`
	`391`	`+ exists(StoreNodeInstr storeNode \|`
	`392`	`+ storeNode = nodeFrom and`
	`393`	`+ storeNode.isTerminal() and`
	`394`	`+ def.getInstruction() = storeNode.getStoreInstruction()`
`395`	`395`	`)`
`396`	`396`	`or`
`397`	`397`	`def.getInstruction() = nodeFrom.asInstruction()`
`@@ -466,7 +466,7 @@ private module Cached {`
`466`	`466`	`operand.getDef() = readNode.getInstruction()`
`467`	`467`	`)`
`468`	`468`	`or`
`469`		`- exists(StoreNode storeNode, Instruction def \|`
	`469`	`+ exists(StoreNodeInstr storeNode, Instruction def \|`
`470`	`470`	`storeNode = nTo and`
`471`	`471`	`def = operand.getDef()`
`472`	`472`	`\|`