microsoft
diff --git a/‎.vscode/tasks.json
Lines changed: 17 additions & 1 deletion b/‎.vscode/tasks.json
Lines changed: 17 additions & 1 deletion
diff --git a/‎CODEOWNERS
Lines changed: 3 additions & 0 deletions b/‎CODEOWNERS
Lines changed: 3 additions & 0 deletions
diff --git a/‎cpp/ql/lib/experimental/semmle/code/cpp/dataflow/ProductFlow.qll
Lines changed: 87 additions & 21 deletions b/‎cpp/ql/lib/experimental/semmle/code/cpp/dataflow/ProductFlow.qll
Lines changed: 87 additions & 21 deletions
diff --git a/‎cpp/ql/src/experimental/Likely Bugs/OverrunWriteProductFlow.ql
Lines changed: 152 additions & 9 deletions b/‎cpp/ql/src/experimental/Likely Bugs/OverrunWriteProductFlow.ql
Lines changed: 152 additions & 9 deletions
diff --git a/‎cpp/ql/src/experimental/Security/CWE/CWE-193/InvalidPointerDeref.ql
Lines changed: 10 additions & 0 deletions b/‎cpp/ql/src/experimental/Security/CWE/CWE-193/InvalidPointerDeref.ql
Lines changed: 10 additions & 0 deletions
@@ -22,6 +22,22 @@
                 "command": "${config:python.pythonPath}",
             },
             "problemMatcher": []
+        },
+        {
+            "label": "Accept .expected changes from CI",
+            "type": "process",
+            // Non-Windows OS will usually have Python 3 already installed at /usr/bin/python3.
+            "command": "python3",
+            "args": [
+                "misc/scripts/accept-expected-changes-from-ci.py"
+            ],
+            "group": "build",
+            "windows": {
+                // On Windows, use whatever Python interpreter is configured for this workspace. The default is
+                // just `python`, so if Python is already on the path, this will find it.
+                "command": "${config:python.pythonPath}",
+            },
+            "problemMatcher": []
         }
     ]
-}
+}
@@ -40,3 +40,6 @@ WORKSPACE.bazel @github/codeql-ci-reviewers
 /.github/workflows/ql-for-ql-* @github/codeql-ql-for-ql-reviewers
 /.github/workflows/ruby-* @github/codeql-ruby
 /.github/workflows/swift.yml @github/codeql-swift
+
+# Misc
+/misc/scripts/accept-expected-changes-from-ci.py @RasmusWL
@@ -290,9 +290,9 @@ module ProductFlow {
       predicate isBarrierIn(DataFlow::Node node) { Config::isBarrierIn1(node) }
     }
 
-    module Flow1 = DataFlow::GlobalWithState<Config1>;
+    private module Flow1 = DataFlow::GlobalWithState<Config1>;
 
-    module Config2 implements DataFlow::StateConfigSig {
+    private module Config2 implements DataFlow::StateConfigSig {
       class FlowState = FlowState2;
 
       predicate isSource(DataFlow::Node source, FlowState state) {
@@ -322,27 +322,90 @@ module ProductFlow {
       predicate isBarrierIn(DataFlow::Node node) { Config::isBarrierIn2(node) }
     }
 
-    module Flow2 = DataFlow::GlobalWithState<Config2>;
+    private module Flow2 = DataFlow::GlobalWithState<Config2>;
 
+    /**
+     * Holds if the nodes and flow states of `node1` and `node2` together satisfy
+     * `Config::isSourcePair`.
+     */
+    private predicate isSourcePair(Flow1::PathNode node1, Flow2::PathNode node2) {
+      Config::isSourcePair(node1.getNode(), node1.getState(), node2.getNode(), node2.getState())
+    }
+
+    /**
+     * Holds if the nodes and flow states of `node1` and `node2` together satisfy
+     * `Config::isSinkPair`.
+     */
+    private predicate isSinkPair(Flow1::PathNode node1, Flow2::PathNode node2) {
+      Config::isSinkPair(node1.getNode(), node1.getState(), node2.getNode(), node2.getState())
+    }
+
+    /**
+     * Holds if `(node1, node2)` is a source pair, or is the successor pair of some
+     * `fwdIsSuccessor` step.
+     */
+    pragma[assume_small_delta]
     pragma[nomagic]
-    private predicate reachableInterprocEntry(
-      Flow1::PathNode source1, Flow2::PathNode source2, Flow1::PathNode node1, Flow2::PathNode node2
+    private predicate fwdReachableInterprocEntry(Flow1::PathNode node1, Flow2::PathNode node2) {
+      isSourcePair(node1, node2)
+      or
+      fwdIsSuccessor(_, _, node1, node2)
+    }
+
+    /**
+     * Holds if `(succ1, succ2)` follows `(mid1, mid2)` in one of two ways:
+     * 1. `(mid1, mid2)` is a sink pair and `(succ1, succ2)` is that same pair, or
+     * 2. `interprocEdgePair(mid1, mid2, succ1, succ2)` holds.
+     */
+    pragma[nomagic]
+    private predicate fwdIsSuccessorExit(
+      Flow1::PathNode mid1, Flow2::PathNode mid2, Flow1::PathNode succ1, Flow2::PathNode succ2
     ) {
-      Config::isSourcePair(node1.getNode(), node1.getState(), node2.getNode(), node2.getState()) and
-      node1 = source1 and
-      node2 = source2
+      isSinkPair(mid1, mid2) and
+      succ1 = mid1 and
+      succ2 = mid2
       or
-      exists(
-        Flow1::PathNode midEntry1, Flow2::PathNode midEntry2, Flow1::PathNode midExit1,
-        Flow2::PathNode midExit2
-      |
-        reachableInterprocEntry(source1, source2, midEntry1, midEntry2) and
-        interprocEdgePair(midExit1, midExit2, node1, node2) and
-        localPathStep1*(midEntry1, midExit1) and
-        localPathStep2*(midEntry2, midExit2)
+      interprocEdgePair(mid1, mid2, succ1, succ2)
+    }
+
+    /**
+     * Holds if `(pred1, pred2)` is forward-reachable, `mid1` is reachable from `pred1`
+     * by zero or more `localPathStep1` steps, and
+     * `fwdIsSuccessorExit(mid1, mid2, succ1, succ2)` holds.
+     *
+     * Only the first projection is walked here: `mid2` is not related to `pred2` by
+     * this predicate (see `fwdIsSuccessor2` for the second projection).
+     */
+    private predicate fwdIsSuccessor1(
+      Flow1::PathNode pred1, Flow2::PathNode pred2, Flow1::PathNode mid1, Flow2::PathNode mid2,
+      Flow1::PathNode succ1, Flow2::PathNode succ2
+    ) {
+      fwdReachableInterprocEntry(pred1, pred2) and
+      localPathStep1*(pred1, mid1) and
+      fwdIsSuccessorExit(pragma[only_bind_into](mid1), pragma[only_bind_into](mid2), succ1, succ2)
+    }
+
+    /**
+     * Holds if `(pred1, pred2)` is forward-reachable, `mid2` is reachable from `pred2`
+     * by zero or more `localPathStep2` steps, and
+     * `fwdIsSuccessorExit(mid1, mid2, succ1, succ2)` holds.
+     *
+     * Only the second projection is walked here: `mid1` is not related to `pred1` by
+     * this predicate (see `fwdIsSuccessor1` for the first projection).
+     */
+    private predicate fwdIsSuccessor2(
+      Flow1::PathNode pred1, Flow2::PathNode pred2, Flow1::PathNode mid1, Flow2::PathNode mid2,
+      Flow1::PathNode succ1, Flow2::PathNode succ2
+    ) {
+      fwdReachableInterprocEntry(pred1, pred2) and
+      localPathStep2*(pred2, mid2) and
+      fwdIsSuccessorExit(pragma[only_bind_into](mid1), pragma[only_bind_into](mid2), succ1, succ2)
+    }
+
+    /**
+     * Holds if there is a pair `(mid1, mid2)` for which both `fwdIsSuccessor1` and
+     * `fwdIsSuccessor2` hold with the same `pred` and `succ` pairs. That is:
+     * `(pred1, pred2)` is forward-reachable, `mid1` and `mid2` are reachable from
+     * `pred1` and `pred2` by zero or more local steps in their respective flows, and
+     * `fwdIsSuccessorExit(mid1, mid2, succ1, succ2)` holds.
+     */
+    pragma[assume_small_delta]
+    private predicate fwdIsSuccessor(
+      Flow1::PathNode pred1, Flow2::PathNode pred2, Flow1::PathNode succ1, Flow2::PathNode succ2
+    ) {
+      exists(Flow1::PathNode mid1, Flow2::PathNode mid2 |
+        fwdIsSuccessor1(pred1, pred2, mid1, mid2, succ1, succ2) and
+        fwdIsSuccessor2(pred1, pred2, mid1, mid2, succ1, succ2)
+      )
+    }
+
+    /**
+     * Holds if `(node1, node2)` is forward-reachable and is a sink pair, or if it has a
+     * `fwdIsSuccessor` step to a pair for which this predicate holds.
+     */
+    pragma[assume_small_delta]
+    pragma[nomagic]
+    private predicate revReachableInterprocEntry(Flow1::PathNode node1, Flow2::PathNode node2) {
+      fwdReachableInterprocEntry(node1, node2) and
+      isSinkPair(node1, node2)
+      or
+      exists(Flow1::PathNode succ1, Flow2::PathNode succ2 |
+        revReachableInterprocEntry(succ1, succ2) and
+        fwdIsSuccessor(node1, node2, succ1, succ2)
       )
     }
 
+    /**
+     * A pair consisting of one path node from each of the two flows, restricted to
+     * pairs for which `revReachableInterprocEntry` holds.
+     */
+    private newtype TNodePair =
+      TMkNodePair(Flow1::PathNode node1, Flow2::PathNode node2) {
+        revReachableInterprocEntry(node1, node2)
+      }
+
+    /**
+     * Holds if `n1` and `n2` wrap node pairs that are related by `fwdIsSuccessor`,
+     * with `n1` as the predecessor pair and `n2` as the successor pair.
+     */
+    private predicate pathSucc(TNodePair n1, TNodePair n2) {
+      exists(Flow1::PathNode n11, Flow2::PathNode n12, Flow1::PathNode n21, Flow2::PathNode n22 |
+        n1 = TMkNodePair(n11, n12) and
+        n2 = TMkNodePair(n21, n22) and
+        fwdIsSuccessor(n11, n12, n21, n22)
+      )
+    }
+
+    /**
+     * The transitive closure (one or more steps) of `pathSucc`, computed with the
+     * `fastTC` higher-order predicate.
+     */
+    private predicate pathSuccPlus(TNodePair n1, TNodePair n2) = fastTC(pathSucc/2)(n1, n2)
+
     private predicate localPathStep1(Flow1::PathNode pred, Flow1::PathNode succ) {
       Flow1::PathGraph::edges(pred, succ) and
       pragma[only_bind_out](pred.getNode().getEnclosingCallable()) =
@@ -474,11 +537,14 @@ module ProductFlow {
     private predicate reachable(
       Flow1::PathNode source1, Flow2::PathNode source2, Flow1::PathNode sink1, Flow2::PathNode sink2
     ) {
-      exists(Flow1::PathNode mid1, Flow2::PathNode mid2 |
-        reachableInterprocEntry(source1, source2, mid1, mid2) and
-        Config::isSinkPair(sink1.getNode(), sink1.getState(), sink2.getNode(), sink2.getState()) and
-        localPathStep1*(mid1, sink1) and
-        localPathStep2*(mid2, sink2)
+      isSourcePair(source1, source2) and
+      isSinkPair(sink1, sink2) and
+      exists(TNodePair n1, TNodePair n2 |
+        n1 = TMkNodePair(source1, source2) and
+        n2 = TMkNodePair(sink1, sink2)
+      |
+        pathSuccPlus(n1, n2) or
+        n1 = n2
       )
     }
   }
 
@@ -78,6 +78,149 @@ predicate isSinkPairImpl(
   )
 }
 
+module ValidState {
+  /**
+   * In the `StringSizeConfig` configuration we use an integer as the flow state for the second
+   * projection of the dataflow graph. The integer represents an offset that is added to the
+   * size of the allocation. For example, given:
+   * ```cpp
+   * char* p = new char[size + 1];
+   * size += 1;
+   * memset(p, 0, size);
+   * ```
+   * the initial flow state is `1`. This represents the fact that `size + 1` is a valid bound
+   * for the size of the allocation pointed to by `p`. After updating the size using `+=`, the
+   * flow state changes to `0`, which represents the fact that `size + 0` is a valid bound for
+   * the allocation.
+   *
+   * So we need to compute a set of valid integers that represent the offset applied to the
+   * size. We do this in two steps:
+   * 1. We first perform the dataflow traversal that the second projection of the product-flow
+   * library will perform, and visit all the places where the size argument is modified.
+   * 2. Once that dataflow traversal is done, we accumulate the offsets added at each place
+   * where the offset is modified (see `validStateImpl`).
+   *
+   * Because we want to guarantee that each place where we modify the offset has a `PathNode`
+   * we "flip" a boolean flow state in each `isAdditionalFlowStep`. This ensures that the node
+   * has a corresponding `PathNode`.
+   */
+  private module ValidStateConfig implements DataFlow::StateConfigSig {
+    class FlowState = boolean;
+
+    /** Holds if `source` is a size node according to `hasSize`; sources start in state `false`. */
+    predicate isSource(DataFlow::Node source, FlowState state) {
+      hasSize(_, source, _) and
+      state = false
+    }
+
+    /** Holds if `sink` is a size sink according to `isSinkPairImpl`, in either boolean state. */
+    predicate isSink(DataFlow::Node sink, FlowState state) {
+      isSinkPairImpl(_, _, sink, _, _) and
+      state = [false, true]
+    }
+
+    predicate isBarrier(DataFlow::Node node, FlowState state) { none() }
+
+    /**
+     * Holds if the step from `node1` to `node2` is an offset-modifying step, as
+     * defined by the top-level `isAdditionalFlowStep2`, and `state2` is the negation
+     * of `state1`.
+     */
+    predicate isAdditionalFlowStep(
+      DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2
+    ) {
+      // Delegate to the shared definition instead of repeating it, so this traversal
+      // flips its state at exactly the steps where `validStateImpl` adjusts the offset.
+      // The size of the adjustment is irrelevant here, hence the `_`.
+      isAdditionalFlowStep2(node1, node2, _) and
+      state1 = [false, true] and
+      state2 = state1.booleanNot()
+    }
+
+    predicate includeHiddenNodes() { any() }
+  }
+
+  private import DataFlow::GlobalWithState<ValidStateConfig>
+
+  /** Holds if `n` can reach itself through one or more `getASuccessor` steps. */
+  private predicate inLoop(PathNode n) { n.getASuccessor+() = n }
+
+  /**
+   * Holds if `value` is a possible offset for `n`.
+   *
+   * To ensure termination, we limit `value` to be in the
+   * range `[-2, 2]` if the node is part of a loop. Without
+   * this restriction we wouldn't terminate on an example like:
+   * ```cpp
+   * while(unknown()) { size++; }
+   * ```
+   */
+  private predicate validStateImpl(PathNode n, int value) {
+    // If the dataflow node depends recursively on itself we restrict the range.
+    (inLoop(n) implies value = [-2 .. 2]) and
+    (
+      // For the dataflow source we have an allocation such as `malloc(size + k)`,
+      // and the value of the flow-state is then `k`.
+      hasSize(_, n.getNode(), value)
+      or
+      // For a dataflow sink any `value` that is strictly smaller than the delta
+      // needs to be a valid flow-state. That is, for a snippet like:
+      // ```
+      // p = b ? new char[size] : new char[size + 1];
+      // memset(p, 0, size + 2);
+      // ```
+      // the valid flow-states at the `memset` must include the set `{0, 1}` since the
+      // flow-state at `new char[size]` is `0`, and the flow-state at `new char[size + 1]`
+      // is `1`.
+      //
+      // So we find a valid flow-state at the sink's predecessor, and use the definition
+      // of our sink predicate to compute the valid flow-states at the sink.
+      exists(int delta, PathNode n0 |
+        n0.getASuccessor() = n and
+        validStateImpl(n0, value) and
+        isSinkPairImpl(_, _, n.getNode(), delta, _) and
+        delta > value
+      )
+      or
+      // For a non-source and non-sink node there are two cases to consider.
+      // 1. A node where we have to update the flow-state, or
+      // 2. A node that doesn't update the flow-state.
+      //
+      // For case 1, we compute the new flow-state by adding the constant operand of the
+      // `AddInstruction` to the flow-state of any predecessor node.
+      // For case 2 we simply propagate the valid flow-states from the predecessor node to
+      // the next one.
+      exists(PathNode n0, DataFlow::Node node0, DataFlow::Node node, int value0 |
+        n0.getASuccessor() = n and
+        validStateImpl(n0, value0) and
+        node = n.getNode() and
+        node0 = n0.getNode()
+      |
+        exists(int delta |
+          isAdditionalFlowStep2(node0, node, delta) and
+          // The predecessor's offset exceeds this node's offset by `delta`,
+          // i.e. `value = value0 - delta`.
+          value0 = value + delta
+        )
+        or
+        not isAdditionalFlowStep2(node0, node, _) and
+        value = value0
+      )
+    )
+  }
+
+  /**
+   * Holds if `value` is a possible offset (as computed by `validStateImpl`) for
+   * some `PathNode` whose underlying dataflow node is `n`.
+   */
+  predicate validState(DataFlow::Node n, int value) {
+    validStateImpl(any(PathNode pn | pn.getNode() = n), value)
+  }
+}
+
+import ValidState
+
+/**
+ * Holds if `node2` is a dataflow node that represents an addition of two operands `op1`
+ * and `op2` such that:
+ * 1. `node1` is the dataflow node that represents `op1`, and
+ * 2. the value of `op2` can be upper bounded by `delta`.
+ */
+predicate isAdditionalFlowStep2(DataFlow::Node node1, DataFlow::Node node2, int delta) {
+  exists(AddInstruction add, Operand op |
+    add.hasOperands(node1.asOperand(), op) and
+    semBounded(getSemanticExpr(op.getDef()), any(SemZeroBound zero), delta, true, _) and
+    node2.asInstruction() = add
+  )
+}
+
 module StringSizeConfig implements ProductFlow::StateConfigSig {
   class FlowState1 = Unit;
 
@@ -100,7 +243,7 @@ module StringSizeConfig implements ProductFlow::StateConfigSig {
     DataFlow::Node bufSink, FlowState1 state1, DataFlow::Node sizeSink, FlowState2 state2
   ) {
     exists(state1) and
-    state2 = [-32 .. 32] and // An arbitrary bound because we need to bound `state2`
+    validState(sizeSink, state2) and
     exists(int delta |
       isSinkPairImpl(_, bufSink, sizeSink, delta, _) and
       delta > state2
@@ -111,6 +254,10 @@ module StringSizeConfig implements ProductFlow::StateConfigSig {
 
   predicate isBarrier2(DataFlow::Node node, FlowState2 state) { none() }
 
+  /**
+   * Holds if `node` is among the inputs that `getAnInput(true)` yields for some
+   * `DataFlow::SsaPhiNode`.
+   *
+   * NOTE(review): presumably this stops the second (size) projection from flowing
+   * out of such phi inputs; the meaning of the `true` argument is not visible here
+   * and should be confirmed against `SsaPhiNode::getAnInput`.
+   */
+  predicate isBarrierOut2(DataFlow::Node node) {
+    node = any(DataFlow::SsaPhiNode phi).getAnInput(true)
+  }
+
   predicate isAdditionalFlowStep1(
     DataFlow::Node node1, FlowState1 state1, DataFlow::Node node2, FlowState1 state2
   ) {
@@ -120,14 +267,10 @@ module StringSizeConfig implements ProductFlow::StateConfigSig {
   predicate isAdditionalFlowStep2(
     DataFlow::Node node1, FlowState2 state1, DataFlow::Node node2, FlowState2 state2
   ) {
-    exists(AddInstruction add, Operand op, int delta, int s1, int s2 |
-      s1 = [-32 .. 32] and // An arbitrary bound because we need to bound `state`
-      state1 = s1 and
-      state2 = s2 and
-      add.hasOperands(node1.asOperand(), op) and
-      semBounded(getSemanticExpr(op.getDef()), any(SemZeroBound zero), delta, true, _) and
-      node2.asInstruction() = add and
-      s1 = s2 + delta
+    validState(node2, state2) and
+    exists(int delta |
+      isAdditionalFlowStep2(node1, node2, delta) and
+      state1 = state2 + delta
     )
   }
 }
 
@@ -324,6 +324,16 @@ query predicate edges(MergedPathNode node1, MergedPathNode node2) {
   joinOn2(node1.asPathNode3(), node2.asSinkNode(), _)
 }
 
+/**
+ * Holds if `(arg, par, ret, out)` is a subpath tuple of either underlying path
+ * graph: all four nodes are `PathNode1`s related by
+ * `AllocToInvalidPointerFlow::PathGraph1::subpaths`, or all four are `PathNode3`s
+ * related by `InvalidPointerToDerefFlow::PathGraph::subpaths`.
+ */
+query predicate subpaths(
+  MergedPathNode arg, MergedPathNode par, MergedPathNode ret, MergedPathNode out
+) {
+  AllocToInvalidPointerFlow::PathGraph1::subpaths(arg.asPathNode1(), par.asPathNode1(),
+    ret.asPathNode1(), out.asPathNode1())
+  or
+  InvalidPointerToDerefFlow::PathGraph::subpaths(arg.asPathNode3(), par.asPathNode3(),
+    ret.asPathNode3(), out.asPathNode3())
+}
+
 /**
  * Holds if `p1` is a sink of `AllocToInvalidPointerConf` and `p2` is a source
  * of `InvalidPointerToDerefConf`, and they are connected through `pai`.