Merge pull request github#15820 from MathiasVP/add-type-confusion-query

MathiasVP · web-flow · commit 1a42e55095db · 2024-03-12T10:29:22.000Z
C++: Add a new query for detecting type confusion vulnerabilities
diff --git a/cpp/ql/src/Security/CWE/CWE-843/TypeConfusion.qhelp b/cpp/ql/src/Security/CWE/CWE-843/TypeConfusion.qhelp
@@ -0,0 +1,50 @@
+<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
+<qhelp>
+
+<overview>
+<p>
+Certain casts in C and C++ place no restrictions on the target type. For
+example, C style casts such as <code>(MyClass*)p</code> allows the programmer
+to cast any pointer <code>p</code> to an expression of type <code>MyClass*</code>.
+If the runtime type of <code>p</code> turns out to be a type that's incompatible
+with <code>MyClass</code>, this results in undefined behavior.
+</p>
+</overview>
+
+<recommendation>
+<p>
+If possible, use <code>dynamic_cast</code> to safely cast between polymorphic types.
+If <code>dynamic_cast</code> is not an option, use <code>static_cast</code> to restrict
+the kinds of conversions that the compiler is allowed to perform. If C++ style casts is
+not an option, carefully check that all casts are safe.
+</p>
+</recommendation>
+
+<example>
+<p>
+Consider the following class hierachy where we define a base class <code>Shape</code> and two
+derived classes <code>Circle</code> and <code>Square</code> that are mutually incompatible:
+</p>
+<sample src="TypeConfusionCommon.cpp"/>
+
+<p>
+The following code demonstrates a type confusion vulnerability where the programmer
+assumes that the runtime type of <code>p</code> is always a <code>Square</code>.
+However, if <code>p</code> is a <code>Circle</code>, the cast will result in undefined behavior.
+</p>
+<sample src="TypeConfusionBad.cpp"/>
+
+<p>
+The following code fixes the vulnerability by using <code>dynamic_cast</code> to
+safely cast between polymorphic types. If the cast fails, <code>dynamic_cast</code>
+returns a null pointer, which can be checked for and handled appropriately.
+</p>
+<sample src="TypeConfusionGood.cpp"/>
+</example>
+
+<references>
+<li>
+Microsoft Learn: <a href="https://learn.microsoft.com/en-us/cpp/cpp/type-conversions-and-type-safety-modern-cpp">Type conversions and type safety</a>.
+</li>
+</references>
+</qhelp>
diff --git a/cpp/ql/src/Security/CWE/CWE-843/TypeConfusion.ql b/cpp/ql/src/Security/CWE/CWE-843/TypeConfusion.ql
@@ -0,0 +1,263 @@
+/**
+ * @name Type confusion
+ * @description Casting a value to an incompatible type can lead to undefined behavior.
+ * @kind path-problem
+ * @problem.severity warning
+ * @security-severity 9.3
+ * @precision medium
+ * @id cpp/type-confusion
+ * @tags security
+ *       external/cwe/cwe-843
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.new.DataFlow
+import Flow::PathGraph
+
+/**
+ * Holds if `f` is a field located at byte offset `offset` in `c`.
+ *
+ * Note that predicate is recursive, so that given the following:
+ * ```cpp
+ * struct S1 {
+ *   int a;
+ *   void* b;
+ * };
+ *
+ * struct S2 {
+ *   S1 s1;
+ *   char c;
+ * };
+ * ```
+ * both `hasAFieldWithOffset(S2, s1, 0)` and `hasAFieldWithOffset(S2, a, 0)`
+ * holds.
+ */
+predicate hasAFieldWithOffset(Class c, Field f, int offset) {
+  // Base case: `f` is a field in `c`.
+  f = c.getAField() and
+  offset = f.getByteOffset() and
+  not f.getUnspecifiedType().(Class).hasDefinition()
+  or
+  // Otherwise, we find the struct that is a field of `c` which then has
+  // the field `f` as a member.
+  exists(Field g |
+    g = c.getAField() and
+    // Find the field with the largest offset that's less than or equal to
+    // offset. That's the struct we need to search recursively.
+    g =
+      max(Field cand, int candOffset |
+        cand = c.getAField() and
+        candOffset = cand.getByteOffset() and
+        offset >= candOffset
+      |
+        cand order by candOffset
+      ) and
+    hasAFieldWithOffset(g.getUnspecifiedType(), f, offset - g.getByteOffset())
+  )
+}
+
+/** Holds if `f` is the last field of its declaring class. */
+predicate lastField(Field f) {
+  exists(Class c | c = f.getDeclaringType() |
+    f =
+      max(Field cand, int byteOffset |
+        cand.getDeclaringType() = c and byteOffset = f.getByteOffset()
+      |
+        cand order by byteOffset
+      )
+  )
+}
+
+/**
+ * Holds if there exists a field in `c2` at offset `offset` that's compatible
+ * with `f1`.
+ */
+bindingset[f1, offset, c2]
+pragma[inline_late]
+predicate hasCompatibleFieldAtOffset(Field f1, int offset, Class c2) {
+  exists(Field f2 | hasAFieldWithOffset(c2, f2, offset) |
+    // Let's not deal with bit-fields for now.
+    f2 instanceof BitField
+    or
+    f1.getUnspecifiedType().getSize() = f2.getUnspecifiedType().getSize()
+    or
+    lastField(f1) and
+    f1.getUnspecifiedType().getSize() <= f2.getUnspecifiedType().getSize()
+  )
+}
+
+/**
+ * Holds if `c1` is a prefix of `c2`.
+ */
+bindingset[c1, c2]
+pragma[inline_late]
+predicate prefix(Class c1, Class c2) {
+  not c1.isPolymorphic() and
+  not c2.isPolymorphic() and
+  if c1 instanceof Union
+  then
+    // If it's a union we just verify that one of it's variants is compatible with the other class
+    exists(Field f1, int offset |
+      // Let's not deal with bit-fields for now.
+      not f1 instanceof BitField and
+      hasAFieldWithOffset(c1, f1, offset)
+    |
+      hasCompatibleFieldAtOffset(f1, offset, c2)
+    )
+  else
+    forall(Field f1, int offset |
+      // Let's not deal with bit-fields for now.
+      not f1 instanceof BitField and
+      hasAFieldWithOffset(c1, f1, offset)
+    |
+      hasCompatibleFieldAtOffset(f1, offset, c2)
+    )
+}
+
+/**
+ * An unsafe cast is any explicit cast that is not
+ * a `dynamic_cast`.
+ */
+class UnsafeCast extends Cast {
+  private Class toType;
+
+  UnsafeCast() {
+    (
+      this instanceof CStyleCast
+      or
+      this instanceof StaticCast
+      or
+      this instanceof ReinterpretCast
+    ) and
+    toType = this.getExplicitlyConverted().getUnspecifiedType().stripType() and
+    not this.isImplicit() and
+    exists(TypeDeclarationEntry tde |
+      tde = toType.getDefinition() and
+      not tde.isFromUninstantiatedTemplate(_)
+    )
+  }
+
+  Class getConvertedType() { result = toType }
+
+  /**
+   * Holds if the result of this cast can safely be interpreted as a value of
+   * type `t`.
+   *
+   * The compatibility rules are as follows:
+   *
+   * 1. the result of `(T)x` is compatible with the type `T` for any `T`
+   * 2. the result of `(T)x` is compatible with the type `U` for any `U` such
+   *    that `U` is a subtype of `T`, or `T` is a subtype of `U`.
+   * 3. the result of `(T)x` is compatible with the type `U` if the list
+   *    of fields of `T` is a prefix of the list of fields of `U`.
+   *    For example, if `U` is `struct { unsigned char x; int y; };`
+   *    and `T` is `struct { unsigned char uc; };`.
+   * 4. the result of `(T)x` is compatible with the type `U` if the list
+   *    of fields of `U` is a prefix of the list of fields of `T`.
+   *
+   *    Condition 4 is a bit controversial, since it assumes that the additional
+   *    fields in `T` won't be accessed. This may result in some FNs.
+   */
+  bindingset[this, t]
+  pragma[inline_late]
+  predicate compatibleWith(Type t) {
+    // Conition 1
+    t.stripType() = this.getConvertedType()
+    or
+    // Condition 3
+    prefix(this.getConvertedType(), t.stripType())
+    or
+    // Condition 4
+    prefix(t.stripType(), this.getConvertedType())
+    or
+    // Condition 2 (a)
+    t.stripType().(Class).getABaseClass+() = this.getConvertedType()
+    or
+    // Condition 2 (b)
+    t.stripType() = this.getConvertedType().getABaseClass+()
+  }
+}
+
+/**
+ * Holds if `source` is an allocation that allocates a value of type `type`.
+ */
+predicate isSourceImpl(DataFlow::Node source, Class type) {
+  exists(AllocationExpr alloc |
+    alloc = source.asExpr() and
+    type = alloc.getAllocatedElementType().stripType() and
+    not exists(
+      alloc
+          .(NewOrNewArrayExpr)
+          .getAllocator()
+          .(OperatorNewAllocationFunction)
+          .getPlacementArgument()
+    )
+  ) and
+  exists(TypeDeclarationEntry tde |
+    tde = type.getDefinition() and
+    not tde.isFromUninstantiatedTemplate(_)
+  )
+}
+
+/** A configuration describing flow from an allocation to a potentially unsafe cast. */
+module Config implements DataFlow::ConfigSig {
+  predicate isSource(DataFlow::Node source) { isSourceImpl(source, _) }
+
+  predicate isBarrier(DataFlow::Node node) {
+    // We disable flow through global variables to reduce FPs from infeasible paths
+    node instanceof DataFlow::VariableNode
+    or
+    exists(Class c | c = node.getType().stripType() |
+      not c.hasDefinition()
+      or
+      exists(TypeDeclarationEntry tde |
+        tde = c.getDefinition() and
+        tde.isFromUninstantiatedTemplate(_)
+      )
+    )
+  }
+
+  predicate isSink(DataFlow::Node sink) { sink.asExpr() = any(UnsafeCast cast).getUnconverted() }
+
+  int fieldFlowBranchLimit() { result = 0 }
+}
+
+module Flow = DataFlow::Global<Config>;
+
+predicate relevantType(DataFlow::Node sink, Class allocatedType) {
+  exists(DataFlow::Node source |
+    Flow::flow(source, sink) and
+    isSourceImpl(source, allocatedType)
+  )
+}
+
+predicate isSinkImpl(
+  DataFlow::Node sink, Class allocatedType, Type convertedType, boolean compatible
+) {
+  exists(UnsafeCast cast |
+    relevantType(sink, allocatedType) and
+    sink.asExpr() = cast.getUnconverted() and
+    convertedType = cast.getConvertedType()
+  |
+    if cast.compatibleWith(allocatedType) then compatible = true else compatible = false
+  )
+}
+
+from
+  Flow::PathNode source, Flow::PathNode sink, Type badSourceType, Type sinkType,
+  DataFlow::Node sinkNode
+where
+  Flow::flowPath(source, sink) and
+  sinkNode = sink.getNode() and
+  isSourceImpl(source.getNode(), badSourceType) and
+  isSinkImpl(sinkNode, badSourceType, sinkType, false) and
+  // If there is any flow that would result in a valid cast then we don't
+  // report an alert here. This reduces the number of FPs from infeasible paths
+  // significantly.
+  not exists(DataFlow::Node goodSource, Type goodSourceType |
+    isSourceImpl(goodSource, goodSourceType) and
+    isSinkImpl(sinkNode, goodSourceType, sinkType, true) and
+    Flow::flow(goodSource, sinkNode)
+  )
+select sinkNode, source, sink, "Conversion from $@ to $@ is invalid.", badSourceType,
+  badSourceType.toString(), sinkType, sinkType.toString()
diff --git a/cpp/ql/src/Security/CWE/CWE-843/TypeConfusionBad.cpp b/cpp/ql/src/Security/CWE/CWE-843/TypeConfusionBad.cpp
@@ -0,0 +1,7 @@
+void allocate_and_draw_bad() {
+  Shape* shape = new Circle;
+  // ...
+  // BAD: Assumes that shape is always a Square
+  Square* square = static_cast<Square*>(shape);
+  int length = square->getLength();
+}
diff --git a/cpp/ql/src/Security/CWE/CWE-843/TypeConfusionCommon.cpp b/cpp/ql/src/Security/CWE/CWE-843/TypeConfusionCommon.cpp
@@ -0,0 +1,25 @@
+struct Shape {
+  virtual ~Shape();
+
+  virtual void draw() = 0;
+};
+
+struct Circle : public Shape {
+  Circle();
+
+  void draw() override {
+    /* ... */
+  }
+
+  int getRadius();
+};
+
+struct Square : public Shape {
+  Square();
+
+  void draw() override {
+    /* ... */
+  }
+
+  int getLength();
+};
diff --git a/cpp/ql/src/Security/CWE/CWE-843/TypeConfusionGood.cpp b/cpp/ql/src/Security/CWE/CWE-843/TypeConfusionGood.cpp
@@ -0,0 +1,11 @@
+void allocate_and_draw_good() {
+  Shape* shape = new Circle;
+  // ...
+  // GOOD: Dynamically checks if shape is a Square
+  Square* square = dynamic_cast<Square*>(shape);
+  if(square) {
+    int length = square->getLength();
+  } else {
+    // handle error
+  }
+}
diff --git a/cpp/ql/src/change-notes/2024-03-05-type-confusion-query.md b/cpp/ql/src/change-notes/2024-03-05-type-confusion-query.md
@@ -0,0 +1,4 @@
+---
+category: newQuery
+---
+* Added a new query, `cpp/type-confusion`, to detect casts to invalid types.
diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-843/TypeConfusion.expected b/cpp/ql/test/query-tests/Security/CWE/CWE-843/TypeConfusion.expected
diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-843/TypeConfusion.qlref b/cpp/ql/test/query-tests/Security/CWE/CWE-843/TypeConfusion.qlref
diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-843/test.cpp b/cpp/ql/test/query-tests/Security/CWE/CWE-843/test.cpp

-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +---
 +category: newQuery
 +---
 +* Added a new query, `cpp/type-confusion`, to detect casts to invalid types.