Merge pull request #6125 from MathiasVP/improve-tainted-arithmetic

MathiasVP · web-flow · commit 9b8f558fb8bf · 2021-06-23T16:44:20.000+02:00
C++: Add more barriers to `cpp/tainted-arithmetic`
diff --git a/cpp/ql/src/Security/CWE/CWE-190/ArithmeticTainted.ql b/cpp/ql/src/Security/CWE/CWE-190/ArithmeticTainted.ql
@@ -2,7 +2,7 @@
  * @name User-controlled data in arithmetic expression
  * @description Arithmetic operations on user-controlled data that is
  *              not validated can cause overflows.
- * @kind problem
+ * @kind path-problem
  * @problem.severity warning
  * @security-severity 8.6
  * @precision low
@@ -16,22 +16,39 @@ import cpp
 import semmle.code.cpp.security.Overflow
 import semmle.code.cpp.security.Security
 import semmle.code.cpp.security.TaintTracking
+import TaintedWithPath
+import Bounded
 
-from Expr origin, Operation op, Expr e, string effect
+bindingset[op]
+predicate missingGuard(Operation op, Expr e, string effect) {
+  missingGuardAgainstUnderflow(op, e) and effect = "underflow"
+  or
+  missingGuardAgainstOverflow(op, e) and effect = "overflow"
+  or
+  not e instanceof VariableAccess and effect = "overflow"
+}
+
+class Configuration extends TaintTrackingConfiguration {
+  override predicate isSink(Element e) {
+    exists(Operation op |
+      missingGuard(op, e, _) and
+      op.getAnOperand() = e
+    |
+      op instanceof UnaryArithmeticOperation or
+      op instanceof BinaryArithmeticOperation
+    )
+  }
+
+  override predicate isBarrier(Expr e) {
+    super.isBarrier(e) or bounded(e) or e.getUnspecifiedType().(IntegralType).getSize() <= 1
+  }
+}
+
+from Expr origin, Expr e, string effect, PathNode sourceNode, PathNode sinkNode, Operation op
 where
-  isUserInput(origin, _) and
-  tainted(origin, e) and
+  taintedWithPath(origin, e, sourceNode, sinkNode) and
   op.getAnOperand() = e and
-  (
-    missingGuardAgainstUnderflow(op, e) and effect = "underflow"
-    or
-    missingGuardAgainstOverflow(op, e) and effect = "overflow"
-    or
-    not e instanceof VariableAccess and effect = "overflow"
-  ) and
-  (
-    op instanceof UnaryArithmeticOperation or
-    op instanceof BinaryArithmeticOperation
-  )
-select e, "$@ flows to here and is used in arithmetic, potentially causing an " + effect + ".",
-  origin, "User-provided value"
+  missingGuard(op, e, effect)
+select e, sourceNode, sinkNode,
+  "$@ flows to here and is used in arithmetic, potentially causing an " + effect + ".", origin,
+  "User-provided value"
diff --git a/cpp/ql/src/Security/CWE/CWE-190/ArithmeticUncontrolled.ql b/cpp/ql/src/Security/CWE/CWE-190/ArithmeticUncontrolled.ql
@@ -16,8 +16,8 @@ import cpp
 import semmle.code.cpp.security.Overflow
 import semmle.code.cpp.security.Security
 import semmle.code.cpp.security.TaintTracking
-import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
 import TaintedWithPath
+import Bounded
 
 predicate isUnboundedRandCall(FunctionCall fc) {
   exists(Function func | func = fc.getTarget() |
@@ -27,81 +27,6 @@ predicate isUnboundedRandCall(FunctionCall fc) {
   )
 }
 
-/**
- * An operand `e` of a division expression (i.e., `e` is an operand of either a `DivExpr` or
- * a `AssignDivExpr`) is bounded when `e` is the left-hand side of the division.
- */
-pragma[inline]
-predicate boundedDiv(Expr e, Expr left) { e = left }
-
-/**
- * An operand `e` of a remainder expression `rem` (i.e., `rem` is either a `RemExpr` or
- * an `AssignRemExpr`) with left-hand side `left` and right-hand side `right` is bounded
- * when `e` is `left` and `right` is upper bounded by some number that is less than the maximum integer
- * allowed by the result type of `rem`.
- */
-pragma[inline]
-predicate boundedRem(Expr e, Expr rem, Expr left, Expr right) {
-  e = left and
-  upperBound(right.getFullyConverted()) < exprMaxVal(rem.getFullyConverted())
-}
-
-/**
- * An operand `e` of a bitwise and expression `andExpr` (i.e., `andExpr` is either an `BitwiseAndExpr`
- * or an `AssignAndExpr`) with operands `operand1` and `operand2` is the operand that is not `e` is upper
- * bounded by some number that is less than the maximum integer allowed by the result type of `andExpr`.
- */
-pragma[inline]
-predicate boundedBitwiseAnd(Expr e, Expr andExpr, Expr operand1, Expr operand2) {
-  operand1 != operand2 and
-  e = operand1 and
-  upperBound(operand2.getFullyConverted()) < exprMaxVal(andExpr.getFullyConverted())
-}
-
-/**
- * Holds if `e` is an arithmetic expression that cannot overflow, or if `e` is an operand of an
- * operation that may greatly reduces the range of possible values.
- */
-predicate bounded(Expr e) {
-  (
-    e instanceof UnaryArithmeticOperation or
-    e instanceof BinaryArithmeticOperation or
-    e instanceof AssignArithmeticOperation
-  ) and
-  not convertedExprMightOverflow(e)
-  or
-  //  For `%` and `&` we require that `e` is bounded by a value that is strictly smaller than the
-  //  maximum possible value of the result type of the operation.
-  //  For example, the function call `rand()` is considered bounded in the following program:
-  //  ```
-  //  int i = rand() % (UINT8_MAX + 1);
-  //  ```
-  //  but not in:
-  //  ```
-  //  unsigned char uc = rand() % (UINT8_MAX + 1);
-  //  ```
-  exists(RemExpr rem | boundedRem(e, rem, rem.getLeftOperand(), rem.getRightOperand()))
-  or
-  exists(AssignRemExpr rem | boundedRem(e, rem, rem.getLValue(), rem.getRValue()))
-  or
-  exists(BitwiseAndExpr andExpr |
-    boundedBitwiseAnd(e, andExpr, andExpr.getAnOperand(), andExpr.getAnOperand())
-  )
-  or
-  exists(AssignAndExpr andExpr |
-    boundedBitwiseAnd(e, andExpr, andExpr.getAnOperand(), andExpr.getAnOperand())
-  )
-  or
-  // Optimitically assume that a division or right shift always yields a much smaller value.
-  boundedDiv(e, any(DivExpr div).getLeftOperand())
-  or
-  boundedDiv(e, any(AssignDivExpr div).getLValue())
-  or
-  boundedDiv(e, any(RShiftExpr shift).getLeftOperand())
-  or
-  boundedDiv(e, any(AssignRShiftExpr div).getLValue())
-}
-
 predicate isUnboundedRandCallOrParent(Expr e) {
   isUnboundedRandCall(e)
   or
diff --git a/cpp/ql/src/Security/CWE/CWE-190/Bounded.qll b/cpp/ql/src/Security/CWE/CWE-190/Bounded.qll
@@ -0,0 +1,83 @@
+/**
+ * This file provides the `bounded` predicate that is used in both `cpp/uncontrolled-arithmetic`
+ * and `cpp/tainted-arithmetic`.
+ */
+
+private import cpp
+private import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+private import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils
+
+/**
+ * An operand `e` of a division expression (i.e., `e` is an operand of either a `DivExpr` or
+ * an `AssignDivExpr`) is bounded when `e` is the left-hand side of the division.
+ */
+pragma[inline]
+private predicate boundedDiv(Expr e, Expr left) { e = left }
+
+/**
+ * An operand `e` of a remainder expression `rem` (i.e., `rem` is either a `RemExpr` or
+ * an `AssignRemExpr`) with left-hand side `left` and right-hand side `right` is bounded
+ * when `e` is `left` and `right` is upper bounded by some number that is less than the maximum integer
+ * allowed by the result type of `rem`.
+ */
+pragma[inline]
+private predicate boundedRem(Expr e, Expr rem, Expr left, Expr right) {
+  e = left and
+  upperBound(right.getFullyConverted()) < exprMaxVal(rem.getFullyConverted())
+}
+
+/**
+ * An operand `e` of a bitwise and expression `andExpr` (i.e., `andExpr` is either a `BitwiseAndExpr`
+ * or an `AssignAndExpr`) with operands `operand1` and `operand2` is bounded when the operand that is not
+ * `e` is upper bounded by some number that is less than the maximum integer allowed by the result type of `andExpr`.
+ */
+pragma[inline]
+private predicate boundedBitwiseAnd(Expr e, Expr andExpr, Expr operand1, Expr operand2) {
+  operand1 != operand2 and
+  e = operand1 and
+  upperBound(operand2.getFullyConverted()) < exprMaxVal(andExpr.getFullyConverted())
+}
+
+/**
+ * Holds if `e` is an arithmetic expression that cannot overflow, or if `e` is an operand of an
+ * operation that may greatly reduce the range of possible values.
+ */
+predicate bounded(Expr e) {
+  (
+    e instanceof UnaryArithmeticOperation or
+    e instanceof BinaryArithmeticOperation or
+    e instanceof AssignArithmeticOperation
+  ) and
+  not convertedExprMightOverflow(e)
+  or
+  //  For `%` and `&` we require that `e` is bounded by a value that is strictly smaller than the
+  //  maximum possible value of the result type of the operation.
+  //  For example, the function call `rand()` is considered bounded in the following program:
+  //  ```
+  //  int i = rand() % (UINT8_MAX + 1);
+  //  ```
+  //  but not in:
+  //  ```
+  //  unsigned char uc = rand() % (UINT8_MAX + 1);
+  //  ```
+  exists(RemExpr rem | boundedRem(e, rem, rem.getLeftOperand(), rem.getRightOperand()))
+  or
+  exists(AssignRemExpr rem | boundedRem(e, rem, rem.getLValue(), rem.getRValue()))
+  or
+  exists(BitwiseAndExpr andExpr |
+    boundedBitwiseAnd(e, andExpr, andExpr.getAnOperand(), andExpr.getAnOperand())
+  )
+  or
+  exists(AssignAndExpr andExpr |
+    boundedBitwiseAnd(e, andExpr, andExpr.getAnOperand(), andExpr.getAnOperand())
+  )
+  or
+  // Optimistically assume that a division or right shift always yields a much smaller value.
+  boundedDiv(e, any(DivExpr div).getLeftOperand())
+  or
+  boundedDiv(e, any(AssignDivExpr div).getLValue())
+  or
+  boundedDiv(e, any(RShiftExpr shift).getLeftOperand())
+  or
+  boundedDiv(e, any(AssignRShiftExpr div).getLValue())
+}
diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-190/semmle/tainted/ArithmeticTainted.expected b/cpp/ql/test/query-tests/Security/CWE/CWE-190/semmle/tainted/ArithmeticTainted.expected
@@ -1,9 +1,68 @@
-| test2.cpp:14:11:14:11 | v | $@ flows to here and is used in arithmetic, potentially causing an overflow. | test2.cpp:25:22:25:23 | & ... | User-provided value |
-| test2.cpp:14:11:14:11 | v | $@ flows to here and is used in arithmetic, potentially causing an underflow. | test2.cpp:25:22:25:23 | & ... | User-provided value |
-| test5.cpp:17:6:17:18 | call to getTaintedInt | $@ flows to here and is used in arithmetic, potentially causing an overflow. | test5.cpp:9:7:9:9 | buf | User-provided value |
-| test5.cpp:19:6:19:6 | y | $@ flows to here and is used in arithmetic, potentially causing an overflow. | test5.cpp:9:7:9:9 | buf | User-provided value |
-| test5.cpp:19:6:19:6 | y | $@ flows to here and is used in arithmetic, potentially causing an underflow. | test5.cpp:9:7:9:9 | buf | User-provided value |
-| test.c:14:15:14:28 | maxConnections | $@ flows to here and is used in arithmetic, potentially causing an overflow. | test.c:11:29:11:32 | argv | User-provided value |
-| test.c:14:15:14:28 | maxConnections | $@ flows to here and is used in arithmetic, potentially causing an underflow. | test.c:11:29:11:32 | argv | User-provided value |
-| test.c:44:7:44:10 | len2 | $@ flows to here and is used in arithmetic, potentially causing an underflow. | test.c:41:17:41:20 | argv | User-provided value |
-| test.c:54:7:54:10 | len3 | $@ flows to here and is used in arithmetic, potentially causing an underflow. | test.c:51:17:51:20 | argv | User-provided value |
+edges
+| test2.cpp:12:21:12:21 | v | test2.cpp:14:11:14:11 | v |
+| test2.cpp:12:21:12:21 | v | test2.cpp:14:11:14:11 | v |
+| test2.cpp:25:22:25:23 | & ... | test2.cpp:27:2:27:11 | v |
+| test2.cpp:25:22:25:23 | fscanf output argument | test2.cpp:27:2:27:11 | v |
+| test2.cpp:27:2:27:11 | v | test2.cpp:12:21:12:21 | v |
+| test5.cpp:9:7:9:9 | buf | test5.cpp:10:9:10:27 | Store |
+| test5.cpp:9:7:9:9 | gets output argument | test5.cpp:10:9:10:27 | Store |
+| test5.cpp:10:9:10:27 | Store | test5.cpp:17:6:17:18 | call to getTaintedInt |
+| test5.cpp:10:9:10:27 | Store | test5.cpp:17:6:17:18 | call to getTaintedInt |
+| test5.cpp:10:9:10:27 | Store | test5.cpp:18:6:18:18 | call to getTaintedInt |
+| test5.cpp:18:6:18:18 | call to getTaintedInt | test5.cpp:19:6:19:6 | y |
+| test5.cpp:18:6:18:18 | call to getTaintedInt | test5.cpp:19:6:19:6 | y |
+| test.c:11:29:11:32 | argv | test.c:14:15:14:28 | maxConnections |
+| test.c:11:29:11:32 | argv | test.c:14:15:14:28 | maxConnections |
+| test.c:11:29:11:32 | argv | test.c:14:15:14:28 | maxConnections |
+| test.c:11:29:11:32 | argv | test.c:14:15:14:28 | maxConnections |
+| test.c:41:17:41:20 | argv | test.c:44:7:44:10 | len2 |
+| test.c:41:17:41:20 | argv | test.c:44:7:44:10 | len2 |
+| test.c:41:17:41:20 | argv | test.c:44:7:44:10 | len2 |
+| test.c:41:17:41:20 | argv | test.c:44:7:44:10 | len2 |
+| test.c:51:17:51:20 | argv | test.c:54:7:54:10 | len3 |
+| test.c:51:17:51:20 | argv | test.c:54:7:54:10 | len3 |
+| test.c:51:17:51:20 | argv | test.c:54:7:54:10 | len3 |
+| test.c:51:17:51:20 | argv | test.c:54:7:54:10 | len3 |
+nodes
+| test2.cpp:12:21:12:21 | v | semmle.label | v |
+| test2.cpp:14:11:14:11 | v | semmle.label | v |
+| test2.cpp:14:11:14:11 | v | semmle.label | v |
+| test2.cpp:14:11:14:11 | v | semmle.label | v |
+| test2.cpp:25:22:25:23 | & ... | semmle.label | & ... |
+| test2.cpp:25:22:25:23 | fscanf output argument | semmle.label | fscanf output argument |
+| test2.cpp:27:2:27:11 | v | semmle.label | v |
+| test5.cpp:9:7:9:9 | buf | semmle.label | buf |
+| test5.cpp:9:7:9:9 | gets output argument | semmle.label | gets output argument |
+| test5.cpp:10:9:10:27 | Store | semmle.label | Store |
+| test5.cpp:17:6:17:18 | call to getTaintedInt | semmle.label | call to getTaintedInt |
+| test5.cpp:17:6:17:18 | call to getTaintedInt | semmle.label | call to getTaintedInt |
+| test5.cpp:17:6:17:18 | call to getTaintedInt | semmle.label | call to getTaintedInt |
+| test5.cpp:18:6:18:18 | call to getTaintedInt | semmle.label | call to getTaintedInt |
+| test5.cpp:19:6:19:6 | y | semmle.label | y |
+| test5.cpp:19:6:19:6 | y | semmle.label | y |
+| test5.cpp:19:6:19:6 | y | semmle.label | y |
+| test.c:11:29:11:32 | argv | semmle.label | argv |
+| test.c:11:29:11:32 | argv | semmle.label | argv |
+| test.c:14:15:14:28 | maxConnections | semmle.label | maxConnections |
+| test.c:14:15:14:28 | maxConnections | semmle.label | maxConnections |
+| test.c:14:15:14:28 | maxConnections | semmle.label | maxConnections |
+| test.c:41:17:41:20 | argv | semmle.label | argv |
+| test.c:41:17:41:20 | argv | semmle.label | argv |
+| test.c:44:7:44:10 | len2 | semmle.label | len2 |
+| test.c:44:7:44:10 | len2 | semmle.label | len2 |
+| test.c:44:7:44:10 | len2 | semmle.label | len2 |
+| test.c:51:17:51:20 | argv | semmle.label | argv |
+| test.c:51:17:51:20 | argv | semmle.label | argv |
+| test.c:54:7:54:10 | len3 | semmle.label | len3 |
+| test.c:54:7:54:10 | len3 | semmle.label | len3 |
+| test.c:54:7:54:10 | len3 | semmle.label | len3 |
+#select
+| test2.cpp:14:11:14:11 | v | test2.cpp:25:22:25:23 | & ... | test2.cpp:14:11:14:11 | v | $@ flows to here and is used in arithmetic, potentially causing an overflow. | test2.cpp:25:22:25:23 | & ... | User-provided value |
+| test2.cpp:14:11:14:11 | v | test2.cpp:25:22:25:23 | & ... | test2.cpp:14:11:14:11 | v | $@ flows to here and is used in arithmetic, potentially causing an underflow. | test2.cpp:25:22:25:23 | & ... | User-provided value |
+| test5.cpp:17:6:17:18 | call to getTaintedInt | test5.cpp:9:7:9:9 | buf | test5.cpp:17:6:17:18 | call to getTaintedInt | $@ flows to here and is used in arithmetic, potentially causing an overflow. | test5.cpp:9:7:9:9 | buf | User-provided value |
+| test5.cpp:19:6:19:6 | y | test5.cpp:9:7:9:9 | buf | test5.cpp:19:6:19:6 | y | $@ flows to here and is used in arithmetic, potentially causing an overflow. | test5.cpp:9:7:9:9 | buf | User-provided value |
+| test5.cpp:19:6:19:6 | y | test5.cpp:9:7:9:9 | buf | test5.cpp:19:6:19:6 | y | $@ flows to here and is used in arithmetic, potentially causing an underflow. | test5.cpp:9:7:9:9 | buf | User-provided value |
+| test.c:14:15:14:28 | maxConnections | test.c:11:29:11:32 | argv | test.c:14:15:14:28 | maxConnections | $@ flows to here and is used in arithmetic, potentially causing an overflow. | test.c:11:29:11:32 | argv | User-provided value |
+| test.c:14:15:14:28 | maxConnections | test.c:11:29:11:32 | argv | test.c:14:15:14:28 | maxConnections | $@ flows to here and is used in arithmetic, potentially causing an underflow. | test.c:11:29:11:32 | argv | User-provided value |
+| test.c:44:7:44:10 | len2 | test.c:41:17:41:20 | argv | test.c:44:7:44:10 | len2 | $@ flows to here and is used in arithmetic, potentially causing an underflow. | test.c:41:17:41:20 | argv | User-provided value |
+| test.c:54:7:54:10 | len3 | test.c:51:17:51:20 | argv | test.c:54:7:54:10 | len3 | $@ flows to here and is used in arithmetic, potentially causing an underflow. | test.c:51:17:51:20 | argv | User-provided value |