|
1 | 1 | /**
|
2 | 2 | * @name Non-constant format string
|
3 |
| - * @description Passing a non-constant 'format' string to a printf-like function can lead |
| 3 | + * @description Passing a value that is not a string literal 'format' string to a printf-like function can lead |
4 | 4 | * to a mismatch between the number of arguments defined by the 'format' and the number
|
5 | 5 | * of arguments actually passed to the function. If the format string ultimately stems
|
6 | 6 | * from an untrusted source, this can be used for exploits.
|
| 7 | + * This query finds format strings coming from non-literal sources. Note that format strings of |
| 8 | + * type `const char*` it is still considered non-constant if the value is not coming from a string |
| 9 | + * literal. For example, for a parameter with type `const char*` of an exported function that is |
| 10 | + * used as a format string, there is no way to ensure the originating value was a string literal. |
7 | 11 | * @kind problem
|
8 | 12 | * @problem.severity recommendation
|
9 | 13 | * @security-severity 9.3
|
|
16 | 20 | */
|
17 | 21 |
|
18 | 22 | import semmle.code.cpp.ir.dataflow.TaintTracking
|
19 |
| -import semmle.code.cpp.models.implementations.GetText |
20 | 23 | import semmle.code.cpp.commons.Printf
|
| 24 | +import semmle.code.cpp.security.FlowSources |
| 25 | +import semmle.code.cpp.ir.dataflow.internal.ModelUtil |
| 26 | +import semmle.code.cpp.models.interfaces.DataFlow |
| 27 | +import semmle.code.cpp.models.interfaces.Taint |
| 28 | +import semmle.code.cpp.ir.IR |
21 | 29 |
|
22 |
| -// For the following `...gettext` functions, we assume that |
23 |
| -// all translations preserve the type and order of `%` specifiers |
24 |
| -// (and hence are safe to use as format strings). This |
25 |
| -// assumption is hard-coded into the query. |
26 |
| -predicate whitelistFunction(Function f, int arg) { |
27 |
| - // basic variations of gettext |
28 |
| - f.getName() = "_" and arg = 0 |
29 |
| - or |
30 |
| - exists(FunctionInput input | |
31 |
| - f.(GetTextFunction).hasDataFlow(input, _) and |
32 |
| - input.isParameterDeref(arg) |
33 |
| - ) |
34 |
| -} |
35 |
| - |
36 |
| -// we assume that ALL uses of the `_` macro (and calls to `gettext`) |
37 |
| -// return constant string literals |
38 |
| -predicate underscoreMacro(Expr e) { |
39 |
| - exists(MacroInvocation mi | |
40 |
| - mi.getMacroName() = "_" and |
41 |
| - mi.getExpr() = e |
42 |
| - ) |
43 |
| - or |
44 |
| - e = any(GetTextFunction gettext).getACallToThisFunction() |
| 30 | +class UncalledFunction extends Function { |
| 31 | + UncalledFunction() { |
| 32 | + not exists(Call c | c.getTarget() = this) and |
| 33 | + // Ignore functions that appear to be function pointers |
| 34 | + // function pointers may be seen as uncalled statically |
| 35 | + not exists(FunctionAccess fa | fa.getTarget() = this) |
| 36 | + } |
45 | 37 | }
|
46 | 38 |
|
47 | 39 | /**
|
48 |
| - * Holds if `t` cannot hold a character array, directly or indirectly. |
| 40 | + * Holds if `node` is a non-constant source of data flow for non-const format string detection. |
| 41 | + * This is defined as either: |
| 42 | + * 1) a `FlowSource` |
| 43 | + * 2) a parameter of an 'uncalled' function |
| 44 | + * 3) an argument to a function with no definition that is not known to define the output through its input |
| 45 | + * 4) an out arg of a function with no definition that is not known to define the output through its input |
| 46 | + * |
| 47 | + * The latter two cases address identifying standard string manipulation libraries as input sources |
| 48 | + * e.g., strcpy. More simply, functions without definitions that are known to manipulate the |
| 49 | + * input to produce an output are not sources. Instead the ultimate source of input to these functions |
| 50 | + * should be considered as the source. |
| 51 | + * |
| 52 | + * False Negative Implication: This approach has false negatives (fails to identify non-const sources) |
| 53 | + * when the source is a field of a struct or object and the initialization is not observed statically. |
| 54 | + * There are 3 general cases where this can occur: |
| 55 | + * 1) Parameters of uncalled functions that are structs/objects and a field is accessed for a format string. |
| 56 | + * 2) A local variable that is a struct/object and initialization of the field occurs in code that is unseen statically. |
| 57 | + * e.g., an object constructor isn't known statically, or a function sets fields |
| 58 | + * of a struct, but the function is not known statically. |
| 59 | + * 3) A function meeting cases (3) and (4) above returns (through an out argument or return value) |
| 60 | + * a struct or object where a field containing a format string has been initialized. |
| 61 | + * |
| 62 | + * Note, uninitialized variables used as format strings are never detected by design. |
| 63 | + * Uninitialized variables are a separate vulnerability concern and should be addressed by a separate query. |
49 | 64 | */
|
50 |
| -predicate cannotContainString(Type t, boolean isIndirect) { |
51 |
| - isIndirect = false and |
52 |
| - exists(Type unspecified | |
53 |
| - unspecified = t.getUnspecifiedType() and |
54 |
| - not unspecified instanceof UnknownType |
55 |
| - | |
56 |
| - unspecified instanceof BuiltInType or |
57 |
| - unspecified instanceof IntegralOrEnumType |
| 65 | +predicate isNonConst(DataFlow::Node node) { |
| 66 | + node instanceof FlowSource |
| 67 | + or |
| 68 | + // Parameters of uncalled functions that aren't const |
| 69 | + exists(UncalledFunction f, Parameter p | |
| 70 | + f.getAParameter() = p and |
| 71 | + p = node.asParameter() |
58 | 72 | )
|
59 |
| -} |
60 |
| - |
61 |
| -predicate isNonConst(DataFlow::Node node, boolean isIndirect) { |
62 |
| - exists(Expr e | |
63 |
| - e = node.asExpr() and isIndirect = false |
64 |
| - or |
65 |
| - e = node.asIndirectExpr() and isIndirect = true |
66 |
| - | |
67 |
| - exists(FunctionCall fc | fc = e | |
68 |
| - not ( |
69 |
| - whitelistFunction(fc.getTarget(), _) or |
70 |
| - fc.getTarget().hasDefinition() |
71 |
| - ) |
72 |
| - ) |
73 |
| - or |
74 |
| - exists(Parameter p | p = e.(VariableAccess).getTarget() | |
75 |
| - p.getFunction().getName() = "main" and p.getType() instanceof PointerType |
| 73 | + or |
| 74 | + // Consider as an input any out arg of a function or a function's return where the function is not: |
| 75 | + // 1. a function with a known dataflow or taintflow from input to output and the `node` is the output |
| 76 | + // 2. a function where there is a known definition |
| 77 | + // i.e., functions that with unknown bodies and are not known to define the output through its input |
| 78 | + // are considered as possible non-const sources |
| 79 | + // The function's output must also not be const to be considered a non-const source |
| 80 | + exists(Function func, CallInstruction call | |
| 81 | + // NOTE: could use `Call` getAnArgument() instead of `CallInstruction` but requires two |
| 82 | + // variables representing the same call in ordoer to use `callOutput` below. |
| 83 | + exists(Expr arg | |
| 84 | + call.getPositionalArgumentOperand(_).getDef().getUnconvertedResultExpression() = arg and |
| 85 | + arg = node.asDefiningArgument() |
76 | 86 | )
|
77 | 87 | or
|
78 |
| - e instanceof CrementOperation |
79 |
| - or |
80 |
| - e instanceof AddressOfExpr |
81 |
| - or |
82 |
| - e instanceof ReferenceToExpr |
83 |
| - or |
84 |
| - e instanceof AssignPointerAddExpr |
85 |
| - or |
86 |
| - e instanceof AssignPointerSubExpr |
87 |
| - or |
88 |
| - e instanceof PointerArithmeticOperation |
89 |
| - or |
90 |
| - e instanceof FieldAccess |
91 |
| - or |
92 |
| - e instanceof PointerDereferenceExpr |
93 |
| - or |
94 |
| - e instanceof AddressOfExpr |
95 |
| - or |
96 |
| - e instanceof ExprCall |
97 |
| - or |
98 |
| - e instanceof NewArrayExpr |
99 |
| - or |
100 |
| - exists(Variable v | v = e.(VariableAccess).getTarget() | |
101 |
| - v.getType().(ArrayType).getBaseType() instanceof CharType and |
102 |
| - exists(AssignExpr ae | |
103 |
| - ae.getLValue().(ArrayExpr).getArrayBase().(VariableAccess).getTarget() = v |
104 |
| - ) |
| 88 | + call.getUnconvertedResultExpression() = node.asIndirectExpr() |
| 89 | + | |
| 90 | + func = call.getStaticCallTarget() and |
| 91 | + not exists(FunctionOutput output | |
| 92 | + // NOTE: we must include dataflow and taintflow. e.g., including only dataflow we will find sprintf |
| 93 | + // variant function's output are now possible non-const sources |
| 94 | + pragma[only_bind_out](func).(DataFlowFunction).hasDataFlow(_, output) or |
| 95 | + pragma[only_bind_out](func).(TaintFunction).hasTaintFlow(_, output) |
| 96 | + | |
| 97 | + node = callOutput(call, output) |
105 | 98 | )
|
| 99 | + ) and |
| 100 | + not exists(Call c | |
| 101 | + c.getTarget().hasDefinition() and |
| 102 | + if node instanceof DataFlow::DefinitionByReferenceNode |
| 103 | + then c.getAnArgument() = node.asDefiningArgument() |
| 104 | + else c = [node.asExpr(), node.asIndirectExpr()] |
106 | 105 | )
|
107 |
| - or |
108 |
| - node instanceof DataFlow::DefinitionByReferenceNode and |
109 |
| - isIndirect = true |
110 |
| -} |
111 |
| - |
112 |
| -pragma[noinline] |
113 |
| -predicate isBarrierNode(DataFlow::Node node) { |
114 |
| - underscoreMacro([node.asExpr(), node.asIndirectExpr()]) |
115 |
| - or |
116 |
| - exists(node.asExpr()) and |
117 |
| - cannotContainString(node.getType(), false) |
118 | 106 | }
|
119 | 107 |
|
| 108 | +/** |
| 109 | + * Holds if `sink` is a sink is a format string of any |
| 110 | + * `FormattingFunctionCall`. |
| 111 | + */ |
120 | 112 | predicate isSinkImpl(DataFlow::Node sink, Expr formatString) {
|
121 | 113 | [sink.asExpr(), sink.asIndirectExpr()] = formatString and
|
122 | 114 | exists(FormattingFunctionCall fc | formatString = fc.getArgument(fc.getFormatParameterIndex()))
|
123 | 115 | }
|
124 | 116 |
|
125 | 117 | module NonConstFlowConfig implements DataFlow::ConfigSig {
|
126 |
| - predicate isSource(DataFlow::Node source) { |
127 |
| - exists(boolean isIndirect, Type t | |
128 |
| - isNonConst(source, isIndirect) and |
129 |
| - t = source.getType() and |
130 |
| - not cannotContainString(t, isIndirect) |
131 |
| - ) |
132 |
| - } |
| 118 | + predicate isSource(DataFlow::Node source) { isNonConst(source) } |
133 | 119 |
|
134 | 120 | predicate isSink(DataFlow::Node sink) { isSinkImpl(sink, _) }
|
135 | 121 |
|
136 |
| - predicate isBarrier(DataFlow::Node node) { isBarrierNode(node) } |
| 122 | + predicate isBarrier(DataFlow::Node node) { |
| 123 | + // Ignore tracing non-const through array indices |
| 124 | + exists(ArrayExpr a | a.getArrayOffset() = node.asExpr()) |
| 125 | + } |
137 | 126 | }
|
138 | 127 |
|
139 | 128 | module NonConstFlow = TaintTracking::Global<NonConstFlowConfig>;
|
140 | 129 |
|
141 |
| -from FormattingFunctionCall call, Expr formatString |
| 130 | +from FormattingFunctionCall call, Expr formatString, DataFlow::Node sink |
142 | 131 | where
|
143 | 132 | call.getArgument(call.getFormatParameterIndex()) = formatString and
|
144 |
| - exists(DataFlow::Node sink | |
145 |
| - NonConstFlow::flowTo(sink) and |
146 |
| - isSinkImpl(sink, formatString) |
147 |
| - ) |
| 133 | + NonConstFlow::flowTo(sink) and |
| 134 | + isSinkImpl(sink, formatString) |
148 | 135 | select formatString,
|
149 | 136 | "The format string argument to " + call.getTarget().getName() +
|
150 | 137 | " should be constant to prevent security issues and other potential errors."
|
0 commit comments