Skip to content

Commit b927968

Browse files
authored
Merge pull request github#15516 from microsoft/51-2cppnon-constant-format-alter-not-const-source
C++: Change sources in `NonConstantFormat.ql`
2 parents c05431e + c38376a commit b927968

File tree

8 files changed

+240
-150
lines changed

8 files changed

+240
-150
lines changed
Lines changed: 89 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
/**
22
* @name Non-constant format string
3-
* @description Passing a non-constant 'format' string to a printf-like function can lead
3+
* @description Passing a value that is not a string literal 'format' string to a printf-like function can lead
44
* to a mismatch between the number of arguments defined by the 'format' and the number
55
* of arguments actually passed to the function. If the format string ultimately stems
66
* from an untrusted source, this can be used for exploits.
7+
* This query finds format strings coming from non-literal sources. Note that format strings of
8+
* type `const char*` it is still considered non-constant if the value is not coming from a string
9+
* literal. For example, for a parameter with type `const char*` of an exported function that is
10+
* used as a format string, there is no way to ensure the originating value was a string literal.
711
* @kind problem
812
* @problem.severity recommendation
913
* @security-severity 9.3
@@ -16,135 +20,118 @@
1620
*/
1721

1822
import semmle.code.cpp.ir.dataflow.TaintTracking
19-
import semmle.code.cpp.models.implementations.GetText
2023
import semmle.code.cpp.commons.Printf
24+
import semmle.code.cpp.security.FlowSources
25+
import semmle.code.cpp.ir.dataflow.internal.ModelUtil
26+
import semmle.code.cpp.models.interfaces.DataFlow
27+
import semmle.code.cpp.models.interfaces.Taint
28+
import semmle.code.cpp.ir.IR
2129

22-
// For the following `...gettext` functions, we assume that
23-
// all translations preserve the type and order of `%` specifiers
24-
// (and hence are safe to use as format strings). This
25-
// assumption is hard-coded into the query.
26-
predicate whitelistFunction(Function f, int arg) {
27-
// basic variations of gettext
28-
f.getName() = "_" and arg = 0
29-
or
30-
exists(FunctionInput input |
31-
f.(GetTextFunction).hasDataFlow(input, _) and
32-
input.isParameterDeref(arg)
33-
)
34-
}
35-
36-
// we assume that ALL uses of the `_` macro (and calls to `gettext`)
37-
// return constant string literals
38-
predicate underscoreMacro(Expr e) {
39-
exists(MacroInvocation mi |
40-
mi.getMacroName() = "_" and
41-
mi.getExpr() = e
42-
)
43-
or
44-
e = any(GetTextFunction gettext).getACallToThisFunction()
30+
class UncalledFunction extends Function {
31+
UncalledFunction() {
32+
not exists(Call c | c.getTarget() = this) and
33+
// Ignore functions that appear to be function pointers
34+
// function pointers may be seen as uncalled statically
35+
not exists(FunctionAccess fa | fa.getTarget() = this)
36+
}
4537
}
4638

4739
/**
48-
* Holds if `t` cannot hold a character array, directly or indirectly.
40+
* Holds if `node` is a non-constant source of data flow for non-const format string detection.
41+
* This is defined as either:
42+
* 1) a `FlowSource`
43+
* 2) a parameter of an 'uncalled' function
44+
* 3) an argument to a function with no definition that is not known to define the output through its input
45+
* 4) an out arg of a function with no definition that is not known to define the output through its input
46+
*
47+
* The latter two cases address identifying standard string manipulation libraries as input sources
48+
* e.g., strcpy. More simply, functions without definitions that are known to manipulate the
49+
* input to produce an output are not sources. Instead the ultimate source of input to these functions
50+
* should be considered as the source.
51+
*
52+
* False Negative Implication: This approach has false negatives (fails to identify non-const sources)
53+
* when the source is a field of a struct or object and the initialization is not observed statically.
54+
* There are 3 general cases where this can occur:
55+
* 1) Parameters of uncalled functions that are structs/objects and a field is accessed for a format string.
56+
* 2) A local variable that is a struct/object and initialization of the field occurs in code that is unseen statically.
57+
* e.g., an object constructor isn't known statically, or a function sets fields
58+
* of a struct, but the function is not known statically.
59+
* 3) A function meeting cases (3) and (4) above returns (through an out argument or return value)
60+
* a struct or object where a field containing a format string has been initialized.
61+
*
62+
* Note, uninitialized variables used as format strings are never detected by design.
63+
* Uninitialized variables are a separate vulnerability concern and should be addressed by a separate query.
4964
*/
50-
predicate cannotContainString(Type t, boolean isIndirect) {
51-
isIndirect = false and
52-
exists(Type unspecified |
53-
unspecified = t.getUnspecifiedType() and
54-
not unspecified instanceof UnknownType
55-
|
56-
unspecified instanceof BuiltInType or
57-
unspecified instanceof IntegralOrEnumType
65+
predicate isNonConst(DataFlow::Node node) {
66+
node instanceof FlowSource
67+
or
68+
// Parameters of uncalled functions that aren't const
69+
exists(UncalledFunction f, Parameter p |
70+
f.getAParameter() = p and
71+
p = node.asParameter()
5872
)
59-
}
60-
61-
predicate isNonConst(DataFlow::Node node, boolean isIndirect) {
62-
exists(Expr e |
63-
e = node.asExpr() and isIndirect = false
64-
or
65-
e = node.asIndirectExpr() and isIndirect = true
66-
|
67-
exists(FunctionCall fc | fc = e |
68-
not (
69-
whitelistFunction(fc.getTarget(), _) or
70-
fc.getTarget().hasDefinition()
71-
)
72-
)
73-
or
74-
exists(Parameter p | p = e.(VariableAccess).getTarget() |
75-
p.getFunction().getName() = "main" and p.getType() instanceof PointerType
73+
or
74+
// Consider as an input any out arg of a function or a function's return where the function is not:
75+
// 1. a function with a known dataflow or taintflow from input to output and the `node` is the output
76+
// 2. a function where there is a known definition
77+
// i.e., functions that with unknown bodies and are not known to define the output through its input
78+
// are considered as possible non-const sources
79+
// The function's output must also not be const to be considered a non-const source
80+
exists(Function func, CallInstruction call |
81+
// NOTE: could use `Call` getAnArgument() instead of `CallInstruction` but requires two
82+
// variables representing the same call in ordoer to use `callOutput` below.
83+
exists(Expr arg |
84+
call.getPositionalArgumentOperand(_).getDef().getUnconvertedResultExpression() = arg and
85+
arg = node.asDefiningArgument()
7686
)
7787
or
78-
e instanceof CrementOperation
79-
or
80-
e instanceof AddressOfExpr
81-
or
82-
e instanceof ReferenceToExpr
83-
or
84-
e instanceof AssignPointerAddExpr
85-
or
86-
e instanceof AssignPointerSubExpr
87-
or
88-
e instanceof PointerArithmeticOperation
89-
or
90-
e instanceof FieldAccess
91-
or
92-
e instanceof PointerDereferenceExpr
93-
or
94-
e instanceof AddressOfExpr
95-
or
96-
e instanceof ExprCall
97-
or
98-
e instanceof NewArrayExpr
99-
or
100-
exists(Variable v | v = e.(VariableAccess).getTarget() |
101-
v.getType().(ArrayType).getBaseType() instanceof CharType and
102-
exists(AssignExpr ae |
103-
ae.getLValue().(ArrayExpr).getArrayBase().(VariableAccess).getTarget() = v
104-
)
88+
call.getUnconvertedResultExpression() = node.asIndirectExpr()
89+
|
90+
func = call.getStaticCallTarget() and
91+
not exists(FunctionOutput output |
92+
// NOTE: we must include dataflow and taintflow. e.g., including only dataflow we will find sprintf
93+
// variant function's output are now possible non-const sources
94+
pragma[only_bind_out](func).(DataFlowFunction).hasDataFlow(_, output) or
95+
pragma[only_bind_out](func).(TaintFunction).hasTaintFlow(_, output)
96+
|
97+
node = callOutput(call, output)
10598
)
99+
) and
100+
not exists(Call c |
101+
c.getTarget().hasDefinition() and
102+
if node instanceof DataFlow::DefinitionByReferenceNode
103+
then c.getAnArgument() = node.asDefiningArgument()
104+
else c = [node.asExpr(), node.asIndirectExpr()]
106105
)
107-
or
108-
node instanceof DataFlow::DefinitionByReferenceNode and
109-
isIndirect = true
110-
}
111-
112-
pragma[noinline]
113-
predicate isBarrierNode(DataFlow::Node node) {
114-
underscoreMacro([node.asExpr(), node.asIndirectExpr()])
115-
or
116-
exists(node.asExpr()) and
117-
cannotContainString(node.getType(), false)
118106
}
119107

108+
/**
109+
* Holds if `sink` is a sink is a format string of any
110+
* `FormattingFunctionCall`.
111+
*/
120112
predicate isSinkImpl(DataFlow::Node sink, Expr formatString) {
121113
[sink.asExpr(), sink.asIndirectExpr()] = formatString and
122114
exists(FormattingFunctionCall fc | formatString = fc.getArgument(fc.getFormatParameterIndex()))
123115
}
124116

125117
module NonConstFlowConfig implements DataFlow::ConfigSig {
126-
predicate isSource(DataFlow::Node source) {
127-
exists(boolean isIndirect, Type t |
128-
isNonConst(source, isIndirect) and
129-
t = source.getType() and
130-
not cannotContainString(t, isIndirect)
131-
)
132-
}
118+
predicate isSource(DataFlow::Node source) { isNonConst(source) }
133119

134120
predicate isSink(DataFlow::Node sink) { isSinkImpl(sink, _) }
135121

136-
predicate isBarrier(DataFlow::Node node) { isBarrierNode(node) }
122+
predicate isBarrier(DataFlow::Node node) {
123+
// Ignore tracing non-const through array indices
124+
exists(ArrayExpr a | a.getArrayOffset() = node.asExpr())
125+
}
137126
}
138127

139128
module NonConstFlow = TaintTracking::Global<NonConstFlowConfig>;
140129

141-
from FormattingFunctionCall call, Expr formatString
130+
from FormattingFunctionCall call, Expr formatString, DataFlow::Node sink
142131
where
143132
call.getArgument(call.getFormatParameterIndex()) = formatString and
144-
exists(DataFlow::Node sink |
145-
NonConstFlow::flowTo(sink) and
146-
isSinkImpl(sink, formatString)
147-
)
133+
NonConstFlow::flowTo(sink) and
134+
isSinkImpl(sink, formatString)
148135
select formatString,
149136
"The format string argument to " + call.getTarget().getName() +
150137
" should be constant to prevent security issues and other potential errors."
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* The "non-constant format string" query (`cpp/non-constant-format`) has been updated to produce fewer false positives.

cpp/ql/test/query-tests/Likely Bugs/Format/NonConstantFormat/NonConstantFormat.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ extern char *dcngettext (const char *__domainname, const char *__msgid1,
2323
extern char *any_random_function(const char *);
2424

2525
#define NULL ((void*)0)
26-
#define _(X) any_random_function((X))
26+
#define _(X) gettext(X)
2727

2828
int main(int argc, char **argv) {
2929
if(argc > 1)
@@ -40,10 +40,9 @@ int main(int argc, char **argv) {
4040
printf(gettext("%d arguments\n"), argc-1); // GOOD
4141
printf(any_random_function("%d arguments\n"), argc-1); // BAD
4242

43-
// Even though `_` is mapped to `some_random_function` above,
44-
// the following call should not be flagged.
45-
printf(_(any_random_function("%d arguments\n")),
46-
argc-1); // GOOD
43+
44+
45+
printf(_(any_random_function("%d arguments\n")), argc-1); // BAD
4746

4847
return 0;
4948
}
Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,21 @@
11
| NonConstantFormat.c:30:10:30:16 | access to array | The format string argument to printf should be constant to prevent security issues and other potential errors. |
22
| NonConstantFormat.c:41:9:41:27 | call to any_random_function | The format string argument to printf should be constant to prevent security issues and other potential errors. |
3+
| NonConstantFormat.c:45:9:45:48 | call to gettext | The format string argument to printf should be constant to prevent security issues and other potential errors. |
34
| nested.cpp:21:23:21:26 | fmt0 | The format string argument to snprintf should be constant to prevent security issues and other potential errors. |
45
| nested.cpp:79:32:79:38 | call to get_fmt | The format string argument to diagnostic should be constant to prevent security issues and other potential errors. |
56
| nested.cpp:87:18:87:20 | fmt | The format string argument to diagnostic should be constant to prevent security issues and other potential errors. |
67
| test.cpp:51:10:51:21 | call to make_message | The format string argument to printf should be constant to prevent security issues and other potential errors. |
7-
| test.cpp:57:12:57:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
8-
| test.cpp:60:12:60:21 | call to const_wash | The format string argument to printf should be constant to prevent security issues and other potential errors. |
9-
| test.cpp:61:12:61:26 | ... + ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
10-
| test.cpp:62:12:62:17 | + ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
11-
| test.cpp:63:12:63:18 | * ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
12-
| test.cpp:64:12:64:18 | & ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
13-
| test.cpp:65:12:65:39 | ... + ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
14-
| test.cpp:67:10:67:35 | ... + ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
15-
| test.cpp:70:12:70:20 | ... + ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
16-
| test.cpp:76:12:76:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
17-
| test.cpp:82:12:82:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
18-
| test.cpp:88:12:88:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
19-
| test.cpp:93:12:93:18 | ++ ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
20-
| test.cpp:100:12:100:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
21-
| test.cpp:110:12:110:24 | new[] | The format string argument to printf should be constant to prevent security issues and other potential errors. |
22-
| test.cpp:115:12:115:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
238
| test.cpp:130:20:130:26 | access to array | The format string argument to sprintf should be constant to prevent security issues and other potential errors. |
249
| test.cpp:157:12:157:15 | data | The format string argument to printf should be constant to prevent security issues and other potential errors. |
10+
| test.cpp:170:12:170:14 | res | The format string argument to printf should be constant to prevent security issues and other potential errors. |
11+
| test.cpp:195:31:195:33 | str | The format string argument to StringCchPrintfW should be constant to prevent security issues and other potential errors. |
12+
| test.cpp:197:11:197:14 | wstr | The format string argument to wprintf should be constant to prevent security issues and other potential errors. |
13+
| test.cpp:205:12:205:20 | ... + ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
14+
| test.cpp:206:12:206:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
15+
| test.cpp:211:12:211:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
16+
| test.cpp:217:12:217:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
17+
| test.cpp:223:12:223:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
18+
| test.cpp:228:12:228:18 | ++ ... | The format string argument to printf should be constant to prevent security issues and other potential errors. |
19+
| test.cpp:235:12:235:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
20+
| test.cpp:242:12:242:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |
21+
| test.cpp:247:12:247:16 | hello | The format string argument to printf should be constant to prevent security issues and other potential errors. |

cpp/ql/test/query-tests/Likely Bugs/Format/NonConstantFormat/nested.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ extern "C" int snprintf ( char * s, int n, const char * format, ... );
1818
struct A {
1919
void do_print(const char *fmt0) {
2020
char buf[32];
21-
snprintf(buf, 32, fmt0); // GOOD [FALSE POSITIVE]
21+
snprintf(buf, 32, fmt0); // BAD, all paths from unknown const char*, not assuming literal
2222
}
2323
};
2424

@@ -34,12 +34,12 @@ struct C {
3434
void do_some_printing(const char *fmt) {
3535
b.do_printing(fmt);
3636
}
37-
const char *ext_fmt_str(void);
37+
const char *ext_fmt_str(void); // NOTE: not assuming result is literal
3838
};
3939

4040
void foo(void) {
4141
C c;
42-
c.do_some_printing(c.ext_fmt_str()); // BAD [NOT DETECTED]
42+
c.do_some_printing(c.ext_fmt_str());
4343
}
4444

4545
struct some_class {
@@ -76,12 +76,12 @@ void diagnostic(const char *fmt, ...)
7676
}
7777

7878
void bar(void) {
79-
diagnostic (some_instance->get_fmt()); // BAD
79+
diagnostic (some_instance->get_fmt()); // BAD const char* but not assuming literal
8080
}
8181

8282
namespace ns {
8383

84-
class blab {
84+
class blab {
8585
void out1(void) {
8686
char *fmt = (char *)__builtin_alloca(10);
8787
diagnostic(fmt); // BAD

0 commit comments

Comments
 (0)