Skip to content

Commit 80bf38d

Browse files
committed
Initial working draft of non-const source refactor.
1 parent 5e5fea8 commit 80bf38d

File tree

4 files changed

+164
-116
lines changed

4 files changed

+164
-116
lines changed

cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql

Lines changed: 54 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -18,115 +18,80 @@
1818
import semmle.code.cpp.ir.dataflow.TaintTracking
1919
import semmle.code.cpp.commons.Printf
2020
import semmle.code.cpp.security.FlowSources
21+
import semmle.code.cpp.ir.dataflow.internal.ModelUtil
22+
import semmle.code.cpp.models.interfaces.DataFlow
23+
import semmle.code.cpp.models.interfaces.Taint
24+
import semmle.code.cpp.ir.implementation.raw.Instruction
2125

2226
class UncalledFunction extends Function {
23-
UncalledFunction() { not exists(Call c | c.getTarget() = this) }
27+
UncalledFunction() {
28+
not exists(Call c | c.getTarget() = this) and
29+
not this.(MemberFunction).overrides(_)
30+
}
2431
}
2532

26-
// For the following `...gettext` functions, we assume that
27-
// all translations preserve the type and order of `%` specifiers
28-
// (and hence are safe to use as format strings). This
29-
// assumption is hard-coded into the query.
30-
predicate whitelistFunction(Function f, int arg) {
31-
// basic variations of gettext
32-
f.getName() = "_" and arg = 0
33-
or
34-
f.getName() = "gettext" and arg = 0
35-
or
36-
f.getName() = "dgettext" and arg = 1
37-
or
38-
f.getName() = "dcgettext" and arg = 1
39-
or
40-
// plural variations of gettext that take one format string for singular and another for plural form
41-
f.getName() = "ngettext" and
42-
(arg = 0 or arg = 1)
33+
/**
34+
* Holds if `node` is a non-constant source of data flow.
35+
* This is defined as either:
36+
* 1) a `FlowSource`
37+
* 2) a parameter of an 'uncalled' function (i.e., a function that is not called in the program)
38+
* 3) an argument to a function with no definition that is not known to define the output through its input
39+
* 4) an out arg of a function with no definition that is not known to define the output through its input
40+
*
41+
* The latter two cases address identifying standard string manipulation libraries as input sources
42+
* e.g., strcpy, but it will identify unknown function calls as possible non-constant source
43+
* since it cannot be determined if the out argument or return is constant.
44+
*/
45+
predicate isNonConst(DataFlow::Node node) {
46+
node instanceof FlowSource
4347
or
44-
f.getName() = "dngettext" and
45-
(arg = 1 or arg = 2)
48+
exists(UncalledFunction f | f.getAParameter() = node.asParameter())
4649
or
47-
f.getName() = "dcngettext" and
48-
(arg = 1 or arg = 2)
49-
}
50-
51-
// we assume that ALL uses of the `_` macro
52-
// return constant string literals
53-
predicate underscoreMacro(Expr e) {
54-
exists(MacroInvocation mi |
55-
mi.getMacroName() = "_" and
56-
mi.getExpr() = e
50+
// Consider as an input any out arg of a function or a function's return where the function is not:
51+
// 1. a function with a known dataflow or taintflow from input to output and the `node` is the output
52+
// 2. a function where there is a known definition
53+
// i.e., functions that with unknown bodies and are not known to define the output through its input
54+
// are considered as possible non-const sources
55+
(
56+
node instanceof DataFlow::DefinitionByReferenceNode or
57+
node.asIndirectExpr() instanceof Call
58+
) and
59+
not exists(Function func, FunctionInput input, FunctionOutput output, CallInstruction call |
60+
// NOTE: we must include dataflow and taintflow. e.g., including only dataflow we will find sprintf
61+
// variant function's output are now possible non-const sources
62+
(
63+
func.(DataFlowFunction).hasDataFlow(input, output) or
64+
func.(TaintFunction).hasTaintFlow(input, output)
65+
) and
66+
node = callOutput(call, output) and
67+
call.getStaticCallTarget() = func
68+
) and
69+
not exists(Call c |
70+
c.getTarget().hasDefinition() and
71+
if node instanceof DataFlow::DefinitionByReferenceNode
72+
then c.getAnArgument() = node.asDefiningArgument()
73+
else c = [node.asExpr(), node.asIndirectExpr()]
5774
)
5875
}
5976

6077
/**
61-
* Holds if `t` cannot hold a character array, directly or indirectly.
78+
* Holds if `sink` is a sink is a format string of any
79+
* `FormattingFunctionCall`.
6280
*/
63-
predicate cannotContainString(Type t, boolean isIndirect) {
64-
isIndirect = false and
65-
exists(Type unspecified |
66-
unspecified = t.getUnspecifiedType() and
67-
not unspecified instanceof UnknownType
68-
|
69-
unspecified instanceof BuiltInType or
70-
unspecified instanceof IntegralOrEnumType
71-
)
72-
}
73-
74-
predicate isNonConst(DataFlow::Node node, boolean isIndirect) {
75-
exists(Expr e |
76-
e = node.asExpr() and isIndirect = false
77-
or
78-
e = node.asIndirectExpr() and isIndirect = true
79-
|
80-
exists(FunctionCall fc | fc = e |
81-
not (
82-
whitelistFunction(fc.getTarget(), _) or
83-
fc.getTarget().hasDefinition()
84-
)
85-
)
86-
or
87-
exists(Variable v | v = e.(VariableAccess).getTarget() |
88-
v.getType().(ArrayType).getBaseType() instanceof CharType and
89-
exists(AssignExpr ae |
90-
ae.getLValue().(ArrayExpr).getArrayBase().(VariableAccess).getTarget() = v
91-
)
92-
)
93-
or
94-
exists(UncalledFunction f, Parameter p | f.getAParameter() = p |
95-
p = e.(VariableAccess).getTarget()
96-
)
97-
or
98-
node instanceof FlowSource
99-
or
100-
node instanceof DataFlow::DefinitionByReferenceNode and
101-
not exists(FormattingFunctionCall fc | node.asDefiningArgument() = fc.getOutputArgument(_)) and
102-
not exists(Call c |
103-
c.getAnArgument() = node.asDefiningArgument() and c.getTarget().hasDefinition()
104-
)
105-
)
106-
or
107-
node instanceof DataFlow::DefinitionByReferenceNode and
108-
isIndirect = true
109-
}
110-
111-
pragma[noinline]
112-
predicate isBarrierNode(DataFlow::Node node) {
113-
underscoreMacro([node.asExpr(), node.asIndirectExpr()])
114-
or
115-
exists(node.asExpr()) and
116-
cannotContainString(node.getType(), false)
117-
}
118-
11981
predicate isSinkImpl(DataFlow::Node sink, Expr formatString) {
12082
[sink.asExpr(), sink.asIndirectExpr()] = formatString and
12183
exists(FormattingFunctionCall fc | formatString = fc.getArgument(fc.getFormatParameterIndex()))
12284
}
12385

12486
module NonConstFlowConfig implements DataFlow::ConfigSig {
125-
predicate isSource(DataFlow::Node source) { isNonConst(source, _) }
87+
predicate isSource(DataFlow::Node source) { isNonConst(source) }
12688

12789
predicate isSink(DataFlow::Node sink) { isSinkImpl(sink, _) }
12890

129-
predicate isBarrier(DataFlow::Node node) { isBarrierNode(node) }
91+
predicate isBarrier(DataFlow::Node node) {
92+
// Ignore tracing non-const through array indices
93+
exists(ArrayExpr a | a.getArrayOffset() = node.asExpr())
94+
}
13095
}
13196

13297
module NonConstFlow = TaintTracking::Global<NonConstFlowConfig>;

cpp/ql/test/query-tests/Likely Bugs/Format/NonConstantFormat/NonConstantFormat.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ extern char *dcngettext (const char *__domainname, const char *__msgid1,
2323
extern char *any_random_function(const char *);
2424

2525
#define NULL ((void*)0)
26-
#define _(X) any_random_function((X))
26+
#define _(X) gettext(X)
2727

2828
int main(int argc, char **argv) {
2929
if(argc > 1)
@@ -40,10 +40,9 @@ int main(int argc, char **argv) {
4040
printf(gettext("%d arguments\n"), argc-1); // GOOD
4141
printf(any_random_function("%d arguments\n"), argc-1); // BAD
4242

43-
// Even though `_` is mapped to `some_random_function` above,
44-
// the following call should not be flagged.
45-
printf(_(any_random_function("%d arguments\n")),
46-
argc-1); // GOOD
43+
44+
45+
printf(_(any_random_function("%d arguments\n")), argc-1); // BAD
4746

4847
return 0;
4948
}

cpp/ql/test/query-tests/Likely Bugs/Format/NonConstantFormat/nested.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ extern "C" int snprintf ( char * s, int n, const char * format, ... );
1818
struct A {
1919
void do_print(const char *fmt0) {
2020
char buf[32];
21-
snprintf(buf, 32, fmt0); // GOOD [FALSE POSITIVE]
21+
snprintf(buf, 32, fmt0); // BAD through call from c.do_some_printing(c.ext_fmt_str())
2222
}
2323
};
2424

@@ -39,7 +39,7 @@ struct C {
3939

4040
void foo(void) {
4141
C c;
42-
c.do_some_printing(c.ext_fmt_str()); // BAD [NOT DETECTED]
42+
c.do_some_printing(c.ext_fmt_str());
4343
}
4444

4545
struct some_class {

cpp/ql/test/query-tests/Likely Bugs/Format/NonConstantFormat/test.cpp

Lines changed: 104 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -54,66 +54,66 @@ int main(int argc, char **argv) {
5454
{
5555
char hello[] = "hello, World\n";
5656
hello[0] = 'H';
57-
printf(hello); // BAD
57+
printf(hello); // GOOD
5858
printf(_(hello)); // GOOD
5959
printf(gettext(hello)); // GOOD
60-
printf(const_wash(hello)); // BAD
61-
printf((hello + 1) + 1); // BAD
62-
printf(+hello); // BAD
63-
printf(*&hello); // BAD
64-
printf(&*hello); // BAD
65-
printf((char*)(void*)+(hello+1) + 1); // BAD
60+
printf(const_wash(hello)); // GOOD
61+
printf((hello + 1) + 1); // GOOD
62+
printf(+hello); // GOOD
63+
printf(*&hello); // GOOD
64+
printf(&*hello); // GOOD
65+
printf((char*)(void*)+(hello+1) + 1); // GOOD
6666
}
67-
printf(("Hello, World\n" + 1) + 1); // BAD
67+
printf(("Hello, World\n" + 1) + 1); // GOOD
6868
{
6969
const char *hello = "Hello, World\n";
70-
printf(hello + 1); // BAD
70+
printf(hello + 1); // GOOD
7171
printf(hello); // GOOD
7272
}
7373
{
7474
const char *hello = "Hello, World\n";
7575
hello += 1;
76-
printf(hello); // BAD
76+
printf(hello); // GOOD
7777
}
7878
{
7979
// Same as above block but using "x = x + 1" syntax
8080
const char *hello = "Hello, World\n";
8181
hello = hello + 1;
82-
printf(hello); // BAD
82+
printf(hello); // GOOD
8383
}
8484
{
8585
// Same as above block but using "x++" syntax
8686
const char *hello = "Hello, World\n";
8787
hello++;
88-
printf(hello); // BAD
88+
printf(hello); // GOOD
8989
}
9090
{
9191
// Same as above block but using "++x" as subexpression
9292
const char *hello = "Hello, World\n";
93-
printf(++hello); // BAD
93+
printf(++hello); // GOOD
9494
}
9595
{
9696
// Same as above block but through a pointer
9797
const char *hello = "Hello, World\n";
9898
const char **p = &hello;
9999
(*p)++;
100-
printf(hello); // BAD
100+
printf(hello); // GOOD
101101
}
102102
{
103103
// Same as above block but through a C++ reference
104104
const char *hello = "Hello, World\n";
105105
const char *&p = hello;
106106
p++;
107-
printf(hello); // BAD [NOT DETECTED]
107+
printf(hello); // GOOD
108108
}
109109
if (gettext_debug) {
110-
printf(new char[100]); // BAD
110+
printf(new char[100]); // BAD [FALSE NEGATIVE]
111111
}
112112
{
113113
const char *hello = "Hello, World\n";
114-
const char *const *p = &hello; // harmless reference to const pointer
115-
printf(hello); // GOOD [FALSE POSITIVE]
116-
hello++; // modification comes after use and so does no harm
114+
const char *const *p = &hello;
115+
printf(hello); // GOOD
116+
hello++;
117117
}
118118
printf(argc > 2 ? "More than one\n" : _("Only one\n")); // GOOD
119119

@@ -154,7 +154,7 @@ void print_ith_message() {
154154

155155
void fmt_via_strcpy(char *data) {
156156
strcpy(data, "some string");
157-
printf(data); // BAD
157+
printf(data); // GOOD [FALSE POSITIVE: Due to inaccurate dataflow killers]
158158
}
159159

160160
void fmt_with_assignment() {
@@ -163,3 +163,87 @@ void fmt_with_assignment() {
163163
x = y = "a";
164164
printf(y); // GOOD
165165
}
166+
167+
void fmt_via_strcpy_bad(char *data) {
168+
char res[100];
169+
strcpy(res, data);
170+
printf(res); // BAD
171+
}
172+
173+
174+
int wprintf(const wchar_t *format,...);
175+
typedef wchar_t *STRSAFE_LPWSTR;
176+
typedef const wchar_t *STRSAFE_LPCWSTR;
177+
typedef unsigned int size_t;
178+
179+
void StringCchPrintfW(
180+
STRSAFE_LPWSTR pszDest,
181+
size_t cchDest,
182+
STRSAFE_LPCWSTR pszFormat,
183+
...
184+
);
185+
186+
void wchar_t_test_good(){
187+
wchar_t wstr[100];
188+
StringCchPrintfW(wstr, 100, L"STRING"); // GOOD
189+
190+
wprintf(wstr); // GOOD
191+
}
192+
193+
void wchar_t_test_bad(wchar_t* str){
194+
wchar_t wstr[100];
195+
StringCchPrintfW(wstr, 100, str); // BAD
196+
197+
wprintf(wstr); // BAD
198+
}
199+
200+
const char* get_string();
201+
202+
void pointer_arithmetic_test_on_bad_string(){
203+
{
204+
const char *hello = get_string();
205+
printf(hello + 1); // BAD
206+
printf(hello); // BAD
207+
}
208+
{
209+
const char *hello = get_string();
210+
hello += 1;
211+
printf(hello); // BAD
212+
}
213+
{
214+
// Same as above block but using "x = x + 1" syntax
215+
const char *hello = get_string();
216+
hello = hello + 1;
217+
printf(hello); // BAD
218+
}
219+
{
220+
// Same as above block but using "x++" syntax
221+
const char *hello = get_string();
222+
hello++;
223+
printf(hello); // BAD
224+
}
225+
{
226+
// Same as above block but using "++x" as subexpression
227+
const char *hello = get_string();
228+
printf(++hello); // BAD
229+
}
230+
{
231+
// Same as above block but through a pointer
232+
const char *hello = get_string();
233+
const char **p = &hello;
234+
(*p)++;
235+
printf(hello); // BAD
236+
}
237+
{
238+
// Same as above block but through a C++ reference
239+
const char *hello = get_string();
240+
const char *&p = hello;
241+
p++;
242+
printf(hello); // BAD
243+
}
244+
{
245+
const char *hello = get_string();
246+
const char *const *p = &hello;
247+
printf(hello); // BAD
248+
}
249+
}

0 commit comments

Comments
 (0)