Skip to content

Commit 9aefdca

Browse files
authored
Merge pull request #15875 from MathiasVP/bring-back-type-barriers-in-non-constant-format
C++: Clean up `cpp/non-constant-format`
2 parents 30d906d + 2fc0922 commit 9aefdca

File tree

4 files changed

+76
-58
lines changed

4 files changed

+76
-58
lines changed

cpp/ql/src/Likely Bugs/Format/NonConstantFormat.ql

Lines changed: 65 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,37 @@ class UncalledFunction extends Function {
3737
}
3838
}
3939

40+
/** The `unsigned short` type. */
41+
class UnsignedShort extends ShortType {
42+
UnsignedShort() { this.isUnsigned() }
43+
}
44+
45+
/**
46+
* Holds if `t` cannot refer to a string. That is, it's a built-in
47+
* or arithmetic type that is not a "`char` like" type.
48+
*/
49+
predicate cannotContainString(Type t) {
50+
exists(Type unspecified |
51+
unspecified = t.getUnspecifiedType() and
52+
not unspecified instanceof UnknownType and
53+
not unspecified instanceof CharType and
54+
not unspecified instanceof WideCharType and
55+
not unspecified instanceof Char8Type and
56+
not unspecified instanceof Char16Type and
57+
not unspecified instanceof Char32Type and
58+
// C often defines `wchar_t` as `unsigned short`
59+
not unspecified instanceof UnsignedShort
60+
|
61+
unspecified instanceof ArithmeticType or
62+
unspecified instanceof BuiltInType
63+
)
64+
}
65+
66+
predicate dataFlowOrTaintFlowFunction(Function func, FunctionOutput output) {
67+
func.(DataFlowFunction).hasDataFlow(_, output) or
68+
func.(TaintFunction).hasTaintFlow(_, output)
69+
}
70+
4071
/**
4172
* Holds if `node` is a non-constant source of data flow for non-const format string detection.
4273
* This is defined as either:
@@ -69,7 +100,9 @@ predicate isNonConst(DataFlow::Node node) {
69100
// Parameters of uncalled functions that aren't const
70101
exists(UncalledFunction f, Parameter p |
71102
f.getAParameter() = p and
72-
p = node.asParameter() and
103+
// We pick the indirection of the parameter since this query is focused
104+
// on strings.
105+
p = node.asParameter(1) and
73106
// Ignore main's argv parameter as it is already considered a `FlowSource`
74107
// not ignoring it will result in path redundancies
75108
(f.getName() = "main" implies p != f.getParameter(1))
@@ -82,30 +115,27 @@ predicate isNonConst(DataFlow::Node node) {
82115
// are considered as possible non-const sources
83116
// The function's output must also not be const to be considered a non-const source
84117
exists(Function func, CallInstruction call |
85-
// NOTE: could use `Call` getAnArgument() instead of `CallInstruction` but requires two
86-
// variables representing the same call in ordoer to use `callOutput` below.
87-
exists(Expr arg |
88-
call.getPositionalArgumentOperand(_).getDef().getUnconvertedResultExpression() = arg and
89-
arg = node.asDefiningArgument()
90-
)
91-
or
92-
call.getUnconvertedResultExpression() = node.asIndirectExpr()
118+
not func.hasDefinition() and
119+
func = call.getStaticCallTarget()
93120
|
94-
func = call.getStaticCallTarget() and
121+
// Case 1: It's a known dataflow or taintflow function with flow to the return value
122+
call.getUnconvertedResultExpression() = node.asIndirectExpr() and
95123
not exists(FunctionOutput output |
96-
// NOTE: we must include dataflow and taintflow. e.g., including only dataflow we will find sprintf
97-
// variant function's output are now possible non-const sources
98-
pragma[only_bind_out](func).(DataFlowFunction).hasDataFlow(_, output) or
99-
pragma[only_bind_out](func).(TaintFunction).hasTaintFlow(_, output)
100-
|
124+
dataFlowOrTaintFlowFunction(func, output) and
125+
output.isReturnValueDeref(_) and
101126
node = callOutput(call, output)
102127
)
103-
) and
104-
not exists(Call c |
105-
c.getTarget().hasDefinition() and
106-
if node instanceof DataFlow::DefinitionByReferenceNode
107-
then c.getAnArgument() = node.asDefiningArgument()
108-
else c = [node.asExpr(), node.asIndirectExpr()]
128+
or
129+
// Case 2: It's a known dataflow or taintflow function with flow to an output parameter
130+
exists(int i |
131+
call.getPositionalArgumentOperand(i).getDef().getUnconvertedResultExpression() =
132+
node.asDefiningArgument() and
133+
not exists(FunctionOutput output |
134+
dataFlowOrTaintFlowFunction(func, output) and
135+
output.isParameterDeref(i, _) and
136+
node = callOutput(call, output)
137+
)
138+
)
109139
)
110140
}
111141

@@ -114,18 +144,29 @@ predicate isNonConst(DataFlow::Node node) {
114144
* `FormattingFunctionCall`.
115145
*/
116146
predicate isSinkImpl(DataFlow::Node sink, Expr formatString) {
117-
[sink.asExpr(), sink.asIndirectExpr()] = formatString and
147+
sink.asIndirectExpr() = formatString and
118148
exists(FormattingFunctionCall fc | formatString = fc.getArgument(fc.getFormatParameterIndex()))
119149
}
120150

121151
module NonConstFlowConfig implements DataFlow::ConfigSig {
122-
predicate isSource(DataFlow::Node source) { isNonConst(source) }
152+
predicate isSource(DataFlow::Node source) {
153+
exists(Type t |
154+
isNonConst(source) and
155+
t = source.getType() and
156+
not cannotContainString(t)
157+
)
158+
}
123159

124160
predicate isSink(DataFlow::Node sink) { isSinkImpl(sink, _) }
125161

126162
predicate isBarrier(DataFlow::Node node) {
127163
// Ignore tracing non-const through array indices
128-
exists(ArrayExpr a | a.getArrayOffset() = node.asExpr())
164+
exists(ArrayExpr a | a.getArrayOffset() = node.asIndirectExpr())
165+
or
166+
exists(Type t |
167+
t = node.getType() and
168+
cannotContainString(t)
169+
)
129170
}
130171
}
131172

cpp/ql/test/query-tests/Likely Bugs/Format/NonConstantFormat/NonConstantFormat.expected

Lines changed: 9 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,10 @@ edges
88
| nested.cpp:35:19:35:21 | *fmt | nested.cpp:27:32:27:34 | *fmt | provenance | |
99
| nested.cpp:42:24:42:34 | *call to ext_fmt_str | nested.cpp:34:37:34:39 | *fmt | provenance | |
1010
| nested.cpp:86:19:86:46 | *call to __builtin_alloca | nested.cpp:87:18:87:20 | *fmt | provenance | |
11-
| test.cpp:27:39:27:39 | n | test.cpp:27:13:27:24 | **make_message | provenance | |
12-
| test.cpp:46:14:46:17 | argc | test.cpp:51:23:51:30 | ... - ... | provenance | |
1311
| test.cpp:46:27:46:30 | **argv | test.cpp:130:20:130:26 | *access to array | provenance | |
14-
| test.cpp:51:23:51:30 | ... - ... | test.cpp:27:39:27:39 | n | provenance | |
15-
| test.cpp:51:23:51:30 | ... - ... | test.cpp:51:10:51:21 | *call to make_message | provenance | |
16-
| test.cpp:155:27:155:30 | data | test.cpp:157:12:157:15 | data | provenance | |
17-
| test.cpp:167:31:167:34 | data | test.cpp:170:12:170:14 | *res | provenance | |
18-
| test.cpp:193:32:193:34 | str | test.cpp:195:31:195:33 | str | provenance | |
19-
| test.cpp:193:32:193:34 | str | test.cpp:197:11:197:14 | *wstr | provenance | |
12+
| test.cpp:167:31:167:34 | *data | test.cpp:170:12:170:14 | *res | provenance | |
13+
| test.cpp:193:32:193:34 | *str | test.cpp:195:31:195:33 | *str | provenance | |
14+
| test.cpp:193:32:193:34 | *str | test.cpp:197:11:197:14 | *wstr | provenance | |
2015
| test.cpp:204:25:204:36 | *call to get_string | test.cpp:205:12:205:20 | *... + ... | provenance | |
2116
| test.cpp:204:25:204:36 | *call to get_string | test.cpp:206:12:206:16 | *hello | provenance | |
2217
| test.cpp:209:25:209:36 | *call to get_string | test.cpp:211:12:211:16 | *hello | provenance | |
@@ -42,19 +37,12 @@ nodes
4237
| nested.cpp:79:32:79:38 | *call to get_fmt | semmle.label | *call to get_fmt |
4338
| nested.cpp:86:19:86:46 | *call to __builtin_alloca | semmle.label | *call to __builtin_alloca |
4439
| nested.cpp:87:18:87:20 | *fmt | semmle.label | *fmt |
45-
| test.cpp:27:13:27:24 | **make_message | semmle.label | **make_message |
46-
| test.cpp:27:39:27:39 | n | semmle.label | n |
47-
| test.cpp:46:14:46:17 | argc | semmle.label | argc |
4840
| test.cpp:46:27:46:30 | **argv | semmle.label | **argv |
49-
| test.cpp:51:10:51:21 | *call to make_message | semmle.label | *call to make_message |
50-
| test.cpp:51:23:51:30 | ... - ... | semmle.label | ... - ... |
5141
| test.cpp:130:20:130:26 | *access to array | semmle.label | *access to array |
52-
| test.cpp:155:27:155:30 | data | semmle.label | data |
53-
| test.cpp:157:12:157:15 | data | semmle.label | data |
54-
| test.cpp:167:31:167:34 | data | semmle.label | data |
42+
| test.cpp:167:31:167:34 | *data | semmle.label | *data |
5543
| test.cpp:170:12:170:14 | *res | semmle.label | *res |
56-
| test.cpp:193:32:193:34 | str | semmle.label | str |
57-
| test.cpp:195:31:195:33 | str | semmle.label | str |
44+
| test.cpp:193:32:193:34 | *str | semmle.label | *str |
45+
| test.cpp:195:31:195:33 | *str | semmle.label | *str |
5846
| test.cpp:197:11:197:14 | *wstr | semmle.label | *wstr |
5947
| test.cpp:204:25:204:36 | *call to get_string | semmle.label | *call to get_string |
6048
| test.cpp:205:12:205:20 | *... + ... | semmle.label | *... + ... |
@@ -74,20 +62,17 @@ nodes
7462
| test.cpp:245:25:245:36 | *call to get_string | semmle.label | *call to get_string |
7563
| test.cpp:247:12:247:16 | *hello | semmle.label | *hello |
7664
subpaths
77-
| test.cpp:51:23:51:30 | ... - ... | test.cpp:27:39:27:39 | n | test.cpp:27:13:27:24 | **make_message | test.cpp:51:10:51:21 | *call to make_message |
7865
#select
7966
| NonConstantFormat.c:30:10:30:16 | *access to array | NonConstantFormat.c:28:27:28:30 | **argv | NonConstantFormat.c:30:10:30:16 | *access to array | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | NonConstantFormat.c:30:3:30:8 | call to printf | printf |
8067
| NonConstantFormat.c:41:9:41:45 | *call to any_random_function | NonConstantFormat.c:41:9:41:45 | *call to any_random_function | NonConstantFormat.c:41:9:41:45 | *call to any_random_function | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | NonConstantFormat.c:41:2:41:7 | call to printf | printf |
8168
| NonConstantFormat.c:45:9:45:48 | *call to gettext | NonConstantFormat.c:45:11:45:47 | *call to any_random_function | NonConstantFormat.c:45:9:45:48 | *call to gettext | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | NonConstantFormat.c:45:2:45:7 | call to printf | printf |
8269
| nested.cpp:21:23:21:26 | *fmt0 | nested.cpp:42:24:42:34 | *call to ext_fmt_str | nested.cpp:21:23:21:26 | *fmt0 | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | nested.cpp:21:5:21:12 | call to snprintf | snprintf |
8370
| nested.cpp:79:32:79:38 | *call to get_fmt | nested.cpp:79:32:79:38 | *call to get_fmt | nested.cpp:79:32:79:38 | *call to get_fmt | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | nested.cpp:79:5:79:14 | call to diagnostic | diagnostic |
8471
| nested.cpp:87:18:87:20 | *fmt | nested.cpp:86:19:86:46 | *call to __builtin_alloca | nested.cpp:87:18:87:20 | *fmt | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | nested.cpp:87:7:87:16 | call to diagnostic | diagnostic |
85-
| test.cpp:51:10:51:21 | *call to make_message | test.cpp:46:14:46:17 | argc | test.cpp:51:10:51:21 | *call to make_message | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:51:3:51:8 | call to printf | printf |
8672
| test.cpp:130:20:130:26 | *access to array | test.cpp:46:27:46:30 | **argv | test.cpp:130:20:130:26 | *access to array | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:130:2:130:10 | call to sprintf | sprintf |
87-
| test.cpp:157:12:157:15 | data | test.cpp:155:27:155:30 | data | test.cpp:157:12:157:15 | data | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:157:5:157:10 | call to printf | printf |
88-
| test.cpp:170:12:170:14 | *res | test.cpp:167:31:167:34 | data | test.cpp:170:12:170:14 | *res | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:170:5:170:10 | call to printf | printf |
89-
| test.cpp:195:31:195:33 | str | test.cpp:193:32:193:34 | str | test.cpp:195:31:195:33 | str | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:195:3:195:18 | call to StringCchPrintfW | StringCchPrintfW |
90-
| test.cpp:197:11:197:14 | *wstr | test.cpp:193:32:193:34 | str | test.cpp:197:11:197:14 | *wstr | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:197:3:197:9 | call to wprintf | wprintf |
73+
| test.cpp:170:12:170:14 | *res | test.cpp:167:31:167:34 | *data | test.cpp:170:12:170:14 | *res | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:170:5:170:10 | call to printf | printf |
74+
| test.cpp:195:31:195:33 | *str | test.cpp:193:32:193:34 | *str | test.cpp:195:31:195:33 | *str | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:195:3:195:18 | call to StringCchPrintfW | StringCchPrintfW |
75+
| test.cpp:197:11:197:14 | *wstr | test.cpp:193:32:193:34 | *str | test.cpp:197:11:197:14 | *wstr | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:197:3:197:9 | call to wprintf | wprintf |
9176
| test.cpp:205:12:205:20 | *... + ... | test.cpp:204:25:204:36 | *call to get_string | test.cpp:205:12:205:20 | *... + ... | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:205:5:205:10 | call to printf | printf |
9277
| test.cpp:206:12:206:16 | *hello | test.cpp:204:25:204:36 | *call to get_string | test.cpp:206:12:206:16 | *hello | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:206:5:206:10 | call to printf | printf |
9378
| test.cpp:211:12:211:16 | *hello | test.cpp:209:25:209:36 | *call to get_string | test.cpp:211:12:211:16 | *hello | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:211:5:211:10 | call to printf | printf |

cpp/ql/test/query-tests/Likely Bugs/Format/NonConstantFormat/test.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ int main(int argc, char **argv) {
4848
printf(choose_message(argc - 1), argc - 1); // GOOD
4949
printf(messages[1]); // GOOD
5050
printf(message); // GOOD
51-
printf(make_message(argc - 1)); // BAD
51+
printf(make_message(argc - 1)); // BAD [NOT DETECTED]
5252
printf("Hello, World\n"); // GOOD
5353
printf(_("Hello, World\n")); // GOOD
5454
{
@@ -154,7 +154,7 @@ void print_ith_message() {
154154

155155
void fmt_via_strcpy(char *data) {
156156
strcpy(data, "some string");
157-
printf(data); // GOOD [FALSE POSITIVE: Due to inaccurate dataflow killers]
157+
printf(data); // GOOD
158158
}
159159

160160
void fmt_with_assignment() {

cpp/ql/test/query-tests/Security/CWE/CWE-134/semmle/consts/NonConstantFormat.expected

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,7 @@ edges
2323
| consts.cpp:106:13:106:19 | *call to varFunc | consts.cpp:107:9:107:10 | *v5 | provenance | |
2424
| consts.cpp:111:7:111:13 | *call to varFunc | consts.cpp:112:9:112:10 | *v6 | provenance | |
2525
| consts.cpp:139:13:139:16 | readString output argument | consts.cpp:140:9:140:11 | *v11 | provenance | |
26-
| consts.cpp:139:13:139:16 | readString output argument | consts.cpp:140:9:140:11 | v11 | provenance | |
2726
| consts.cpp:144:16:144:18 | readStringRef output argument | consts.cpp:145:9:145:11 | *v12 | provenance | |
28-
| consts.cpp:144:16:144:18 | readStringRef output argument | consts.cpp:145:9:145:11 | v12 | provenance | |
2927
nodes
3028
| consts.cpp:24:7:24:9 | **gv1 | semmle.label | **gv1 |
3129
| consts.cpp:29:7:29:25 | **nonConstFuncToArray | semmle.label | **nonConstFuncToArray |
@@ -47,13 +45,9 @@ nodes
4745
| consts.cpp:130:9:130:10 | *v9 | semmle.label | *v9 |
4846
| consts.cpp:135:9:135:11 | *v10 | semmle.label | *v10 |
4947
| consts.cpp:139:13:139:16 | readString output argument | semmle.label | readString output argument |
50-
| consts.cpp:139:13:139:16 | readString output argument | semmle.label | readString output argument |
5148
| consts.cpp:140:9:140:11 | *v11 | semmle.label | *v11 |
52-
| consts.cpp:140:9:140:11 | v11 | semmle.label | v11 |
53-
| consts.cpp:144:16:144:18 | readStringRef output argument | semmle.label | readStringRef output argument |
5449
| consts.cpp:144:16:144:18 | readStringRef output argument | semmle.label | readStringRef output argument |
5550
| consts.cpp:145:9:145:11 | *v12 | semmle.label | *v12 |
56-
| consts.cpp:145:9:145:11 | v12 | semmle.label | v12 |
5751
subpaths
5852
#select
5953
| consts.cpp:86:9:86:10 | *v1 | consts.cpp:85:7:85:8 | gets output argument | consts.cpp:86:9:86:10 | *v1 | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | consts.cpp:86:2:86:7 | call to printf | printf |
@@ -78,6 +72,4 @@ subpaths
7872
| consts.cpp:135:9:135:11 | *v10 | consts.cpp:85:7:85:8 | gets output argument | consts.cpp:135:9:135:11 | *v10 | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | consts.cpp:135:2:135:7 | call to printf | printf |
7973
| consts.cpp:135:9:135:11 | *v10 | consts.cpp:90:12:90:13 | gets output argument | consts.cpp:135:9:135:11 | *v10 | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | consts.cpp:135:2:135:7 | call to printf | printf |
8074
| consts.cpp:140:9:140:11 | *v11 | consts.cpp:139:13:139:16 | readString output argument | consts.cpp:140:9:140:11 | *v11 | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | consts.cpp:140:2:140:7 | call to printf | printf |
81-
| consts.cpp:140:9:140:11 | v11 | consts.cpp:139:13:139:16 | readString output argument | consts.cpp:140:9:140:11 | v11 | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | consts.cpp:140:2:140:7 | call to printf | printf |
8275
| consts.cpp:145:9:145:11 | *v12 | consts.cpp:144:16:144:18 | readStringRef output argument | consts.cpp:145:9:145:11 | *v12 | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | consts.cpp:145:2:145:7 | call to printf | printf |
83-
| consts.cpp:145:9:145:11 | v12 | consts.cpp:144:16:144:18 | readStringRef output argument | consts.cpp:145:9:145:11 | v12 | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | consts.cpp:145:2:145:7 | call to printf | printf |

0 commit comments

Comments
 (0)