Skip to content

Commit 0db05fe

Browse files
committed
C++: Use the new dataflow library in the 'missing scanf' query.
1 parent 8c46bfd commit 0db05fe

File tree

3 files changed

+100
-132
lines changed

3 files changed

+100
-132
lines changed

cpp/ql/src/Critical/MissingCheckScanf.ql

Lines changed: 91 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -16,168 +16,141 @@
1616
import cpp
1717
import semmle.code.cpp.commons.Scanf
1818
import semmle.code.cpp.controlflow.Guards
19-
import semmle.code.cpp.ir.dataflow.DataFlow
19+
import semmle.code.cpp.dataflow.new.DataFlow::DataFlow
2020
import semmle.code.cpp.ir.IR
2121
import semmle.code.cpp.ir.ValueNumbering
2222

23-
/**
24-
* Holds if `call` is a `scanf`-like function that may write to `output` at index `index`.
25-
*
26-
* Furthermore, `instr` is the instruction that defines the address of the `index`'th argument
27-
* of `call`, and `vn` is the value number of `instr.`
28-
*/
29-
predicate isSource(ScanfFunctionCall call, int index, Instruction instr, ValueNumber vn, Expr output) {
30-
output = call.getOutputArgument(index).getFullyConverted() and
31-
instr.getConvertedResultExpression() = output and
32-
vn.getAnInstruction() = instr
33-
}
34-
35-
/**
36-
* Holds if `instr` is control-flow reachable in 0 or more steps from
37-
* a call to a `scanf`-like function.
38-
*/
23+
/** Holds if `n` reaches an argument to a call to a `scanf`-like function. */
3924
pragma[nomagic]
40-
predicate fwdFlow0(Instruction instr) {
41-
isSource(_, _, instr, _, _)
25+
predicate revFlow0(Node n) {
26+
isSink(_, _, n, _)
4227
or
43-
exists(Instruction prev |
44-
fwdFlow0(prev) and
45-
prev.getASuccessor() = instr
46-
)
28+
exists(Node succ | revFlow0(succ) | localFlowStep(n, succ))
4729
}
4830

4931
/**
50-
* Holds if `instr` is part of the IR translation of `access` that
51-
* is not an expression being deallocated, and `instr` has value
52-
* number `vn`.
32+
* Holds if `n` represents an uninitialized stack-allocated variable, or a
33+
* newly (and presumed uninitialized) heap allocation.
5334
*/
54-
predicate isSink(Instruction instr, Access access, ValueNumber vn) {
55-
instr.getAst() = access and
56-
not any(DeallocationExpr dealloc).getFreedExpr() = access and
57-
vn.getAnInstruction() = instr
35+
predicate isUninitialized(Node n) {
36+
exists(n.asUninitialized()) or
37+
n.asIndirectExpr(1) instanceof AllocationExpr
5838
}
5939

60-
/**
61-
* Holds if `instr` is part of a path from a call to a `scanf`-like function
62-
* to a use of the written variable.
63-
*/
6440
pragma[nomagic]
65-
predicate revFlow0(Instruction instr) {
66-
fwdFlow0(instr) and
41+
predicate fwdFlow0(Node n) {
42+
revFlow0(n) and
6743
(
68-
isSink(instr, _, _)
44+
isUninitialized(n)
6945
or
70-
exists(Instruction succ | revFlow0(succ) | instr.getASuccessor() = succ)
46+
exists(Node prev |
47+
fwdFlow0(prev) and
48+
localFlowStep(prev, n)
49+
)
7150
)
7251
}
7352

53+
predicate isSink(ScanfFunctionCall call, int index, Node n, Expr input) {
54+
input = call.getOutputArgument(index) and
55+
n.asIndirectExpr() = input
56+
}
57+
7458
/**
75-
* Holds if `instr` is part of a path from a call to a `scanf`-like function
76-
* that writes to a variable with value number `vn`, without passing through
77-
* redefinitions of the variable.
59+
* Holds if `call` is a `scanf`-like call and `output` is the `index`'th
60+
* argument that has not been previously initialized.
7861
*/
79-
pragma[nomagic]
80-
private predicate fwdFlow(Instruction instr, ValueNumber vn) {
81-
revFlow0(instr) and
82-
(
83-
isSource(_, _, instr, vn, _)
84-
or
85-
exists(Instruction prev |
86-
fwdFlow(prev, vn) and
87-
prev.getASuccessor() = instr and
88-
not isBarrier(instr, vn)
89-
)
90-
)
62+
predicate isRelevantScanfCall(ScanfFunctionCall call, int index, Expr output) {
63+
exists(Node n | fwdFlow0(n) and isSink(call, index, n, output))
9164
}
9265

9366
/**
94-
* Holds if `instr` is part of a path from a call to a `scanf`-like function
95-
* that writes to a variable with value number `vn`, without passing through
96-
* redefinitions of the variable.
97-
*
98-
* Note: This predicate only holds for the `(intr, vn)` pairs that are also
99-
* control-flow reachable from an argument to a `scanf`-like function call.
67+
* Holds if `call` is a `scanf`-like function that may write to `output` at
68+
* index `index` and `n` is the dataflow node that represents the data after
69+
* it has been written to by `call`.
10070
*/
101-
pragma[nomagic]
102-
predicate revFlow(Instruction instr, ValueNumber vn) {
103-
fwdFlow(instr, pragma[only_bind_out](vn)) and
104-
(
105-
isSink(instr, _, vn)
106-
or
107-
exists(Instruction succ | revFlow(succ, vn) |
108-
instr.getASuccessor() = succ and
109-
not isBarrier(succ, vn)
110-
)
111-
)
71+
predicate isSource(ScanfFunctionCall call, int index, Node n, Expr output) {
72+
isRelevantScanfCall(call, index, output) and
73+
output = call.getOutputArgument(index) and
74+
n.asDefiningArgument() = output
11275
}
11376

11477
/**
115-
* A type that bundles together a reachable instruction with the appropriate
116-
* value number (i.e., the value number that's transferred from the source
117-
* to the sink).
78+
* Holds if `n` is reachable from an output argument of a relevant call to
79+
* a `scanf`-like function.
11880
*/
119-
newtype TNode = MkNode(Instruction instr, ValueNumber vn) { revFlow(instr, vn) }
120-
121-
class Node extends MkNode {
122-
ValueNumber vn;
123-
Instruction instr;
124-
125-
Node() { this = MkNode(instr, vn) }
126-
127-
final string toString() { result = instr.toString() }
81+
pragma[nomagic]
82+
predicate fwdFlow(Node n) {
83+
isSource(_, _, n, _)
84+
or
85+
exists(Node prev |
86+
fwdFlow(prev) and
87+
localFlowStep(prev, n) and
88+
not isSanitizerOut(prev)
89+
)
90+
}
12891

129-
final Node getASuccessor() { result = MkNode(pragma[only_bind_out](instr.getASuccessor()), vn) }
92+
/** Holds if `n` should not have outgoing flow. */
93+
predicate isSanitizerOut(Node n) {
94+
// We disable flow out of sinks to reduce result duplication
95+
isSink(n, _)
96+
or
97+
// If the node is being passed to a function it may be
98+
// modified, and thus it's safe to later read the value.
99+
exists(n.asIndirectArgument())
100+
}
130101

131-
final Location getLocation() { result = instr.getLocation() }
102+
/**
103+
* Holds if `n` is a node such that `n.asExpr() = e` and `e` is not an
104+
* argument of a deallocation expression.
105+
*/
106+
predicate isSink(Node n, Expr e) {
107+
n.asExpr() = e and
108+
not any(DeallocationExpr dealloc).getFreedExpr() = e
132109
}
133110

134111
/**
135-
* Holds if `instr` is an instruction with value number `vn` that is
136-
* used in a store operation, or is overwritten by another call to
137-
* a `scanf`-like function.
112+
* Holds if `n` is part of a path from a call to a `scanf`-like function
113+
* to a use of the written variable.
138114
*/
139-
private predicate isBarrier(Instruction instr, ValueNumber vn) {
140-
// We only need to compute barriers for instructions that we
141-
// managed to hit during the initial flow stage.
142-
revFlow0(pragma[only_bind_into](instr)) and
143-
valueNumber(instr) = vn and
144-
exists(Expr e | instr.getAst() = e |
145-
instr = any(StoreInstruction s).getDestinationAddress()
115+
pragma[nomagic]
116+
predicate revFlow(Node n) {
117+
fwdFlow(n) and
118+
(
119+
isSink(n, _)
146120
or
147-
isSource(_, _, _, _, [e, e.getParent().(AddressOfExpr)])
121+
exists(Node succ |
122+
revFlow(succ) and
123+
localFlowStep(n, succ) and
124+
not isSanitizerOut(n)
125+
)
148126
)
149127
}
150128

151-
/** Holds if `n1` steps to `n2` in a single step. */
152-
predicate isSuccessor(Node n1, Node n2) { n1.getASuccessor() = n2 }
153-
154-
predicate hasFlow(Node n1, Node n2) = fastTC(isSuccessor/2)(n1, n2)
129+
/** A local flow step, restricted to relevant dataflow nodes. */
130+
private predicate step(Node n1, Node n2) {
131+
revFlow(n1) and
132+
revFlow(n2) and
133+
localFlowStep(n1, n2)
134+
}
155135

156-
Node getNode(Instruction instr, ValueNumber vn) { result = MkNode(instr, vn) }
136+
predicate hasFlow(Node n1, Node n2) = fastTC(step/2)(n1, n2)
157137

158138
/**
159139
* Holds if `source` is the `index`'th argument to the `scanf`-like call `call`, and `sink` is
160-
* an instruction that is part of the translation of `access` which is a transitive
161-
* control-flow successor of `call`.
162-
*
163-
* Furthermore, `source` and `sink` have identical global value numbers.
140+
* a dataflow node that represents the expression `e`.
164141
*/
165-
predicate hasFlow(
166-
Instruction source, ScanfFunctionCall call, int index, Instruction sink, Access access
167-
) {
168-
exists(ValueNumber vn |
169-
isSource(call, index, source, vn, _) and
170-
hasFlow(getNode(source, pragma[only_bind_into](vn)), getNode(sink, pragma[only_bind_into](vn))) and
171-
isSink(sink, access, vn)
172-
)
142+
predicate hasFlow(Node source, ScanfFunctionCall call, int index, Node sink, Expr e) {
143+
isSource(call, index, source, _) and
144+
hasFlow(source, sink) and
145+
isSink(sink, e)
173146
}
174147

175148
/**
176149
* Gets the smallest possible `scanf` return value of `call` that would indicate
177150
* success in writing the output argument at index `index`.
178151
*/
179152
int getMinimumGuardConstant(ScanfFunctionCall call, int index) {
180-
isSource(call, index, _, _, _) and
153+
isSource(call, index, _, _) and
181154
result =
182155
index + 1 -
183156
count(ScanfFormatLiteral f, int n |
@@ -191,7 +164,7 @@ int getMinimumGuardConstant(ScanfFunctionCall call, int index) {
191164
* Holds the access to `e` isn't guarded by a check that ensures that `call` returned
192165
* at least `minGuard`.
193166
*/
194-
predicate hasNonGuardedAccess(ScanfFunctionCall call, Access e, int minGuard) {
167+
predicate hasNonGuardedAccess(ScanfFunctionCall call, Expr e, int minGuard) {
195168
exists(int index |
196169
hasFlow(_, call, index, _, e) and
197170
minGuard = getMinimumGuardConstant(call, index)
@@ -211,7 +184,7 @@ BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
211184
exists(GuardCondition g, Expr left, Expr right |
212185
right = g.getAChild() and
213186
value = left.getValue().toInt() and
214-
DataFlow::localExprFlow(call, right)
187+
localExprFlow(call, right)
215188
|
216189
g.ensuresEq(left, right, 0, result, true) and op = "=="
217190
or
@@ -221,9 +194,9 @@ BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
221194
)
222195
}
223196

224-
from ScanfFunctionCall call, Access access, int minGuard
225-
where hasNonGuardedAccess(call, access, minGuard)
226-
select access,
197+
from ScanfFunctionCall call, Expr e, int minGuard
198+
where hasNonGuardedAccess(call, e, minGuard)
199+
select e,
227200
"This variable is read, but may not have been written. " +
228201
"It should be guarded by a check that the $@ returns at least " + minGuard + ".", call,
229202
call.toString()
Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
| test.cpp:35:7:35:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:34:3:34:7 | call to scanf | call to scanf |
2-
| test.cpp:51:7:51:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:50:3:50:7 | call to scanf | call to scanf |
32
| test.cpp:68:7:68:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:67:3:67:7 | call to scanf | call to scanf |
43
| test.cpp:80:7:80:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:79:3:79:7 | call to scanf | call to scanf |
5-
| test.cpp:90:8:90:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:89:3:89:7 | call to scanf | call to scanf |
6-
| test.cpp:98:8:98:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:97:3:97:7 | call to scanf | call to scanf |
4+
| test.cpp:90:7:90:8 | * ... | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:89:3:89:7 | call to scanf | call to scanf |
5+
| test.cpp:98:7:98:8 | * ... | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:97:3:97:7 | call to scanf | call to scanf |
76
| test.cpp:108:7:108:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:107:3:107:8 | call to fscanf | call to fscanf |
87
| test.cpp:115:7:115:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:114:3:114:8 | call to sscanf | call to sscanf |
98
| test.cpp:164:8:164:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:162:7:162:11 | call to scanf | call to scanf |
@@ -12,13 +11,9 @@
1211
| test.cpp:224:8:224:8 | j | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:221:7:221:11 | call to scanf | call to scanf |
1312
| test.cpp:248:9:248:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:246:25:246:29 | call to scanf | call to scanf |
1413
| test.cpp:252:9:252:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:250:14:250:18 | call to scanf | call to scanf |
15-
| test.cpp:264:7:264:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:263:3:263:7 | call to scanf | call to scanf |
1614
| test.cpp:272:7:272:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:271:3:271:7 | call to scanf | call to scanf |
1715
| test.cpp:280:7:280:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:279:3:279:7 | call to scanf | call to scanf |
1816
| test.cpp:292:7:292:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:291:3:291:7 | call to scanf | call to scanf |
19-
| test.cpp:302:8:302:12 | ptr_i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:301:3:301:7 | call to scanf | call to scanf |
20-
| test.cpp:310:7:310:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:309:3:309:7 | call to scanf | call to scanf |
2117
| test.cpp:404:25:404:25 | u | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:403:6:403:11 | call to sscanf | call to sscanf |
2218
| test.cpp:416:7:416:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:413:7:413:11 | call to scanf | call to scanf |
2319
| test.cpp:423:7:423:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:420:7:420:11 | call to scanf | call to scanf |
24-
| test.cpp:430:6:430:6 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:429:2:429:6 | call to scanf | call to scanf |

cpp/ql/test/query-tests/Critical/MissingCheckScanf/test.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ int main()
4848
int i = 0;
4949

5050
scanf("%d", &i);
51-
use(i); // BAD. Design choice: already initialized variables shouldn't make a difference.
51+
use(i); // GOOD. Design choice: already initialized variables are fine.
5252
}
5353

5454
{
@@ -261,23 +261,23 @@ int main()
261261
i = 0;
262262

263263
scanf("%d", &i);
264-
use(i); // BAD
264+
use(i); // GOOD
265265
}
266266

267267
{
268268
int i;
269269

270270
set_by_ref(i);
271271
scanf("%d", &i);
272-
use(i); // BAD
272+
use(i); // GOOD [FALSE POSITIVE]
273273
}
274274

275275
{
276276
int i;
277277

278278
set_by_ptr(&i);
279279
scanf("%d", &i);
280-
use(i); // BAD
280+
use(i); // GOOD [FALSE POSITIVE]
281281
}
282282

283283
{
@@ -299,15 +299,15 @@ int main()
299299
int *ptr_i = &i;
300300

301301
scanf("%d", &i);
302-
use(*ptr_i); // BAD: may not have written `i`
302+
use(*ptr_i); // BAD [NOT DETECTED]: may not have written `i`
303303
}
304304

305305
{
306306
int i;
307307
int *ptr_i = &i;
308308

309309
scanf("%d", ptr_i);
310-
use(i); // BAD: may not have written `*ptr_i`
310+
use(i); // BAD [NOT DETECTED]: may not have written `*ptr_i`
311311
}
312312

313313
{
@@ -427,5 +427,5 @@ void scan_and_write() {
427427
void scan_and_static_variable() {
428428
static int i;
429429
scanf("%d", &i);
430-
use(i); // GOOD [FALSE POSITIVE]: static variables are always 0-initialized
430+
use(i); // GOOD: static variables are always 0-initialized
431431
}

0 commit comments

Comments
 (0)