Skip to content

Commit 6e6f211

Browse files
authored
Merge pull request #11857 from MathiasVP/speedup-missing-check-scanf
C++: Speedup `cpp/missing-check-scanf`
2 parents 2d46272 + 59072f9 commit 6e6f211

File tree

1 file changed

+183
-77
lines changed

1 file changed

+183
-77
lines changed

cpp/ql/src/Critical/MissingCheckScanf.ql

Lines changed: 183 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -20,81 +20,190 @@ import semmle.code.cpp.dataflow.DataFlow
2020
import semmle.code.cpp.ir.IR
2121
import semmle.code.cpp.ir.ValueNumbering
2222

23-
/** An expression appearing as an output argument to a `scanf`-like call */
24-
class ScanfOutput extends Expr {
25-
ScanfFunctionCall call;
26-
int varargIndex;
27-
Instruction instr;
28-
ValueNumber valNum;
29-
30-
ScanfOutput() {
31-
this = call.getOutputArgument(varargIndex).getFullyConverted() and
32-
instr.getConvertedResultExpression() = this and
33-
valueNumber(instr) = valNum
34-
}
35-
36-
ScanfFunctionCall getCall() { result = call }
37-
38-
/**
39-
* Returns the smallest possible `scanf` return value that would indicate
40-
* success in writing this output argument.
41-
*/
42-
int getMinimumGuardConstant() {
43-
result =
44-
varargIndex + 1 -
45-
count(ScanfFormatLiteral f, int n |
46-
// Special case: %n writes to an argument without reading any input.
47-
// It does not increase the count returned by `scanf`.
48-
n <= varargIndex and f.getUse() = call and f.getConversionChar(n) = "n"
49-
)
50-
}
51-
52-
predicate hasGuardedAccess(Access e, boolean isGuarded) {
53-
e = this.getAnAccess() and
54-
if
55-
exists(int value, int minGuard | minGuard = this.getMinimumGuardConstant() |
56-
e.getBasicBlock() = blockGuardedBy(value, "==", call) and minGuard <= value
57-
or
58-
e.getBasicBlock() = blockGuardedBy(value, "<", call) and minGuard - 1 <= value
59-
or
60-
e.getBasicBlock() = blockGuardedBy(value, "<=", call) and minGuard <= value
61-
)
62-
then isGuarded = true
63-
else isGuarded = false
64-
}
65-
66-
/**
67-
* Get a subsequent access of the same underlying storage,
68-
* but before it gets reset or reused in another `scanf` call.
69-
*/
70-
Access getAnAccess() {
71-
exists(Instruction dst |
72-
this.bigStep() = dst and
73-
dst.getAst() = result and
74-
valueNumber(dst) = valNum
23+
/**
24+
* Holds if `call` is a `scanf`-like function that may write to `output` at index `index`.
25+
*
26+
* Furthermore, `instr` is the instruction that defines the address of the `index`'th argument
27+
* of `call`, and `vn` is the value number of `instr.`
28+
*/
29+
predicate isSource(ScanfFunctionCall call, int index, Instruction instr, ValueNumber vn, Expr output) {
30+
output = call.getOutputArgument(index).getFullyConverted() and
31+
instr.getConvertedResultExpression() = output and
32+
vn.getAnInstruction() = instr
33+
}
34+
35+
/**
36+
* Holds if `instr` is control-flow reachable in 0 or more steps from
37+
* a call to a `scanf`-like function.
38+
*/
39+
pragma[nomagic]
40+
predicate fwdFlow0(Instruction instr) {
41+
isSource(_, _, instr, _, _)
42+
or
43+
exists(Instruction prev |
44+
fwdFlow0(prev) and
45+
prev.getASuccessor() = instr
46+
)
47+
}
48+
49+
/**
50+
* Holds if `instr` is part of the IR translation of `access` that
51+
* is not an expression being deallocated, and `instr` has value
52+
* number `vn`.
53+
*/
54+
predicate isSink(Instruction instr, Access access, ValueNumber vn) {
55+
instr.getAst() = access and
56+
not any(DeallocationExpr dealloc).getFreedExpr() = access and
57+
vn.getAnInstruction() = instr
58+
}
59+
60+
/**
61+
* Holds if `instr` is part of a path from a call to a `scanf`-like function
62+
* to a use of the written variable.
63+
*/
64+
pragma[nomagic]
65+
predicate revFlow0(Instruction instr) {
66+
fwdFlow0(instr) and
67+
(
68+
isSink(instr, _, _)
69+
or
70+
exists(Instruction succ | revFlow0(succ) | instr.getASuccessor() = succ)
71+
)
72+
}
73+
74+
/**
75+
* Holds if `instr` is part of a path from a call to a `scanf`-like function
76+
* that writes to a variable with value number `vn`, without passing through
77+
* redefinitions of the variable.
78+
*/
79+
pragma[nomagic]
80+
private predicate fwdFlow(Instruction instr, ValueNumber vn) {
81+
revFlow0(instr) and
82+
(
83+
isSource(_, _, instr, vn, _)
84+
or
85+
exists(Instruction prev |
86+
fwdFlow(prev, vn) and
87+
prev.getASuccessor() = instr and
88+
not isBarrier(instr, vn)
89+
)
90+
)
91+
}
92+
93+
/**
94+
* Holds if `instr` is part of a path from a call to a `scanf`-like function
95+
* that writes to a variable with value number `vn`, without passing through
96+
* redefinitions of the variable.
97+
*
98+
* Note: This predicate only holds for the `(intr, vn)` pairs that are also
99+
* control-flow reachable from an argument to a `scanf`-like function call.
100+
*/
101+
pragma[nomagic]
102+
predicate revFlow(Instruction instr, ValueNumber vn) {
103+
fwdFlow(instr, vn) and
104+
(
105+
isSink(instr, _, vn)
106+
or
107+
exists(Instruction succ | revFlow(succ, vn) |
108+
instr.getASuccessor() = succ and
109+
not isBarrier(succ, vn)
75110
)
76-
}
111+
)
112+
}
77113

78-
private Instruction bigStep() {
79-
result = this.smallStep(instr)
114+
/**
115+
* A type that bundles together a reachable instruction with the appropriate
116+
* value number (i.e., the value number that's transferred from the source
117+
* to the sink).
118+
*/
119+
newtype TNode = MkNode(Instruction instr, ValueNumber vn) { revFlow(instr, vn) }
120+
121+
class Node extends MkNode {
122+
ValueNumber vn;
123+
Instruction instr;
124+
125+
Node() { this = MkNode(instr, vn) }
126+
127+
final string toString() { result = instr.toString() }
128+
129+
final Node getASuccessor() { result = MkNode(instr.getASuccessor(), vn) }
130+
131+
final Location getLocation() { result = instr.getLocation() }
132+
}
133+
134+
/**
135+
* Holds if `instr` is an instruction with value number `vn` that is
136+
* used in a store operation, or is overwritten by another call to
137+
* a `scanf`-like function.
138+
*/
139+
private predicate isBarrier(Instruction instr, ValueNumber vn) {
140+
// We only need to compute barriers for instructions that we
141+
// managed to hit during the initial flow stage.
142+
revFlow0(pragma[only_bind_into](instr)) and
143+
valueNumber(instr) = vn and
144+
exists(Expr e | instr.getAst() = e |
145+
instr = any(StoreInstruction s).getDestinationAddress()
80146
or
81-
exists(Instruction i | i = this.bigStep() | result = this.smallStep(i))
82-
}
83-
84-
private Instruction smallStep(Instruction i) {
85-
instr.getASuccessor*() = i and
86-
i.getASuccessor() = result and
87-
not this.isBarrier(result)
88-
}
89-
90-
private predicate isBarrier(Instruction i) {
91-
valueNumber(i) = valNum and
92-
exists(Expr e | i.getAst() = e |
93-
i = any(StoreInstruction s).getDestinationAddress()
147+
isSource(_, _, _, _, [e, e.getParent().(AddressOfExpr)])
148+
)
149+
}
150+
151+
/** Holds if `n1` steps to `n2` in a single step. */
152+
predicate isSuccessor(Node n1, Node n2) { n1.getASuccessor() = n2 }
153+
154+
predicate hasFlow(Node n1, Node n2) = fastTC(isSuccessor/2)(n1, n2)
155+
156+
Node getNode(Instruction instr, ValueNumber vn) { result = MkNode(instr, vn) }
157+
158+
/**
159+
* Holds if `source` is the `index`'th argument to the `scanf`-like call `call`, and `sink` is
160+
* an instruction that is part of the translation of `access` which is a transitive
161+
* control-flow successor of `call`.
162+
*
163+
* Furthermore, `source` and `sink` have identical global value numbers.
164+
*/
165+
predicate hasFlow(
166+
Instruction source, ScanfFunctionCall call, int index, Instruction sink, Access access
167+
) {
168+
exists(ValueNumber vn |
169+
isSource(call, index, source, vn, _) and
170+
hasFlow(getNode(source, vn), getNode(sink, vn)) and
171+
isSink(sink, access, vn)
172+
)
173+
}
174+
175+
/**
176+
* Gets the smallest possible `scanf` return value of `call` that would indicate
177+
* success in writing the output argument at index `index`.
178+
*/
179+
int getMinimumGuardConstant(ScanfFunctionCall call, int index) {
180+
isSource(call, index, _, _, _) and
181+
result =
182+
index + 1 -
183+
count(ScanfFormatLiteral f, int n |
184+
// Special case: %n writes to an argument without reading any input.
185+
// It does not increase the count returned by `scanf`.
186+
n <= index and f.getUse() = call and f.getConversionChar(n) = "n"
187+
)
188+
}
189+
190+
/**
191+
* Holds the access to `e` isn't guarded by a check that ensures that `call` returned
192+
* at least `minGuard`.
193+
*/
194+
predicate hasNonGuardedAccess(ScanfFunctionCall call, Access e, int minGuard) {
195+
exists(int index |
196+
hasFlow(_, call, index, _, e) and
197+
minGuard = getMinimumGuardConstant(call, index)
198+
|
199+
not exists(int value |
200+
e.getBasicBlock() = blockGuardedBy(value, "==", call) and minGuard <= value
94201
or
95-
[e, e.getParent().(AddressOfExpr)] instanceof ScanfOutput
202+
e.getBasicBlock() = blockGuardedBy(value, "<", call) and minGuard - 1 <= value
203+
or
204+
e.getBasicBlock() = blockGuardedBy(value, "<=", call) and minGuard <= value
96205
)
97-
}
206+
)
98207
}
99208

100209
/** Returns a block guarded by the assertion of `value op call` */
@@ -112,12 +221,9 @@ BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
112221
)
113222
}
114223

115-
from ScanfOutput output, ScanfFunctionCall call, Access access
116-
where
117-
output.getCall() = call and
118-
output.hasGuardedAccess(access, false) and
119-
not exists(DeallocationExpr dealloc | dealloc.getFreedExpr() = access)
224+
from ScanfFunctionCall call, Access access, int minGuard
225+
where hasNonGuardedAccess(call, access, minGuard)
120226
select access,
121227
"This variable is read, but may not have been written. " +
122-
"It should be guarded by a check that the $@ returns at least " +
123-
output.getMinimumGuardConstant() + ".", call, call.toString()
228+
"It should be guarded by a check that the $@ returns at least " + minGuard + ".", call,
229+
call.toString()

0 commit comments

Comments
 (0)