Skip to content

Commit 4fcb576

Browse files
authored
Merge branch 'main' into print-global
2 parents 7c429fb + 1f39ec3 commit 4fcb576

File tree

93 files changed

+3113
-889
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

93 files changed

+3113
-889
lines changed

cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll

Lines changed: 93 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,54 @@
11
/**
22
* This file provides the first phase of the `cpp/invalid-pointer-deref` query that identifies flow
33
* from an allocation to a pointer-arithmetic instruction that constructs a pointer that is out of bounds.
4+
*
5+
* Consider the following snippet:
6+
* ```cpp
7+
* 1. char* base = (char*)malloc(size);
8+
* 2. char* end = base + size;
9+
* 3. for(int *p = base; p <= end; p++) {
10+
* 4. use(*p); // BUG: Should have been bounded by `p < end`.
11+
* 5. }
12+
* ```
13+
* this file identifies the flow from `new int[size]` to `base + size`.
14+
*
15+
* This is done using the product-flow library. The configuration tracks flow from the pair
16+
* `(allocation, size of allocation)` to a pair `(a, b)` where there exists a pointer-arithmetic instruction
17+
* `pai = a + r` such that `b` is a dataflow node where `b <= r`. Because there will be a dataflow-path from
18+
* `allocation` to `a` this means that the `pai` will compute a pointer that is some number of elements beyond
19+
* the end position of the allocation. See `pointerAddInstructionHasBounds` for the implementation of this.
20+
*
21+
* In the above example, the pair `(a, b)` is `(base, size)` with `base` and `size` coming from the expression
22+
* `base + size` on line 2, which is also the pointer-arithmetic instruction. In general, the pair does not necessarily
23+
* correspond directly to the operands of the pointer-arithmetic instruction.
24+
* In the following example, the pair is again `(base, size)`, but with `base` coming from line 3 and `size` from line 2,
25+
* and the pointer-arithmetic instruction being `base + n` on line 3:
26+
* ```cpp
27+
* 1. int* base = new int[size];
28+
* 2. if(n <= size) {
29+
* 3. int* end = base + n;
30+
* 4. for(int* p = base; p <= end; ++p) {
31+
* 5. *p = 0; // BUG: Should have been bounded by `p < end`.
32+
* 6. }
33+
* 7. }
34+
* ```
35+
*
36+
* Handling false positives:
37+
*
38+
* Consider a snippet such as:
39+
* ```cpp
40+
* 1. int* base = new int[size];
41+
* 2. int n = condition() ? size : 0;
42+
* 3. if(n >= size) return;
43+
* 4. int* end = base + n;
44+
* 5. for(int* p = base; p <= end; ++p) {
45+
* 6. *p = 0; // This is fine since `end < base + size`
46+
* 7. }
47+
* ```
48+
* In order to remove this false positive we define a barrier (see `SizeBarrier::SizeBarrierConfig`) that finds the
49+
* possible guards that compares a value to the size of the allocation. In the above example, this is the `(n >= size)`
50+
* guard on line 3. `SizeBarrier::getABarrierNode` then defines any node that is guarded by such a guard as a barrier
51+
* in the dataflow configuration.
452
*/
553

654
private import cpp
@@ -42,59 +90,57 @@ predicate hasSize(HeuristicAllocationExpr alloc, DataFlow::Node n, int state) {
4290
* ```
4391
* In this case, the sink pair identified by the product flow library (without any additional barriers)
4492
* would be `(p, n)` (where `n` is the `n` in `p[n]`), because there exists a pointer-arithmetic
45-
* instruction `pai` such that:
46-
* 1. The left-hand of `pai` flows from the allocation, and
47-
* 2. The right-hand of `pai` is non-strictly upper bounded by `n` (where `n` is the `n` in `p[n]`)
93+
* instruction `pai = a + b` such that:
94+
* 1. the allocation flows to `a`, and
95+
* 2. `b <= n` where `n` is the `n` in `p[n]`
4896
* but because there's a strict comparison that compares `n` against the size of the allocation this
4997
* snippet is fine.
5098
*/
51-
module Barrier2 {
52-
private class FlowState2 = int;
53-
54-
private module BarrierConfig2 implements DataFlow::ConfigSig {
99+
module SizeBarrier {
100+
private module SizeBarrierConfig implements DataFlow::ConfigSig {
55101
predicate isSource(DataFlow::Node source) {
56102
// The sources is the same as in the sources for the second
57103
// projection in the `AllocToInvalidPointerConfig` module.
58104
hasSize(_, source, _)
59105
}
60106

61107
additional predicate isSink(
62-
DataFlow::Node left, DataFlow::Node right, IRGuardCondition g, FlowState2 state,
63-
boolean testIsTrue
108+
DataFlow::Node left, DataFlow::Node right, IRGuardCondition g, int k, boolean testIsTrue
64109
) {
65-
// The sink is any "large" side of a relational comparison.
66-
g.comparesLt(left.asOperand(), right.asOperand(), state, true, testIsTrue)
110+
// The sink is any "large" side of a relational comparison. i.e., the `right` expression
111+
// in a guard such as `left < right + k`.
112+
g.comparesLt(left.asOperand(), right.asOperand(), k, true, testIsTrue)
67113
}
68114

69115
predicate isSink(DataFlow::Node sink) { isSink(_, sink, _, _, _) }
70116
}
71117

72-
private import DataFlow::Global<BarrierConfig2>
118+
private import DataFlow::Global<SizeBarrierConfig>
73119

74-
private FlowState2 getAFlowStateForNode(DataFlow::Node node) {
120+
private int getAFlowStateForNode(DataFlow::Node node) {
75121
exists(DataFlow::Node source |
76122
flow(source, node) and
77123
hasSize(_, source, result)
78124
)
79125
}
80126

81127
private predicate operandGuardChecks(
82-
IRGuardCondition g, Operand left, Operand right, FlowState2 state, boolean edge
128+
IRGuardCondition g, Operand left, Operand right, int state, boolean edge
83129
) {
84-
exists(DataFlow::Node nLeft, DataFlow::Node nRight, FlowState2 state0 |
130+
exists(DataFlow::Node nLeft, DataFlow::Node nRight, int k |
85131
nRight.asOperand() = right and
86132
nLeft.asOperand() = left and
87-
BarrierConfig2::isSink(nLeft, nRight, g, state0, edge) and
133+
SizeBarrierConfig::isSink(nLeft, nRight, g, k, edge) and
88134
state = getAFlowStateForNode(nRight) and
89-
state0 <= state
135+
k <= state
90136
)
91137
}
92138

93139
/**
94140
* Gets an instruction that is guarded by a guard condition which ensures that
95141
* the value of the instruction is upper-bounded by size of some allocation.
96142
*/
97-
Instruction getABarrierInstruction(FlowState2 state) {
143+
Instruction getABarrierInstruction(int state) {
98144
exists(IRGuardCondition g, ValueNumber value, Operand use, boolean edge |
99145
use = value.getAUse() and
100146
operandGuardChecks(pragma[only_bind_into](g), pragma[only_bind_into](use), _,
@@ -108,17 +154,15 @@ module Barrier2 {
108154
* Gets a `DataFlow::Node` that is guarded by a guard condition which ensures that
109155
* the value of the node is upper-bounded by size of some allocation.
110156
*/
111-
DataFlow::Node getABarrierNode(FlowState2 state) {
157+
DataFlow::Node getABarrierNode(int state) {
112158
result.asOperand() = getABarrierInstruction(state).getAUse()
113159
}
114160

115161
/**
116162
* Gets the block of a node that is guarded (see `getABarrierInstruction` or
117163
* `getABarrierNode` for the definition of what it means to be guarded).
118164
*/
119-
IRBlock getABarrierBlock(FlowState2 state) {
120-
result.getAnInstruction() = getABarrierInstruction(state)
121-
}
165+
IRBlock getABarrierBlock(int state) { result.getAnInstruction() = getABarrierInstruction(state) }
122166
}
123167

124168
private module InterestingPointerAddInstruction {
@@ -151,54 +195,38 @@ private module InterestingPointerAddInstruction {
151195
}
152196

153197
/**
154-
* A product-flow configuration for flow from an (allocation, size) pair to a
155-
* pointer-arithmetic operation that is non-strictly upper-bounded by `allocation + size`.
156-
*
157-
* The goal of this query is to find patterns such as:
158-
* ```cpp
159-
* 1. char* begin = (char*)malloc(size);
160-
* 2. char* end = begin + size;
161-
* 3. for(int *p = begin; p <= end; p++) {
162-
* 4. use(*p);
163-
* 5. }
164-
* ```
165-
*
166-
* We do this by splitting the task up into two configurations:
167-
* 1. `AllocToInvalidPointerConfig` find flow from `malloc(size)` to `begin + size`, and
168-
* 2. `InvalidPointerToDerefConfig` finds flow from `begin + size` to an `end` (on line 3).
169-
*
170-
* Finally, the range-analysis library will find a load from (or store to) an address that
171-
* is non-strictly upper-bounded by `end` (which in this case is `*p`).
198+
* A product-flow configuration for flow from an `(allocation, size)` pair to a
199+
* pointer-arithmetic operation `pai` such that `pai <= allocation + size`.
172200
*/
173201
private module Config implements ProductFlow::StateConfigSig {
174202
class FlowState1 = Unit;
175203

176204
class FlowState2 = int;
177205

178206
predicate isSourcePair(
179-
DataFlow::Node source1, FlowState1 state1, DataFlow::Node source2, FlowState2 state2
207+
DataFlow::Node allocSource, FlowState1 unit, DataFlow::Node sizeSource, FlowState2 sizeAddend
180208
) {
181209
// In the case of an allocation like
182210
// ```cpp
183211
// malloc(size + 1);
184212
// ```
185213
// we use `state2` to remember that there was an offset (in this case an offset of `1`) added
186214
// to the size of the allocation. This state is then checked in `isSinkPair`.
187-
exists(state1) and
188-
hasSize(source1.asConvertedExpr(), source2, state2)
215+
exists(unit) and
216+
hasSize(allocSource.asConvertedExpr(), sizeSource, sizeAddend)
189217
}
190218

191219
predicate isSinkPair(
192-
DataFlow::Node sink1, FlowState1 state1, DataFlow::Node sink2, FlowState2 state2
220+
DataFlow::Node allocSink, FlowState1 unit, DataFlow::Node sizeSink, FlowState2 sizeAddend
193221
) {
194-
exists(state1) and
222+
exists(unit) and
195223
// We check that the delta computed by the range analysis matches the
196224
// state value that we set in `isSourcePair`.
197-
pointerAddInstructionHasBounds0(_, sink1, sink2, state2)
225+
pointerAddInstructionHasBounds0(_, allocSink, sizeSink, sizeAddend)
198226
}
199227

200228
predicate isBarrier2(DataFlow::Node node, FlowState2 state) {
201-
node = Barrier2::getABarrierNode(state)
229+
node = SizeBarrier::getABarrierNode(state)
202230
}
203231

204232
predicate isBarrierIn1(DataFlow::Node node) { isSourcePair(node, _, _, _) }
@@ -211,7 +239,7 @@ private module Config implements ProductFlow::StateConfigSig {
211239
private module AllocToInvalidPointerFlow = ProductFlow::GlobalWithState<Config>;
212240

213241
/**
214-
* Holds if `pai` is non-strictly upper bounded by `sink2 + delta` and `sink1` is the
242+
* Holds if `pai` is non-strictly upper bounded by `sizeSink + delta` and `allocSink` is the
215243
* left operand of the pointer-arithmetic operation.
216244
*
217245
* For example in,
@@ -220,37 +248,37 @@ private module AllocToInvalidPointerFlow = ProductFlow::GlobalWithState<Config>;
220248
* ```
221249
* We will have:
222250
* - `pai` is `p + (size + 1)`,
223-
* - `sink1` is `p`
224-
* - `sink2` is `size`
251+
* - `allocSink` is `p`
252+
* - `sizeSink` is `size`
225253
* - `delta` is `1`.
226254
*/
227255
pragma[nomagic]
228256
private predicate pointerAddInstructionHasBounds0(
229-
PointerAddInstruction pai, DataFlow::Node sink1, DataFlow::Node sink2, int delta
257+
PointerAddInstruction pai, DataFlow::Node allocSink, DataFlow::Node sizeSink, int delta
230258
) {
231259
InterestingPointerAddInstruction::isInteresting(pragma[only_bind_into](pai)) and
232-
exists(Instruction right, Instruction instr2 |
260+
exists(Instruction right, Instruction sizeInstr |
233261
pai.getRight() = right and
234-
pai.getLeft() = sink1.asInstruction() and
235-
instr2 = sink2.asInstruction() and
236-
// pai.getRight() <= sink2 + delta
237-
bounded1(right, instr2, delta) and
238-
not right = Barrier2::getABarrierInstruction(delta) and
239-
not instr2 = Barrier2::getABarrierInstruction(delta)
262+
pai.getLeft() = allocSink.asInstruction() and
263+
sizeInstr = sizeSink.asInstruction() and
264+
// pai.getRight() <= sizeSink + delta
265+
bounded1(right, sizeInstr, delta) and
266+
not right = SizeBarrier::getABarrierInstruction(delta) and
267+
not sizeInstr = SizeBarrier::getABarrierInstruction(delta)
240268
)
241269
}
242270

243271
/**
244-
* Holds if `allocation` flows to `sink1` and `sink1` represents the left-hand
245-
* side of the pointer-arithmetic instruction `pai`, and the right-hand side of `pai`
246-
* is non-strictly upper bounded by the size of `alllocation` + `delta`.
272+
* Holds if `allocation` flows to `allocSink` and `allocSink` represents the left operand
273+
* of the pointer-arithmetic instruction `pai = a + b` (i.e., `allocSink = a`), and
274+
* `b <= allocation + delta`.
247275
*/
248276
pragma[nomagic]
249277
predicate pointerAddInstructionHasBounds(
250-
DataFlow::Node allocation, PointerAddInstruction pai, DataFlow::Node sink1, int delta
278+
DataFlow::Node allocation, PointerAddInstruction pai, DataFlow::Node allocSink, int delta
251279
) {
252-
exists(DataFlow::Node sink2 |
253-
AllocToInvalidPointerFlow::flow(allocation, _, sink1, sink2) and
254-
pointerAddInstructionHasBounds0(pai, sink1, sink2, delta)
280+
exists(DataFlow::Node sizeSink |
281+
AllocToInvalidPointerFlow::flow(allocation, _, allocSink, sizeSink) and
282+
pointerAddInstructionHasBounds0(pai, allocSink, sizeSink, delta)
255283
)
256284
}

0 commit comments

Comments
 (0)