1
1
/**
2
2
* This file provides the first phase of the `cpp/invalid-pointer-deref` query that identifies flow
3
3
* from an allocation to a pointer-arithmetic instruction that constructs a pointer that is out of bounds.
4
+ *
5
+ * Consider the following snippet:
6
+ * ```cpp
7
+ * 1. char* base = (char*)malloc(size);
8
+ * 2. char* end = base + size;
9
+ * 3. for(int *p = base; p <= end; p++) {
10
+ * 4. use(*p); // BUG: Should have been bounded by `p < end`.
11
+ * 5. }
12
+ * ```
13
+ * this file identifies the flow from `new int[size]` to `base + size`.
14
+ *
15
+ * This is done using the product-flow library. The configuration tracks flow from the pair
16
+ * `(allocation, size of allocation)` to a pair `(a, b)` where there exists a pointer-arithmetic instruction
17
+ * `pai = a + r` such that `b` is a dataflow node where `b <= r`. Because there will be a dataflow-path from
18
+ * `allocation` to `a` this means that the `pai` will compute a pointer that is some number of elements beyond
19
+ * the end position of the allocation. See `pointerAddInstructionHasBounds` for the implementation of this.
20
+ *
21
+ * In the above example, the pair `(a, b)` is `(base, size)` with `base` and `size` coming from the expression
22
+ * `base + size` on line 2, which is also the pointer-arithmetic instruction. In general, the pair does not necessarily
23
+ * correspond directly to the operands of the pointer-arithmetic instruction.
24
+ * In the following example, the pair is again `(base, size)`, but with `base` coming from line 3 and `size` from line 2,
25
+ * and the pointer-arithmetic instruction being `base + n` on line 3:
26
+ * ```cpp
27
+ * 1. int* base = new int[size];
28
+ * 2. if(n <= size) {
29
+ * 3. int* end = base + n;
30
+ * 4. for(int* p = base; p <= end; ++p) {
31
+ * 5. *p = 0; // BUG: Should have been bounded by `p < end`.
32
+ * 6. }
33
+ * 7. }
34
+ * ```
35
+ *
36
+ * Handling false positives:
37
+ *
38
+ * Consider a snippet such as:
39
+ * ```cpp
40
+ * 1. int* base = new int[size];
41
+ * 2. int n = condition() ? size : 0;
42
+ * 3. if(n >= size) return;
43
+ * 4. int* end = base + n;
44
+ * 5. for(int* p = base; p <= end; ++p) {
45
+ * 6. *p = 0; // This is fine since `end < base + size`
46
+ * 7. }
47
+ * ```
48
+ * In order to remove this false positive we define a barrier (see `SizeBarrier::SizeBarrierConfig`) that finds the
49
+ * possible guards that compares a value to the size of the allocation. In the above example, this is the `(n >= size)`
50
+ * guard on line 3. `SizeBarrier::getABarrierNode` then defines any node that is guarded by such a guard as a barrier
51
+ * in the dataflow configuration.
4
52
*/
5
53
6
54
private import cpp
@@ -42,59 +90,57 @@ predicate hasSize(HeuristicAllocationExpr alloc, DataFlow::Node n, int state) {
42
90
* ```
43
91
* In this case, the sink pair identified by the product flow library (without any additional barriers)
44
92
* would be `(p, n)` (where `n` is the `n` in `p[n]`), because there exists a pointer-arithmetic
45
- * instruction `pai` such that:
46
- * 1. The left-hand of `pai` flows from the allocation , and
47
- * 2. The right-hand of `pai` is non-strictly upper bounded by ` n` ( where `n` is the `n` in `p[n]`)
93
+ * instruction `pai = a + b ` such that:
94
+ * 1. the allocation flows to `a` , and
95
+ * 2. `b <= n` where `n` is the `n` in `p[n]`
48
96
* but because there's a strict comparison that compares `n` against the size of the allocation this
49
97
* snippet is fine.
50
98
*/
51
- module Barrier2 {
52
- private class FlowState2 = int ;
53
-
54
- private module BarrierConfig2 implements DataFlow:: ConfigSig {
99
+ module SizeBarrier {
100
+ private module SizeBarrierConfig implements DataFlow:: ConfigSig {
55
101
predicate isSource ( DataFlow:: Node source ) {
56
102
// The sources is the same as in the sources for the second
57
103
// projection in the `AllocToInvalidPointerConfig` module.
58
104
hasSize ( _, source , _)
59
105
}
60
106
61
107
additional predicate isSink (
62
- DataFlow:: Node left , DataFlow:: Node right , IRGuardCondition g , FlowState2 state ,
63
- boolean testIsTrue
108
+ DataFlow:: Node left , DataFlow:: Node right , IRGuardCondition g , int k , boolean testIsTrue
64
109
) {
65
- // The sink is any "large" side of a relational comparison.
66
- g .comparesLt ( left .asOperand ( ) , right .asOperand ( ) , state , true , testIsTrue )
110
+ // The sink is any "large" side of a relational comparison. i.e., the `right` expression
111
+ // in a guard such as `left < right + k`.
112
+ g .comparesLt ( left .asOperand ( ) , right .asOperand ( ) , k , true , testIsTrue )
67
113
}
68
114
69
115
predicate isSink ( DataFlow:: Node sink ) { isSink ( _, sink , _, _, _) }
70
116
}
71
117
72
- private import DataFlow:: Global< BarrierConfig2 >
118
+ private import DataFlow:: Global< SizeBarrierConfig >
73
119
74
- private FlowState2 getAFlowStateForNode ( DataFlow:: Node node ) {
120
+ private int getAFlowStateForNode ( DataFlow:: Node node ) {
75
121
exists ( DataFlow:: Node source |
76
122
flow ( source , node ) and
77
123
hasSize ( _, source , result )
78
124
)
79
125
}
80
126
81
127
private predicate operandGuardChecks (
82
- IRGuardCondition g , Operand left , Operand right , FlowState2 state , boolean edge
128
+ IRGuardCondition g , Operand left , Operand right , int state , boolean edge
83
129
) {
84
- exists ( DataFlow:: Node nLeft , DataFlow:: Node nRight , FlowState2 state0 |
130
+ exists ( DataFlow:: Node nLeft , DataFlow:: Node nRight , int k |
85
131
nRight .asOperand ( ) = right and
86
132
nLeft .asOperand ( ) = left and
87
- BarrierConfig2 :: isSink ( nLeft , nRight , g , state0 , edge ) and
133
+ SizeBarrierConfig :: isSink ( nLeft , nRight , g , k , edge ) and
88
134
state = getAFlowStateForNode ( nRight ) and
89
- state0 <= state
135
+ k <= state
90
136
)
91
137
}
92
138
93
139
/**
94
140
* Gets an instruction that is guarded by a guard condition which ensures that
95
141
* the value of the instruction is upper-bounded by size of some allocation.
96
142
*/
97
- Instruction getABarrierInstruction ( FlowState2 state ) {
143
+ Instruction getABarrierInstruction ( int state ) {
98
144
exists ( IRGuardCondition g , ValueNumber value , Operand use , boolean edge |
99
145
use = value .getAUse ( ) and
100
146
operandGuardChecks ( pragma [ only_bind_into ] ( g ) , pragma [ only_bind_into ] ( use ) , _,
@@ -108,17 +154,15 @@ module Barrier2 {
108
154
* Gets a `DataFlow::Node` that is guarded by a guard condition which ensures that
109
155
* the value of the node is upper-bounded by size of some allocation.
110
156
*/
111
- DataFlow:: Node getABarrierNode ( FlowState2 state ) {
157
+ DataFlow:: Node getABarrierNode ( int state ) {
112
158
result .asOperand ( ) = getABarrierInstruction ( state ) .getAUse ( )
113
159
}
114
160
115
161
/**
116
162
* Gets the block of a node that is guarded (see `getABarrierInstruction` or
117
163
* `getABarrierNode` for the definition of what it means to be guarded).
118
164
*/
119
- IRBlock getABarrierBlock ( FlowState2 state ) {
120
- result .getAnInstruction ( ) = getABarrierInstruction ( state )
121
- }
165
+ IRBlock getABarrierBlock ( int state ) { result .getAnInstruction ( ) = getABarrierInstruction ( state ) }
122
166
}
123
167
124
168
private module InterestingPointerAddInstruction {
@@ -151,54 +195,38 @@ private module InterestingPointerAddInstruction {
151
195
}
152
196
153
197
/**
154
- * A product-flow configuration for flow from an (allocation, size) pair to a
155
- * pointer-arithmetic operation that is non-strictly upper-bounded by `allocation + size`.
156
- *
157
- * The goal of this query is to find patterns such as:
158
- * ```cpp
159
- * 1. char* begin = (char*)malloc(size);
160
- * 2. char* end = begin + size;
161
- * 3. for(int *p = begin; p <= end; p++) {
162
- * 4. use(*p);
163
- * 5. }
164
- * ```
165
- *
166
- * We do this by splitting the task up into two configurations:
167
- * 1. `AllocToInvalidPointerConfig` find flow from `malloc(size)` to `begin + size`, and
168
- * 2. `InvalidPointerToDerefConfig` finds flow from `begin + size` to an `end` (on line 3).
169
- *
170
- * Finally, the range-analysis library will find a load from (or store to) an address that
171
- * is non-strictly upper-bounded by `end` (which in this case is `*p`).
198
+ * A product-flow configuration for flow from an `(allocation, size)` pair to a
199
+ * pointer-arithmetic operation `pai` such that `pai <= allocation + size`.
172
200
*/
173
201
private module Config implements ProductFlow:: StateConfigSig {
174
202
class FlowState1 = Unit ;
175
203
176
204
class FlowState2 = int ;
177
205
178
206
predicate isSourcePair (
179
- DataFlow:: Node source1 , FlowState1 state1 , DataFlow:: Node source2 , FlowState2 state2
207
+ DataFlow:: Node allocSource , FlowState1 unit , DataFlow:: Node sizeSource , FlowState2 sizeAddend
180
208
) {
181
209
// In the case of an allocation like
182
210
// ```cpp
183
211
// malloc(size + 1);
184
212
// ```
185
213
// we use `state2` to remember that there was an offset (in this case an offset of `1`) added
186
214
// to the size of the allocation. This state is then checked in `isSinkPair`.
187
- exists ( state1 ) and
188
- hasSize ( source1 .asConvertedExpr ( ) , source2 , state2 )
215
+ exists ( unit ) and
216
+ hasSize ( allocSource .asConvertedExpr ( ) , sizeSource , sizeAddend )
189
217
}
190
218
191
219
predicate isSinkPair (
192
- DataFlow:: Node sink1 , FlowState1 state1 , DataFlow:: Node sink2 , FlowState2 state2
220
+ DataFlow:: Node allocSink , FlowState1 unit , DataFlow:: Node sizeSink , FlowState2 sizeAddend
193
221
) {
194
- exists ( state1 ) and
222
+ exists ( unit ) and
195
223
// We check that the delta computed by the range analysis matches the
196
224
// state value that we set in `isSourcePair`.
197
- pointerAddInstructionHasBounds0 ( _, sink1 , sink2 , state2 )
225
+ pointerAddInstructionHasBounds0 ( _, allocSink , sizeSink , sizeAddend )
198
226
}
199
227
200
228
predicate isBarrier2 ( DataFlow:: Node node , FlowState2 state ) {
201
- node = Barrier2 :: getABarrierNode ( state )
229
+ node = SizeBarrier :: getABarrierNode ( state )
202
230
}
203
231
204
232
predicate isBarrierIn1 ( DataFlow:: Node node ) { isSourcePair ( node , _, _, _) }
@@ -211,7 +239,7 @@ private module Config implements ProductFlow::StateConfigSig {
211
239
private module AllocToInvalidPointerFlow = ProductFlow:: GlobalWithState< Config > ;
212
240
213
241
/**
214
- * Holds if `pai` is non-strictly upper bounded by `sink2 + delta` and `sink1 ` is the
242
+ * Holds if `pai` is non-strictly upper bounded by `sizeSink + delta` and `allocSink ` is the
215
243
* left operand of the pointer-arithmetic operation.
216
244
*
217
245
* For example in,
@@ -220,37 +248,37 @@ private module AllocToInvalidPointerFlow = ProductFlow::GlobalWithState<Config>;
220
248
* ```
221
249
* We will have:
222
250
* - `pai` is `p + (size + 1)`,
223
- * - `sink1 ` is `p`
224
- * - `sink2 ` is `size`
251
+ * - `allocSink ` is `p`
252
+ * - `sizeSink ` is `size`
225
253
* - `delta` is `1`.
226
254
*/
227
255
pragma [ nomagic]
228
256
private predicate pointerAddInstructionHasBounds0 (
229
- PointerAddInstruction pai , DataFlow:: Node sink1 , DataFlow:: Node sink2 , int delta
257
+ PointerAddInstruction pai , DataFlow:: Node allocSink , DataFlow:: Node sizeSink , int delta
230
258
) {
231
259
InterestingPointerAddInstruction:: isInteresting ( pragma [ only_bind_into ] ( pai ) ) and
232
- exists ( Instruction right , Instruction instr2 |
260
+ exists ( Instruction right , Instruction sizeInstr |
233
261
pai .getRight ( ) = right and
234
- pai .getLeft ( ) = sink1 .asInstruction ( ) and
235
- instr2 = sink2 .asInstruction ( ) and
236
- // pai.getRight() <= sink2 + delta
237
- bounded1 ( right , instr2 , delta ) and
238
- not right = Barrier2 :: getABarrierInstruction ( delta ) and
239
- not instr2 = Barrier2 :: getABarrierInstruction ( delta )
262
+ pai .getLeft ( ) = allocSink .asInstruction ( ) and
263
+ sizeInstr = sizeSink .asInstruction ( ) and
264
+ // pai.getRight() <= sizeSink + delta
265
+ bounded1 ( right , sizeInstr , delta ) and
266
+ not right = SizeBarrier :: getABarrierInstruction ( delta ) and
267
+ not sizeInstr = SizeBarrier :: getABarrierInstruction ( delta )
240
268
)
241
269
}
242
270
243
271
/**
244
- * Holds if `allocation` flows to `sink1 ` and `sink1 ` represents the left-hand
245
- * side of the pointer-arithmetic instruction `pai`, and the right-hand side of `pai`
246
- * is non-strictly upper bounded by the size of `alllocation` + ` delta`.
272
+ * Holds if `allocation` flows to `allocSink ` and `allocSink ` represents the left operand
273
+ * of the pointer-arithmetic instruction `pai = a + b` (i.e., `allocSink = a`), and
274
+ * `b <= allocation + delta`.
247
275
*/
248
276
pragma [ nomagic]
249
277
predicate pointerAddInstructionHasBounds (
250
- DataFlow:: Node allocation , PointerAddInstruction pai , DataFlow:: Node sink1 , int delta
278
+ DataFlow:: Node allocation , PointerAddInstruction pai , DataFlow:: Node allocSink , int delta
251
279
) {
252
- exists ( DataFlow:: Node sink2 |
253
- AllocToInvalidPointerFlow:: flow ( allocation , _, sink1 , sink2 ) and
254
- pointerAddInstructionHasBounds0 ( pai , sink1 , sink2 , delta )
280
+ exists ( DataFlow:: Node sizeSink |
281
+ AllocToInvalidPointerFlow:: flow ( allocation , _, allocSink , sizeSink ) and
282
+ pointerAddInstructionHasBounds0 ( pai , allocSink , sizeSink , delta )
255
283
)
256
284
}
0 commit comments