Skip to content

Commit 5099de5

Browse files
committed
C++: Split the query into 4 files.
1 parent a038b38 commit 5099de5

File tree

4 files changed

+465
-447
lines changed

4 files changed

+465
-447
lines changed
Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
private import cpp
2+
private import semmle.code.cpp.ir.dataflow.internal.ProductFlow
3+
private import semmle.code.cpp.ir.ValueNumbering
4+
private import semmle.code.cpp.controlflow.IRGuards
5+
private import semmle.code.cpp.ir.IR
6+
private import codeql.util.Unit
7+
private import RangeAnalysisUtil
8+
9+
/** Gets a variable access that is `e` itself or a transitive child of `e`. */
private VariableAccess getAVariableAccess(Expr e) { result = e.getAChild*() }
10+
11+
/**
 * Holds if the pair `(n, state)` is the flow source for the size expression
 * of the allocation `alloc`.
 *
 * `n` is the (fully converted) unique variable access occurring in the size
 * expression, and `state` is the constant offset relating the size expression
 * to that variable, as computed by `bounded1`. For `malloc(x + 1)`, `n` is `x`
 * and `state` is `1`.
 */
predicate hasSize(HeuristicAllocationExpr alloc, DataFlow::Node n, int state) {
  exists(Expr sizeExpr, VariableAccess access, Instruction sizeInstr, LoadInstruction accessLoad |
    sizeExpr = alloc.getSizeExpr() and
    // The size expression must mention exactly one variable access, like `x` in `malloc(x + 1)`.
    access = unique( | | getAVariableAccess(sizeExpr)) and
    // Locate the IR instructions for the whole size expression and for the load of the variable.
    sizeInstr.getUnconvertedResultExpression() = sizeExpr and
    accessLoad.getUnconvertedResultExpression() = access and
    // `state` is the constant difference between `x + 1` and `x`.
    bounded1(sizeInstr, accessLoad, state) and
    n.asConvertedExpr() = access.getFullyConverted()
  )
}
27+
28+
/**
29+
* A module that encapsulates a barrier guard to remove false positives from flow like:
30+
* ```cpp
31+
* char *p = new char[size];
32+
* // ...
33+
* unsigned n = size;
34+
* // ...
35+
* if(n < size) {
36+
* use(*p[n]);
37+
* }
38+
* ```
39+
* In this case, the sink pair identified by the product flow library (without any additional barriers)
40+
* would be `(p, n)` (where `n` is the `n` in `p[n]`), because there exists a pointer-arithmetic
41+
* instruction `pai` such that:
42+
* 1. The left-hand of `pai` flows from the allocation, and
43+
* 2. The right-hand of `pai` is non-strictly upper bounded by `n` (where `n` is the `n` in `p[n]`)
44+
* but because there's a strict comparison that compares `n` against the size of the allocation this
45+
* snippet is fine.
46+
*/
47+
module Barrier2 {
48+
private class FlowState2 = int;
49+
50+
private module BarrierConfig2 implements DataFlow::ConfigSig {
51+
predicate isSource(DataFlow::Node source) {
52+
// The sources is the same as in the sources for the second
53+
// projection in the `AllocToInvalidPointerConfig` module.
54+
hasSize(_, source, _)
55+
}
56+
57+
additional predicate isSink(
58+
DataFlow::Node left, DataFlow::Node right, IRGuardCondition g, FlowState2 state,
59+
boolean testIsTrue
60+
) {
61+
// The sink is any "large" side of a relational comparison.
62+
g.comparesLt(left.asOperand(), right.asOperand(), state, true, testIsTrue)
63+
}
64+
65+
predicate isSink(DataFlow::Node sink) { isSink(_, sink, _, _, _) }
66+
}
67+
68+
private import DataFlow::Global<BarrierConfig2>
69+
70+
private FlowState2 getAFlowStateForNode(DataFlow::Node node) {
71+
exists(DataFlow::Node source |
72+
flow(source, node) and
73+
hasSize(_, source, result)
74+
)
75+
}
76+
77+
private predicate operandGuardChecks(
78+
IRGuardCondition g, Operand left, Operand right, FlowState2 state, boolean edge
79+
) {
80+
exists(DataFlow::Node nLeft, DataFlow::Node nRight, FlowState2 state0 |
81+
nRight.asOperand() = right and
82+
nLeft.asOperand() = left and
83+
BarrierConfig2::isSink(nLeft, nRight, g, state0, edge) and
84+
state = getAFlowStateForNode(nRight) and
85+
state0 <= state
86+
)
87+
}
88+
89+
Instruction getABarrierInstruction(FlowState2 state) {
90+
exists(IRGuardCondition g, ValueNumber value, Operand use, boolean edge |
91+
use = value.getAUse() and
92+
operandGuardChecks(pragma[only_bind_into](g), pragma[only_bind_into](use), _,
93+
pragma[only_bind_into](state), pragma[only_bind_into](edge)) and
94+
result = value.getAnInstruction() and
95+
g.controls(result.getBlock(), edge)
96+
)
97+
}
98+
99+
DataFlow::Node getABarrierNode(FlowState2 state) {
100+
result.asOperand() = getABarrierInstruction(state).getAUse()
101+
}
102+
103+
IRBlock getABarrierBlock(FlowState2 state) {
104+
result.getAnInstruction() = getABarrierInstruction(state)
105+
}
106+
}
107+
108+
/**
 * A module that identifies the pointer-arithmetic instructions whose left operand is
 * reachable (by data flow) from an allocation that has a size according to `hasSize`.
 */
module InterestingPointerAddInstruction {
  private module PointerAddInstructionConfig implements DataFlow::ConfigSig {
    predicate isSource(DataFlow::Node source) {
      // The sources are the same as the sources for the first projection
      // (the allocation) of the product-flow configuration that uses `hasSize`.
      hasSize(source.asConvertedExpr(), _, _)
    }

    predicate isSink(DataFlow::Node sink) {
      // Any left operand of a pointer-arithmetic instruction is a sink.
      sink.asInstruction() = any(PointerAddInstruction pai).getLeft()
    }
  }

  private import DataFlow::Global<PointerAddInstructionConfig>

  /**
   * Holds if the left operand of `pai` is reached by flow from an allocation
   * for which `hasSize` holds.
   */
  predicate isInteresting(PointerAddInstruction pai) {
    exists(DataFlow::Node n |
      n.asInstruction() = pai.getLeft() and
      flowTo(n)
    )
  }
}
130+
131+
/**
132+
* A product-flow configuration for flow from an (allocation, size) pair to a
133+
* pointer-arithmetic operation that is non-strictly upper-bounded by `allocation + size`.
134+
*
135+
* The goal of this query is to find patterns such as:
136+
* ```cpp
137+
* 1. char* begin = (char*)malloc(size);
138+
* 2. char* end = begin + size;
139+
* 3. for(int *p = begin; p <= end; p++) {
140+
* 4. use(*p);
141+
* 5. }
142+
* ```
143+
*
144+
* We do this by splitting the task up into two configurations:
145+
* 1. `AllocToInvalidPointerConfig` find flow from `malloc(size)` to `begin + size`, and
146+
* 2. `InvalidPointerToDerefConfig` finds flow from `begin + size` to an `end` (on line 3).
147+
*
148+
* Finally, the range-analysis library will find a load from (or store to) an address that
149+
* is non-strictly upper-bounded by `end` (which in this case is `*p`).
150+
*/
151+
private module Config implements ProductFlow::StateConfigSig {
152+
class FlowState1 = Unit;
153+
154+
class FlowState2 = int;
155+
156+
predicate isSourcePair(
157+
DataFlow::Node source1, FlowState1 state1, DataFlow::Node source2, FlowState2 state2
158+
) {
159+
// In the case of an allocation like
160+
// ```cpp
161+
// malloc(size + 1);
162+
// ```
163+
// we use `state2` to remember that there was an offset (in this case an offset of `1`) added
164+
// to the size of the allocation. This state is then checked in `isSinkPair`.
165+
exists(state1) and
166+
hasSize(source1.asConvertedExpr(), source2, state2)
167+
}
168+
169+
predicate isSinkPair(
170+
DataFlow::Node sink1, FlowState1 state1, DataFlow::Node sink2, FlowState2 state2
171+
) {
172+
exists(state1) and
173+
// We check that the delta computed by the range analysis matches the
174+
// state value that we set in `isSourcePair`.
175+
pointerAddInstructionHasBounds0(_, sink1, sink2, state2)
176+
}
177+
178+
predicate isBarrier2(DataFlow::Node node, FlowState2 state) {
179+
node = Barrier2::getABarrierNode(state)
180+
}
181+
182+
predicate isBarrierIn1(DataFlow::Node node) { isSourcePair(node, _, _, _) }
183+
184+
predicate isBarrierOut2(DataFlow::Node node) {
185+
node = any(DataFlow::SsaPhiNode phi).getAnInput(true)
186+
}
187+
}
188+
189+
/** The stateful product-flow computation instantiated with `Config`. */
private module AllocToInvalidPointerFlow = ProductFlow::GlobalWithState<Config>;
190+
191+
/**
192+
* Holds if `pai` is non-strictly upper bounded by `sink2 + delta` and `sink1` is the
193+
* left operand of the pointer-arithmetic operation.
194+
*
195+
* For example in,
196+
* ```cpp
197+
* char* end = p + (size + 1);
198+
* ```
199+
* We will have:
200+
* - `pai` is `p + (size + 1)`,
201+
* - `sink1` is `p`
202+
* - `sink2` is `size`
203+
* - `delta` is `1`.
204+
*/
205+
pragma[nomagic]
206+
private predicate pointerAddInstructionHasBounds0(
207+
PointerAddInstruction pai, DataFlow::Node sink1, DataFlow::Node sink2, int delta
208+
) {
209+
InterestingPointerAddInstruction::isInteresting(pragma[only_bind_into](pai)) and
210+
exists(Instruction right, Instruction instr2 |
211+
pai.getRight() = right and
212+
pai.getLeft() = sink1.asInstruction() and
213+
instr2 = sink2.asInstruction() and
214+
bounded1(right, instr2, delta) and
215+
not right = Barrier2::getABarrierInstruction(delta) and
216+
not instr2 = Barrier2::getABarrierInstruction(delta)
217+
)
218+
}
219+
220+
/**
 * Holds if `AllocToInvalidPointerFlow` finds product flow from `allocation` to the pair
 * consisting of `sink1` and some size sink, and `pointerAddInstructionHasBounds0` holds
 * for `pai`, `sink1`, that size sink, and `delta`.
 */
pragma[nomagic]
predicate pointerAddInstructionHasBounds(
  DataFlow::Node allocation, PointerAddInstruction pai, DataFlow::Node sink1, int delta
) {
  exists(DataFlow::Node sizeSink |
    pointerAddInstructionHasBounds0(pai, sink1, sizeSink, delta) and
    AllocToInvalidPointerFlow::flow(allocation, _, sink1, sizeSink)
  )
}

0 commit comments

Comments
 (0)