Skip to content

Commit 1b1095e

Browse files
committed
C++: Post-update flow through &, *, +, ...
Flow from a definition by reference of a field into its object was working inconsistently and in a very syntax-dependent way. For a function `f` receiving a reference, `f(a->x)` could propagate data back to `a` via the _reverse read_ mechanism in the shared data-flow library, but for a function `g` receiving a pointer, `g(&a->x)` would not work. And `f((*a).x)` would not work either. In all cases, the issue was that the shared data-flow library propagates data backwards between `PostUpdateNode`s only, but there is no `PostUpdateNode` for `a->x` in `g(&a->x)`. This pull request inserts such post-update nodes where appropriate and links them to their neighbors. In this exapmle, flow back from the output parameter of `g` passes first to the `PostUpdateNode` of `&`, then to the (new) `PostUpdateNode` of `a->x`, and finally, as a _reverse read_ with the appropriate field projection, to `a`.
1 parent 5f74c24 commit 1b1095e

File tree

14 files changed

+547
-231
lines changed

14 files changed

+547
-231
lines changed

cpp/ql/src/semmle/code/cpp/dataflow/EscapesTree.qll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,14 @@
44
* passed to a function, or similar.
55
*/
66

7+
/*
8+
* Maintainer note: this file is one of several files that are similar but not
9+
* identical. Many changes to this file will also apply to the others:
10+
* - AddressConstantExpression.qll
11+
* - AddressFlow.qll
12+
* - EscapesTree.qll
13+
*/
14+
715
private import cpp
816

917
/**
Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
/**
2+
* Provides a local analysis for identifying where a variable address
3+
* is effectively taken. Array-like offsets are allowed to pass through but
4+
* not field-like offsets.
5+
*
6+
* This library is specialized to meet the needs of `FlowVar.qll`.
7+
*/
8+
9+
/*
10+
* Maintainer note: this file is one of several files that are similar but not
11+
* identical. Many changes to this file will also apply to the others:
12+
* - AddressConstantExpression.qll
13+
* - AddressFlow.qll
14+
* - EscapesTree.qll
15+
*/
16+
17+
private import cpp
18+
19+
/**
20+
* Holds if `f` is an instantiation of the `std::move` or `std::forward`
21+
* template functions, these functions are essentially casts, so we treat them
22+
* as such.
23+
*/
24+
private predicate stdIdentityFunction(Function f) {
25+
f.getNamespace().getParentNamespace() instanceof GlobalNamespace and
26+
f.getNamespace().getName() = "std" and
27+
(
28+
f.getName() = "move"
29+
or
30+
f.getName() = "forward"
31+
)
32+
}
33+
34+
private predicate lvalueToLvalueStep(Expr lvalueIn, Expr lvalueOut) {
35+
lvalueIn.getConversion() = lvalueOut.(ParenthesisExpr)
36+
or
37+
// When an object is implicitly converted to a reference to one of its base
38+
// classes, it gets two `Conversion`s: there is first an implicit
39+
// `CStyleCast` to its base class followed by a `ReferenceToExpr` to a
40+
// reference to its base class. Whereas an explicit cast to the base class
41+
// would produce an rvalue, which would not be convertible to an lvalue
42+
// reference, this implicit cast instead produces an lvalue. The following
43+
// case ensures that we propagate the property of being an lvalue through
44+
// such casts.
45+
lvalueIn.getConversion() = lvalueOut and
46+
lvalueOut.(CStyleCast).isImplicit()
47+
or
48+
// C++ only
49+
lvalueIn = lvalueOut.(PrefixCrementOperation).getOperand().getFullyConverted()
50+
or
51+
// C++ only
52+
lvalueIn = lvalueOut.(Assignment).getLValue().getFullyConverted()
53+
}
54+
55+
private predicate pointerToLvalueStep(Expr pointerIn, Expr lvalueOut) {
56+
pointerIn = lvalueOut.(ArrayExpr).getArrayBase().getFullyConverted()
57+
or
58+
pointerIn = lvalueOut.(PointerDereferenceExpr).getOperand().getFullyConverted()
59+
}
60+
61+
private predicate lvalueToPointerStep(Expr lvalueIn, Expr pointerOut) {
62+
lvalueIn.getConversion() = pointerOut.(ArrayToPointerConversion)
63+
or
64+
lvalueIn = pointerOut.(AddressOfExpr).getOperand().getFullyConverted()
65+
}
66+
67+
private predicate pointerToPointerStep(Expr pointerIn, Expr pointerOut) {
68+
(
69+
pointerOut instanceof PointerAddExpr
70+
or
71+
pointerOut instanceof PointerSubExpr
72+
) and
73+
pointerIn = pointerOut.getAChild().getFullyConverted() and
74+
pointerIn.getUnspecifiedType() instanceof PointerType
75+
or
76+
pointerIn = pointerOut.(UnaryPlusExpr).getOperand().getFullyConverted()
77+
or
78+
pointerIn.getConversion() = pointerOut.(Cast)
79+
or
80+
pointerIn.getConversion() = pointerOut.(ParenthesisExpr)
81+
or
82+
pointerIn = pointerOut.(ConditionalExpr).getThen().getFullyConverted()
83+
or
84+
pointerIn = pointerOut.(ConditionalExpr).getElse().getFullyConverted()
85+
or
86+
pointerIn = pointerOut.(CommaExpr).getRightOperand().getFullyConverted()
87+
or
88+
pointerIn = pointerOut.(StmtExpr).getResultExpr().getFullyConverted()
89+
}
90+
91+
private predicate lvalueToReferenceStep(Expr lvalueIn, Expr referenceOut) {
92+
lvalueIn.getConversion() = referenceOut.(ReferenceToExpr)
93+
}
94+
95+
private predicate referenceToLvalueStep(Expr referenceIn, Expr lvalueOut) {
96+
referenceIn.getConversion() = lvalueOut.(ReferenceDereferenceExpr)
97+
}
98+
99+
private predicate referenceToReferenceStep(Expr referenceIn, Expr referenceOut) {
100+
referenceOut =
101+
any(FunctionCall call |
102+
stdIdentityFunction(call.getTarget()) and
103+
referenceIn = call.getArgument(0).getFullyConverted()
104+
)
105+
or
106+
referenceIn.getConversion() = referenceOut.(Cast)
107+
or
108+
referenceIn.getConversion() = referenceOut.(ParenthesisExpr)
109+
}
110+
111+
private predicate assignmentTo(Expr updated, ControlFlowNode node) {
112+
updated = node.(Assignment).getLValue().getFullyConverted()
113+
or
114+
updated = node.(CrementOperation).getOperand().getFullyConverted()
115+
}
116+
117+
private predicate lvalueToUpdate(Expr lvalue, Expr outer, ControlFlowNode node) {
118+
(
119+
exists(Call call | node = call |
120+
outer = call.getQualifier().getFullyConverted() and
121+
outer.getUnspecifiedType() instanceof Class and
122+
not call.getTarget().hasSpecifier("const")
123+
)
124+
or
125+
assignmentTo(outer, node)
126+
or
127+
exists(DotFieldAccess fa |
128+
// `fa.otherField = ...` or `f(&fa)` or similar
129+
outer = fa.getQualifier().getFullyConverted() and
130+
valueToUpdate(fa, _, node)
131+
)
132+
) and
133+
lvalue = outer
134+
or
135+
exists(Expr lvalueMid |
136+
lvalueToLvalueStep(lvalue, lvalueMid) and
137+
lvalueToUpdate(lvalueMid, outer, node)
138+
)
139+
or
140+
exists(Expr pointerMid |
141+
lvalueToPointerStep(lvalue, pointerMid) and
142+
pointerToUpdate(pointerMid, outer, node)
143+
)
144+
or
145+
exists(Expr referenceMid |
146+
lvalueToReferenceStep(lvalue, referenceMid) and
147+
referenceToUpdate(referenceMid, outer, node)
148+
)
149+
}
150+
151+
private predicate pointerToUpdate(Expr pointer, Expr outer, ControlFlowNode node) {
152+
(
153+
exists(Call call | node = call |
154+
outer = call.getAnArgument().getFullyConverted() and
155+
exists(PointerType pt | pt = outer.getType().stripTopLevelSpecifiers() |
156+
not pt.getBaseType().isConst()
157+
)
158+
or
159+
outer = call.getQualifier().getFullyConverted() and
160+
outer.getUnspecifiedType() instanceof PointerType and
161+
not call.getTarget().hasSpecifier("const")
162+
)
163+
or
164+
exists(PointerFieldAccess fa |
165+
// `fa.otherField = ...` or `f(&fa)` or similar
166+
outer = fa.getQualifier().getFullyConverted() and
167+
valueToUpdate(fa, _, node)
168+
)
169+
) and
170+
pointer = outer
171+
or
172+
exists(Expr lvalueMid |
173+
pointerToLvalueStep(pointer, lvalueMid) and
174+
lvalueToUpdate(lvalueMid, outer, node)
175+
)
176+
or
177+
exists(Expr pointerMid |
178+
pointerToPointerStep(pointer, pointerMid) and
179+
pointerToUpdate(pointerMid, outer, node)
180+
)
181+
}
182+
183+
private predicate referenceToUpdate(Expr reference, Expr outer, ControlFlowNode node) {
184+
exists(Call call |
185+
node = call and
186+
outer = call.getAnArgument().getFullyConverted() and
187+
not stdIdentityFunction(call.getTarget()) and
188+
exists(ReferenceType rt | rt = outer.getType().stripTopLevelSpecifiers() |
189+
not rt.getBaseType().isConst()
190+
)
191+
) and
192+
reference = outer
193+
or
194+
exists(Expr lvalueMid |
195+
referenceToLvalueStep(reference, lvalueMid) and
196+
lvalueToUpdate(lvalueMid, outer, node)
197+
)
198+
or
199+
exists(Expr referenceMid |
200+
referenceToReferenceStep(reference, referenceMid) and
201+
referenceToUpdate(referenceMid, outer, node)
202+
)
203+
}
204+
205+
/**
206+
* Holds if `node` is a control-flow node that may modify `inner` (or what it
207+
* points to) through `outer`. The two expressions may be `Conversion`s. Plain
208+
* assignments to variables are not included in this predicate since they are
209+
* assumed to be analyzed by SSA or similar means.
210+
*
211+
* For example, in `f(& (*a).x)`, there are two results:
212+
* - `inner` = `... .x`, `outer` = `&...`, `node` = `f(...)`.
213+
* - `inner` = `a`, `outer` = `(...)`, `node` = `f(...)`.
214+
*/
215+
cached
216+
predicate valueToUpdate(Expr inner, Expr outer, ControlFlowNode node) {
217+
(
218+
lvalueToUpdate(inner, outer, node)
219+
or
220+
pointerToUpdate(inner, outer, node)
221+
or
222+
referenceToUpdate(inner, outer, node)
223+
) and
224+
(
225+
inner instanceof VariableAccess and
226+
// Don't track non-field assignments
227+
(assignmentTo(outer, _) implies inner instanceof FieldAccess)
228+
or
229+
inner instanceof ThisExpr
230+
or
231+
inner instanceof Call
232+
// `baseValue` could also be `*` or `ReferenceDereferenceExpr`, but we
233+
// can't do anything useful with those at the moment.
234+
)
235+
}

cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowUtil.qll

Lines changed: 36 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,12 @@ private newtype TNode =
2020
TInstanceParameterNode(MemberFunction f) { exists(f.getBlock()) and not f.isStatic() } or
2121
TPreConstructorInitThis(ConstructorFieldInit cfi) or
2222
TPostConstructorInitThis(ConstructorFieldInit cfi) or
23-
TThisArgumentPostUpdate(ThisExpr ta) {
24-
exists(Call c, int i |
25-
ta = c.getArgument(i) and
26-
not c.getTarget().getParameter(i).getUnderlyingType().(PointerType).getBaseType().isConst()
23+
TInnerPartialDefinitionNode(Expr e) {
24+
exists(PartialDefinition def, Expr outer |
25+
def.definesExpressions(e, outer) and
26+
// This condition ensures that we don't get two post-update nodes sharing
27+
// the same pre-update node.
28+
e != outer
2729
)
2830
} or
2931
TUninitializedNode(LocalVariable v) { not v.hasInitializer() } or
@@ -66,7 +68,7 @@ class Node extends TNode {
6668
* a partial definition of `&x`).
6769
*/
6870
Expr asPartialDefinition() {
69-
result = this.(PartialDefinitionNode).getPartialDefinition().getDefinedExpr()
71+
this.(PartialDefinitionNode).getPartialDefinition().definesExpressions(_, result)
7072
}
7173

7274
/**
@@ -198,25 +200,23 @@ class ImplicitParameterNode extends ParameterNode, TInstanceParameterNode {
198200
* returned. This node will have its `getArgument()` equal to `&x`.
199201
*/
200202
class DefinitionByReferenceNode extends PartialDefinitionNode {
201-
VariableAccess va;
203+
Expr inner;
202204
Expr argument;
203205

204206
DefinitionByReferenceNode() {
205-
exists(DefinitionByReference def |
206-
def = this.getPartialDefinition() and
207-
argument = def.getDefinedExpr() and
208-
va = def.getVariableAccess()
209-
)
207+
this.getPartialDefinition().(DefinitionByReference).definesExpressions(inner, argument)
210208
}
211209

212-
override Function getFunction() { result = va.getEnclosingFunction() }
210+
override Function getFunction() { result = inner.getEnclosingFunction() }
213211

214-
override Type getType() { result = va.getType() }
212+
override Type getType() { result = inner.getType() }
215213

216214
override string toString() { result = "ref arg " + argument.toString() }
217215

218216
override Location getLocation() { result = argument.getLocation() }
219217

218+
override ExprNode getPreUpdateNode() { result.getExpr() = argument }
219+
220220
/** Gets the argument corresponding to this node. */
221221
Expr getArgument() { result = argument }
222222

@@ -297,7 +297,7 @@ private class PartialDefinitionNode extends PostUpdateNode, TPartialDefinitionNo
297297

298298
PartialDefinitionNode() { this = TPartialDefinitionNode(pd) }
299299

300-
override Node getPreUpdateNode() { result.asExpr() = pd.getDefinedExpr() }
300+
override Node getPreUpdateNode() { pd.definesExpressions(_, result.asExpr()) }
301301

302302
override Location getLocation() { result = pd.getActualLocation() }
303303

@@ -306,14 +306,23 @@ private class PartialDefinitionNode extends PostUpdateNode, TPartialDefinitionNo
306306
override string toString() { result = getPreUpdateNode().toString() + " [post update]" }
307307
}
308308

309-
private class ThisArgumentPostUpdateNode extends PostUpdateNode, TThisArgumentPostUpdate {
310-
ThisExpr thisExpr;
309+
/**
310+
* A post-update node on the `e->f` in `f(&e->f)` (and other forms).
311+
*/
312+
private class InnerPartialDefinitionNode extends TInnerPartialDefinitionNode, PostUpdateNode {
313+
Expr e;
314+
315+
InnerPartialDefinitionNode() { this = TInnerPartialDefinitionNode(e) }
311316

312-
ThisArgumentPostUpdateNode() { this = TThisArgumentPostUpdate(thisExpr) }
317+
override ExprNode getPreUpdateNode() { result.getExpr() = e }
313318

314-
override Node getPreUpdateNode() { result.asExpr() = thisExpr }
319+
override Function getFunction() { result = e.getEnclosingFunction() }
315320

316-
override string toString() { result = "ref arg this" }
321+
override Type getType() { result = e.getType() }
322+
323+
override string toString() { result = e.toString() + " [inner post update]" }
324+
325+
override Location getLocation() { result = e.getLocation() }
317326
}
318327

319328
/**
@@ -520,6 +529,14 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
520529
or
521530
// post-update-`this` -> following-`this`-ref
522531
ThisFlow::adjacentThisRefs(nodeFrom.(PostUpdateNode).getPreUpdateNode(), nodeTo)
532+
or
533+
// In `f(&x->a)`, this step provides the flow from post-`&` to post-`x->a`,
534+
// from which there is field flow to `x` via reverse read.
535+
exists(PartialDefinition def, Expr inner, Expr outer |
536+
def.definesExpressions(inner, outer) and
537+
inner = nodeTo.(InnerPartialDefinitionNode).getPreUpdateNode().asExpr() and
538+
outer = nodeFrom.(PartialDefinitionNode).getPreUpdateNode().asExpr()
539+
)
523540
}
524541

525542
/**

0 commit comments

Comments
 (0)