Skip to content

Commit 26e5320

Browse files
authored
Merge pull request #20901 from MathiasVP/canonical-content
C++: Don't use `Field`s to define `FieldContent`
2 parents 1c2d8bb + 861ca75 commit 26e5320

File tree

7 files changed

+195
-31
lines changed

7 files changed

+195
-31
lines changed

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll

Lines changed: 148 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2078,38 +2078,151 @@ predicate localExprFlow(Expr e1, Expr e2) {
20782078
localExprFlowPlus(e1, e2)
20792079
}
20802080

2081+
/**
2082+
* A canonical representation of a field.
2083+
*
2084+
* For performance reasons we want a unique `Content` that represents
2085+
* a given field across any template instantiation of a class.
2086+
*
2087+
* This is possible in _almost_ all cases, but there are cases where it is
2088+
* not possible to map between a field in the uninstantiated template to a
2089+
* field in the instantiated template. This happens in the case of local class
2090+
* definitions (because the local class is not the template that constructs
2091+
* the instantiation - it is the enclosing function). So this abstract class
2092+
* has two implementations: a non-local case (where we can represent a
2093+
* canonical field as the field declaration from an uninstantiated class
2094+
* template or a non-templated class), and a local case (where we simply use
2095+
* the field from the instantiated class).
2096+
*/
2097+
abstract private class CanonicalField extends Field {
2098+
/** Gets a field represented by this canonical field. */
2099+
abstract Field getAField();
2100+
2101+
/**
2102+
* Gets a class that declares a field represented by this canonical field.
2103+
*/
2104+
abstract Class getADeclaringType();
2105+
2106+
/**
2107+
* Gets a type that this canonical field may have. Note that this may
2108+
* not be a unique type. For example, consider this case:
2109+
* ```
2110+
* template<typename T>
2111+
* struct S { T x; };
2112+
*
2113+
* S<int> s1;
2114+
* S<char> s2;
2115+
* ```
2116+
* In this case the canonical field corresponding to `S::x` has two types:
2117+
* `int` and `char`.
2118+
*/
2119+
Type getAType() { result = this.getAField().getType() }
2120+
2121+
Type getAnUnspecifiedType() { result = this.getAType().getUnspecifiedType() }
2122+
}
2123+
2124+
private class NonLocalCanonicalField extends CanonicalField {
2125+
Class declaringType;
2126+
2127+
NonLocalCanonicalField() {
2128+
declaringType = this.getDeclaringType() and
2129+
not declaringType.isFromTemplateInstantiation(_) and
2130+
not declaringType.isLocal() // handled in LocalCanonicalField
2131+
}
2132+
2133+
override Field getAField() {
2134+
exists(Class c | result.getDeclaringType() = c |
2135+
// Either the declaring class of the field is a template instantiation
2136+
// that has been constructed from this canonical declaration
2137+
c.isConstructedFrom(declaringType) and
2138+
pragma[only_bind_out](result.getName()) = pragma[only_bind_out](this.getName())
2139+
or
2140+
// or this canonical declaration is not a template.
2141+
not c.isConstructedFrom(_) and
2142+
result = this
2143+
)
2144+
}
2145+
2146+
override Class getADeclaringType() {
2147+
result = this.getDeclaringType()
2148+
or
2149+
result.isConstructedFrom(this.getDeclaringType())
2150+
}
2151+
}
2152+
2153+
private class LocalCanonicalField extends CanonicalField {
2154+
Class declaringType;
2155+
2156+
LocalCanonicalField() {
2157+
declaringType = this.getDeclaringType() and
2158+
declaringType.isLocal()
2159+
}
2160+
2161+
override Field getAField() { result = this }
2162+
2163+
override Class getADeclaringType() { result = declaringType }
2164+
}
2165+
2166+
/**
2167+
* A canonical representation of a `Union`. See `CanonicalField` for the explanation for
2168+
* why we need a canonical representation.
2169+
*/
2170+
abstract private class CanonicalUnion extends Union {
2171+
/** Gets a union represented by this canonical union. */
2172+
abstract Union getAUnion();
2173+
2174+
/** Gets a canonical field of this canonical union. */
2175+
CanonicalField getACanonicalField() { result.getDeclaringType() = this }
2176+
}
2177+
2178+
private class NonLocalCanonicalUnion extends CanonicalUnion {
2179+
NonLocalCanonicalUnion() { not this.isFromTemplateInstantiation(_) and not this.isLocal() }
2180+
2181+
override Union getAUnion() {
2182+
result = this
2183+
or
2184+
result.isConstructedFrom(this)
2185+
}
2186+
}
2187+
2188+
private class LocalCanonicalUnion extends CanonicalUnion {
2189+
LocalCanonicalUnion() { this.isLocal() }
2190+
2191+
override Union getAUnion() { result = this }
2192+
}
2193+
20812194
bindingset[f]
20822195
pragma[inline_late]
2083-
private int getFieldSize(Field f) { result = f.getType().getSize() }
2196+
private int getFieldSize(CanonicalField f) { result = max(f.getAType().getSize()) }
20842197

20852198
/**
20862199
* Gets a field in the union `u` whose size
20872200
* is `bytes` number of bytes.
20882201
*/
2089-
private Field getAFieldWithSize(Union u, int bytes) {
2090-
result = u.getAField() and
2202+
private CanonicalField getAFieldWithSize(CanonicalUnion u, int bytes) {
2203+
result = u.getACanonicalField() and
20912204
bytes = getFieldSize(result)
20922205
}
20932206

20942207
cached
20952208
private newtype TContent =
2096-
TNonUnionContent(Field f, int indirectionIndex) {
2209+
TNonUnionContent(CanonicalField f, int indirectionIndex) {
20972210
// the indirection index for field content starts at 1 (because `TNonUnionContent` is thought of as
20982211
// the address of the field, `FieldAddress` in the IR).
2099-
indirectionIndex = [1 .. SsaImpl::getMaxIndirectionsForType(f.getUnspecifiedType())] and
2212+
indirectionIndex = [1 .. max(SsaImpl::getMaxIndirectionsForType(f.getAnUnspecifiedType()))] and
21002213
// Reads and writes of union fields are tracked using `UnionContent`.
21012214
not f.getDeclaringType() instanceof Union
21022215
} or
2103-
TUnionContent(Union u, int bytes, int indirectionIndex) {
2104-
exists(Field f |
2105-
f = u.getAField() and
2216+
TUnionContent(CanonicalUnion u, int bytes, int indirectionIndex) {
2217+
exists(CanonicalField f |
2218+
f = u.getACanonicalField() and
21062219
bytes = getFieldSize(f) and
21072220
// We key `UnionContent` by the union instead of its fields since a write to one
21082221
// field can be read by any read of the union's fields. Again, the indirection index
21092222
// is 1-based (because 0 is considered the address).
21102223
indirectionIndex =
21112224
[1 .. max(SsaImpl::getMaxIndirectionsForType(getAFieldWithSize(u, bytes)
2112-
.getUnspecifiedType())
2225+
.getAnUnspecifiedType())
21132226
)]
21142227
)
21152228
} or
@@ -2175,8 +2288,12 @@ class FieldContent extends Content, TFieldContent {
21752288

21762289
/**
21772290
* Gets the field associated with this `Content`, if a unique one exists.
2291+
*
2292+
* For fields from template instantiations this predicate may still return
2293+
* more than one field, but all the fields will be constructed from the same
2294+
* template.
21782295
*/
2179-
final Field getField() { result = unique( | | this.getAField()) }
2296+
Field getField() { none() } // overridden in subclasses
21802297

21812298
override int getIndirectionIndex() { none() } // overridden in subclasses
21822299

@@ -2187,57 +2304,65 @@ class FieldContent extends Content, TFieldContent {
21872304

21882305
/** A reference through a non-union instance field. */
21892306
class NonUnionFieldContent extends FieldContent, TNonUnionContent {
2190-
private Field f;
2307+
private CanonicalField f;
21912308
private int indirectionIndex;
21922309

21932310
NonUnionFieldContent() { this = TNonUnionContent(f, indirectionIndex) }
21942311

21952312
override string toString() { result = contentStars(this) + f.toString() }
21962313

2197-
override Field getAField() { result = f }
2314+
final override Field getField() { result = f.getAField() }
2315+
2316+
override Field getAField() { result = this.getField() }
21982317

21992318
/** Gets the indirection index of this `FieldContent`. */
22002319
override int getIndirectionIndex() { result = indirectionIndex }
22012320

22022321
override predicate impliesClearOf(Content c) {
2203-
exists(FieldContent fc |
2204-
fc = c and
2205-
fc.getField() = f and
2322+
exists(int i |
2323+
c = TNonUnionContent(f, i) and
22062324
// If `this` is `f` then `c` is cleared if it's of the
22072325
// form `*f`, `**f`, etc.
2208-
fc.getIndirectionIndex() >= indirectionIndex
2326+
i >= indirectionIndex
22092327
)
22102328
}
22112329
}
22122330

22132331
/** A reference through an instance field of a union. */
22142332
class UnionContent extends FieldContent, TUnionContent {
2215-
private Union u;
2333+
private CanonicalUnion u;
22162334
private int indirectionIndex;
22172335
private int bytes;
22182336

22192337
UnionContent() { this = TUnionContent(u, bytes, indirectionIndex) }
22202338

22212339
override string toString() { result = contentStars(this) + u.toString() }
22222340

2341+
final override Field getField() { result = unique( | | u.getACanonicalField()).getAField() }
2342+
22232343
/** Gets a field of the underlying union of this `UnionContent`, if any. */
2224-
override Field getAField() { result = u.getAField() and getFieldSize(result) = bytes }
2344+
override Field getAField() {
2345+
exists(CanonicalField cf |
2346+
cf = u.getACanonicalField() and
2347+
result = cf.getAField() and
2348+
getFieldSize(cf) = bytes
2349+
)
2350+
}
22252351

22262352
/** Gets the underlying union of this `UnionContent`. */
2227-
Union getUnion() { result = u }
2353+
Union getUnion() { result = u.getAUnion() }
22282354

22292355
/** Gets the indirection index of this `UnionContent`. */
22302356
override int getIndirectionIndex() { result = indirectionIndex }
22312357

22322358
override predicate impliesClearOf(Content c) {
2233-
exists(UnionContent uc |
2234-
uc = c and
2235-
uc.getUnion() = u and
2359+
exists(int i |
2360+
c = TUnionContent(u, _, i) and
22362361
// If `this` is `u` then `c` is cleared if it's of the
22372362
// form `*u`, `**u`, etc. (and we ignore `bytes` because
22382363
// we know the entire union is overwritten because it's a
22392364
// union).
2240-
uc.getIndirectionIndex() >= indirectionIndex
2365+
i >= indirectionIndex
22412366
)
22422367
}
22432368
}

cpp/ql/test/library-tests/dataflow/fields/dataflow-consistency.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ postWithInFlow
142142
| simple.cpp:92:7:92:7 | i [post update] | PostUpdateNode should not be the target of local flow. |
143143
| simple.cpp:118:7:118:7 | i [post update] | PostUpdateNode should not be the target of local flow. |
144144
| simple.cpp:124:5:124:6 | * ... [post update] | PostUpdateNode should not be the target of local flow. |
145+
| simple.cpp:167:9:167:9 | x [post update] | PostUpdateNode should not be the target of local flow. |
145146
viableImplInCallContextTooLarge
146147
uniqueParameterNodeAtPosition
147148
uniqueParameterNodePosition

cpp/ql/test/library-tests/dataflow/fields/partial-definition-diff.expected

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,3 +308,5 @@ WARNING: module 'DataFlow' has been deprecated and may be removed in future (par
308308
| simple.cpp:124:5:124:6 | * ... | AST only |
309309
| simple.cpp:131:14:131:14 | a | IR only |
310310
| simple.cpp:136:10:136:10 | a | IR only |
311+
| simple.cpp:167:9:167:9 | x | AST only |
312+
| simple.cpp:168:8:168:12 | u_int | IR only |

cpp/ql/test/library-tests/dataflow/fields/partial-definition-ir.expected

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,8 @@
670670
| simple.cpp:131:14:131:14 | a |
671671
| simple.cpp:135:20:135:20 | q |
672672
| simple.cpp:136:10:136:10 | a |
673+
| simple.cpp:167:3:167:7 | u_int |
674+
| simple.cpp:168:8:168:12 | u_int |
673675
| struct_init.c:15:8:15:9 | ab |
674676
| struct_init.c:15:12:15:12 | a |
675677
| struct_init.c:16:8:16:9 | ab |

cpp/ql/test/library-tests/dataflow/fields/partial-definition.expected

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,8 @@ WARNING: module 'DataFlow' has been deprecated and may be removed in future (par
597597
| simple.cpp:118:7:118:7 | i |
598598
| simple.cpp:124:5:124:6 | * ... |
599599
| simple.cpp:135:20:135:20 | q |
600+
| simple.cpp:167:3:167:7 | u_int |
601+
| simple.cpp:167:9:167:9 | x |
600602
| struct_init.c:15:8:15:9 | ab |
601603
| struct_init.c:15:12:15:12 | a |
602604
| struct_init.c:16:8:16:9 | ab |

cpp/ql/test/library-tests/dataflow/fields/simple.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,4 +136,36 @@ void alias_with_fields(bool b) {
136136
sink(a.i); // $ MISSING: ast,ir
137137
}
138138

139+
template<typename T>
140+
union U_with_two_instantiations_of_different_size {
141+
int x;
142+
T y;
143+
};
144+
145+
struct LargeStruct {
146+
int data[64];
147+
};
148+
149+
void test_union_with_two_instantiations_of_different_sizes() {
150+
// A union's fields is partitioned into "chunks" for field-flow in order to
151+
// improve performance (so that a write to a field of a union does not flow
152+
// to too many reads that don't happen at runtime). The partitioning is based
153+
// the size of the types in the union. So a write to a field of size k only
154+
// flows to a read of size k.
155+
// Since field-flow is based on uninstantiated types a field can have
156+
// multiple sizes if the union is instantiated with types of
157+
// different sizes. So to compute the partition we pick the maximum size.
158+
// Because of this there are `Content`s corresponding to the union
159+
// `U_with_two_instantiations_of_different_size<T>`: The one for size
160+
// `sizeof(int)`, and the one for size `sizeof(LargeStruct)` (because
161+
// `LargeStruct` is larger than `int`). So the write to `x` writes to the
162+
// `Content` for size `sizeof(int)`, and the read of `y` reads from the
163+
// `Content` for size `sizeof(LargeStruct)`.
164+
U_with_two_instantiations_of_different_size<int> u_int;
165+
U_with_two_instantiations_of_different_size<LargeStruct> u_very_large;
166+
167+
u_int.x = user_input();
168+
sink(u_int.y); // $ MISSING: ir
169+
}
170+
139171
} // namespace Simple

cpp/ql/test/library-tests/variables/variables/variable.expected

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
| file://:0:0:0:0 | (unnamed parameter 0) | file://:0:0:0:0 | address && | SemanticStackVariable | | |
33
| file://:0:0:0:0 | (unnamed parameter 0) | file://:0:0:0:0 | const __va_list_tag & | SemanticStackVariable | | |
44
| file://:0:0:0:0 | (unnamed parameter 0) | file://:0:0:0:0 | const address & | SemanticStackVariable | | |
5-
| file://:0:0:0:0 | fp_offset | file://:0:0:0:0 | unsigned int | Field | | |
6-
| file://:0:0:0:0 | gp_offset | file://:0:0:0:0 | unsigned int | Field | | |
7-
| file://:0:0:0:0 | overflow_arg_area | file://:0:0:0:0 | void * | Field | | |
8-
| file://:0:0:0:0 | reg_save_area | file://:0:0:0:0 | void * | Field | | |
5+
| file://:0:0:0:0 | fp_offset | file://:0:0:0:0 | unsigned int | NonLocalCanonicalField | | |
6+
| file://:0:0:0:0 | gp_offset | file://:0:0:0:0 | unsigned int | NonLocalCanonicalField | | |
7+
| file://:0:0:0:0 | overflow_arg_area | file://:0:0:0:0 | void * | NonLocalCanonicalField | | |
8+
| file://:0:0:0:0 | reg_save_area | file://:0:0:0:0 | void * | NonLocalCanonicalField | | |
99
| variables.cpp:1:12:1:12 | i | file://:0:0:0:0 | int | GlobalLikeVariable, GlobalVariable, StaticStorageDurationVariable | | |
1010
| variables.cpp:2:12:2:12 | i | file://:0:0:0:0 | int | GlobalLikeVariable, GlobalVariable, StaticStorageDurationVariable | | |
1111
| variables.cpp:3:12:3:12 | i | file://:0:0:0:0 | int | GlobalLikeVariable, GlobalVariable, StaticStorageDurationVariable | | |
@@ -33,10 +33,10 @@
3333
| variables.cpp:37:6:37:8 | ap3 | file://:0:0:0:0 | int * | GlobalLikeVariable, GlobalVariable, StaticStorageDurationVariable | | |
3434
| variables.cpp:41:7:41:11 | local | file://:0:0:0:0 | char[] | LocalVariable, SemanticStackVariable | | |
3535
| variables.cpp:43:14:43:18 | local | file://:0:0:0:0 | int | GlobalLikeVariable, StaticLocalVariable | | static |
36-
| variables.cpp:48:9:48:12 | name | file://:0:0:0:0 | char * | Field | | |
37-
| variables.cpp:49:12:49:17 | number | file://:0:0:0:0 | long | Field | | |
38-
| variables.cpp:50:9:50:14 | street | file://:0:0:0:0 | char * | Field | | |
39-
| variables.cpp:51:9:51:12 | town | file://:0:0:0:0 | char * | Field | | |
36+
| variables.cpp:48:9:48:12 | name | file://:0:0:0:0 | char * | NonLocalCanonicalField | | |
37+
| variables.cpp:49:12:49:17 | number | file://:0:0:0:0 | long | NonLocalCanonicalField | | |
38+
| variables.cpp:50:9:50:14 | street | file://:0:0:0:0 | char * | NonLocalCanonicalField | | |
39+
| variables.cpp:51:9:51:12 | town | file://:0:0:0:0 | char * | NonLocalCanonicalField | | |
4040
| variables.cpp:52:16:52:22 | country | file://:0:0:0:0 | char * | MemberVariable, StaticStorageDurationVariable | | static |
4141
| variables.cpp:56:14:56:29 | externInFunction | file://:0:0:0:0 | int | GlobalLikeVariable, GlobalVariable, StaticStorageDurationVariable | | |
4242
| variables.cpp:60:10:60:17 | __func__ | file://:0:0:0:0 | const char[9] | GlobalLikeVariable, StaticInitializedStaticLocalVariable | | static |

0 commit comments

Comments
 (0)