Skip to content

Commit 2e7b140

Browse files
committed
[clang][analyzer] Stabilize path-constraint order by using alloc IDs
These IDs are essentially allocator offsets for the SymExpr pool. They are superior to raw pointer values because they are more controllable and are not randomized across executions. They are not stable across runs because some spurious allocations might happen only in certain runs. For example, an unrelated ImmutableMap rotates its AVL tree based on the key values that are pointers. The rotation causes extra allocations in the shared allocator, and the next SymExpr allocation happens at a different offset. Yet, these IDs *order* is stable across runs because SymExprs are allocated in the same order and allocator offset grows monotonously. Stability of the constraint order is important for the stability of the analyzer results. I evaluated this change on a set of 200+ open-source C++ projects with the total number of ~78 000 symbolic-execution issues. This patch reduced the run-to-run churn (flakyness) in SE issues from 80-90 to 30-40 (out of 78K) in our CSA deployment (in our setting flaky issues are mostly due to Z3 refutation instability). Importantly, this change is necessary for the next step in stabilizing analysis results by caching Z3 query outcomes between analysis runs (work in progress). Across our admittedly noisy CI runs, I detected no noticeable effect on memory footprint or analysis time. CPP-5919
1 parent 79af7bd commit 2e7b140

File tree

4 files changed

+74
-39
lines changed

4 files changed

+74
-39
lines changed

clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,22 @@ class RangeSet {
401401
friend class Factory;
402402
};
403403

404-
using ConstraintMap = llvm::ImmutableMap<SymbolRef, RangeSet>;
404+
struct ConstraintKVInfo : llvm::ImutKeyValueInfo<SymbolRef, RangeSet> {
405+
static inline bool isEqual(key_type_ref L, key_type_ref R) {
406+
return L->getAllocID() == R->getAllocID();
407+
}
408+
409+
static inline bool isLess(key_type_ref L, key_type_ref R) {
410+
return L->getAllocID() < R->getAllocID();
411+
}
412+
413+
static inline void Profile(llvm::FoldingSetNodeID &ID, value_type_ref V) {
414+
ID.AddInteger(V.first->getAllocID());
415+
ID.Add(V.second);
416+
}
417+
};
418+
419+
using ConstraintMap = llvm::ImmutableMap<SymbolRef, RangeSet, ConstraintKVInfo>;
405420
ConstraintMap getConstraintMap(ProgramStateRef State);
406421

407422
class RangedConstraintManager : public SimpleConstraintManager {

clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class SymExpr : public llvm::FoldingSetNode {
3131
virtual void anchor();
3232

3333
public:
34+
using AllocIDType = int64_t;
3435
enum Kind {
3536
#define SYMBOL(Id, Parent) Id##Kind,
3637
#define SYMBOL_RANGE(Id, First, Last) BEGIN_##Id = First, END_##Id = Last,
@@ -39,9 +40,10 @@ class SymExpr : public llvm::FoldingSetNode {
3940

4041
private:
4142
Kind K;
43+
AllocIDType AllocID;
4244

4345
protected:
44-
SymExpr(Kind k) : K(k) {}
46+
SymExpr(Kind K, AllocIDType AllocID) : K(K), AllocID(AllocID) {}
4547

4648
static bool isValidTypeForSymbol(QualType T) {
4749
// FIXME: Depending on whether we choose to deprecate structural symbols,
@@ -56,6 +58,8 @@ class SymExpr : public llvm::FoldingSetNode {
5658

5759
Kind getKind() const { return K; }
5860

61+
AllocIDType getAllocID() const { return AllocID; }
62+
5963
virtual void dump() const;
6064

6165
virtual void dumpToStream(raw_ostream &os) const {}
@@ -122,7 +126,8 @@ class SymbolData : public SymExpr {
122126
void anchor() override;
123127

124128
protected:
125-
SymbolData(Kind k, SymbolID sym) : SymExpr(k), Sym(sym) {
129+
SymbolData(Kind k, SymbolID sym, AllocIDType AllocID)
130+
: SymExpr(k, AllocID), Sym(sym) {
126131
assert(classof(this));
127132
}
128133

clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h

Lines changed: 33 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,9 @@ class SymbolRegionValue : public SymbolData {
4444
const TypedValueRegion *R;
4545

4646
public:
47-
SymbolRegionValue(SymbolID sym, const TypedValueRegion *r)
48-
: SymbolData(SymbolRegionValueKind, sym), R(r) {
47+
SymbolRegionValue(SymbolID sym, const TypedValueRegion *r,
48+
AllocIDType AllocID)
49+
: SymbolData(SymbolRegionValueKind, sym, AllocID), R(r) {
4950
assert(r);
5051
assert(isValidTypeForSymbol(r->getValueType()));
5152
}
@@ -86,8 +87,9 @@ class SymbolConjured : public SymbolData {
8687

8788
public:
8889
SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx,
89-
QualType t, unsigned count, const void *symbolTag)
90-
: SymbolData(SymbolConjuredKind, sym), S(s), T(t), Count(count),
90+
QualType t, unsigned count, const void *symbolTag,
91+
AllocIDType AllocID)
92+
: SymbolData(SymbolConjuredKind, sym, AllocID), S(s), T(t), Count(count),
9193
LCtx(lctx), SymbolTag(symbolTag) {
9294
// FIXME: 's' might be a nullptr if we're conducting invalidation
9395
// that was caused by a destructor call on a temporary object,
@@ -138,8 +140,10 @@ class SymbolDerived : public SymbolData {
138140
const TypedValueRegion *R;
139141

140142
public:
141-
SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r)
142-
: SymbolData(SymbolDerivedKind, sym), parentSymbol(parent), R(r) {
143+
SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r,
144+
AllocIDType AllocID)
145+
: SymbolData(SymbolDerivedKind, sym, AllocID), parentSymbol(parent),
146+
R(r) {
143147
assert(parent);
144148
assert(r);
145149
assert(isValidTypeForSymbol(r->getValueType()));
@@ -181,8 +185,8 @@ class SymbolExtent : public SymbolData {
181185
const SubRegion *R;
182186

183187
public:
184-
SymbolExtent(SymbolID sym, const SubRegion *r)
185-
: SymbolData(SymbolExtentKind, sym), R(r) {
188+
SymbolExtent(SymbolID sym, const SubRegion *r, AllocIDType AllocID)
189+
: SymbolData(SymbolExtentKind, sym, AllocID), R(r) {
186190
assert(r);
187191
}
188192

@@ -223,16 +227,17 @@ class SymbolMetadata : public SymbolData {
223227
const void *Tag;
224228

225229
public:
226-
SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t,
227-
const LocationContext *LCtx, unsigned count, const void *tag)
228-
: SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), LCtx(LCtx),
229-
Count(count), Tag(tag) {
230-
assert(r);
231-
assert(s);
232-
assert(isValidTypeForSymbol(t));
233-
assert(LCtx);
234-
assert(tag);
235-
}
230+
SymbolMetadata(SymbolID sym, const MemRegion *r, const Stmt *s, QualType t,
231+
const LocationContext *LCtx, unsigned count, const void *tag,
232+
AllocIDType AllocID)
233+
: SymbolData(SymbolMetadataKind, sym, AllocID), R(r), S(s), T(t),
234+
LCtx(LCtx), Count(count), Tag(tag) {
235+
assert(r);
236+
assert(s);
237+
assert(isValidTypeForSymbol(t));
238+
assert(LCtx);
239+
assert(tag);
240+
}
236241

237242
LLVM_ATTRIBUTE_RETURNS_NONNULL
238243
const MemRegion *getRegion() const { return R; }
@@ -287,8 +292,8 @@ class SymbolCast : public SymExpr {
287292
QualType ToTy;
288293

289294
public:
290-
SymbolCast(const SymExpr *In, QualType From, QualType To)
291-
: SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) {
295+
SymbolCast(const SymExpr *In, QualType From, QualType To, AllocIDType AllocID)
296+
: SymExpr(SymbolCastKind, AllocID), Operand(In), FromTy(From), ToTy(To) {
292297
assert(In);
293298
assert(isValidTypeForSymbol(From));
294299
// FIXME: GenericTaintChecker creates symbols of void type.
@@ -333,8 +338,9 @@ class UnarySymExpr : public SymExpr {
333338
QualType T;
334339

335340
public:
336-
UnarySymExpr(const SymExpr *In, UnaryOperator::Opcode Op, QualType T)
337-
: SymExpr(UnarySymExprKind), Operand(In), Op(Op), T(T) {
341+
UnarySymExpr(const SymExpr *In, UnaryOperator::Opcode Op, QualType T,
342+
AllocIDType AllocID)
343+
: SymExpr(UnarySymExprKind, AllocID), Operand(In), Op(Op), T(T) {
338344
// Note, some unary operators are modeled as a binary operator. E.g. ++x is
339345
// modeled as x + 1.
340346
assert((Op == UO_Minus || Op == UO_Not) && "non-supported unary expression");
@@ -381,8 +387,9 @@ class BinarySymExpr : public SymExpr {
381387
QualType T;
382388

383389
protected:
384-
BinarySymExpr(Kind k, BinaryOperator::Opcode op, QualType t)
385-
: SymExpr(k), Op(op), T(t) {
390+
BinarySymExpr(Kind k, BinaryOperator::Opcode op, QualType t,
391+
AllocIDType AllocID)
392+
: SymExpr(k, AllocID), Op(op), T(t) {
386393
assert(classof(this));
387394
// Binary expressions are results of arithmetic. Pointer arithmetic is not
388395
// handled by binary expressions, but it is instead handled by applying
@@ -427,8 +434,8 @@ class BinarySymExprImpl : public BinarySymExpr {
427434

428435
public:
429436
BinarySymExprImpl(LHSTYPE lhs, BinaryOperator::Opcode op, RHSTYPE rhs,
430-
QualType t)
431-
: BinarySymExpr(ClassKind, op, t), LHS(lhs), RHS(rhs) {
437+
QualType t, AllocIDType AllocID)
438+
: BinarySymExpr(ClassKind, op, t, AllocID), LHS(lhs), RHS(rhs) {
432439
assert(getPointer(lhs));
433440
assert(getPointer(rhs));
434441
}

clang/lib/StaticAnalyzer/Core/SymbolManager.cpp

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,8 @@ SymbolManager::getRegionValueSymbol(const TypedValueRegion* R) {
170170
void *InsertPos;
171171
SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
172172
if (!SD) {
173-
SD = new (BPAlloc) SymbolRegionValue(SymbolCounter, R);
173+
SD = new (BPAlloc)
174+
SymbolRegionValue(SymbolCounter, R, BPAlloc.getBytesAllocated());
174175
DataSet.InsertNode(SD, InsertPos);
175176
++SymbolCounter;
176177
}
@@ -188,7 +189,8 @@ const SymbolConjured* SymbolManager::conjureSymbol(const Stmt *E,
188189
void *InsertPos;
189190
SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
190191
if (!SD) {
191-
SD = new (BPAlloc) SymbolConjured(SymbolCounter, E, LCtx, T, Count, SymbolTag);
192+
SD = new (BPAlloc) SymbolConjured(SymbolCounter, E, LCtx, T, Count,
193+
SymbolTag, BPAlloc.getBytesAllocated());
192194
DataSet.InsertNode(SD, InsertPos);
193195
++SymbolCounter;
194196
}
@@ -204,7 +206,8 @@ SymbolManager::getDerivedSymbol(SymbolRef parentSymbol,
204206
void *InsertPos;
205207
SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
206208
if (!SD) {
207-
SD = new (BPAlloc) SymbolDerived(SymbolCounter, parentSymbol, R);
209+
SD = new (BPAlloc) SymbolDerived(SymbolCounter, parentSymbol, R,
210+
BPAlloc.getBytesAllocated());
208211
DataSet.InsertNode(SD, InsertPos);
209212
++SymbolCounter;
210213
}
@@ -219,7 +222,8 @@ SymbolManager::getExtentSymbol(const SubRegion *R) {
219222
void *InsertPos;
220223
SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
221224
if (!SD) {
222-
SD = new (BPAlloc) SymbolExtent(SymbolCounter, R);
225+
SD = new (BPAlloc)
226+
SymbolExtent(SymbolCounter, R, BPAlloc.getBytesAllocated());
223227
DataSet.InsertNode(SD, InsertPos);
224228
++SymbolCounter;
225229
}
@@ -236,7 +240,8 @@ SymbolManager::getMetadataSymbol(const MemRegion* R, const Stmt *S, QualType T,
236240
void *InsertPos;
237241
SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
238242
if (!SD) {
239-
SD = new (BPAlloc) SymbolMetadata(SymbolCounter, R, S, T, LCtx, Count, SymbolTag);
243+
SD = new (BPAlloc) SymbolMetadata(SymbolCounter, R, S, T, LCtx, Count,
244+
SymbolTag, BPAlloc.getBytesAllocated());
240245
DataSet.InsertNode(SD, InsertPos);
241246
++SymbolCounter;
242247
}
@@ -252,7 +257,7 @@ SymbolManager::getCastSymbol(const SymExpr *Op,
252257
void *InsertPos;
253258
SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos);
254259
if (!data) {
255-
data = new (BPAlloc) SymbolCast(Op, From, To);
260+
data = new (BPAlloc) SymbolCast(Op, From, To, BPAlloc.getBytesAllocated());
256261
DataSet.InsertNode(data, InsertPos);
257262
}
258263

@@ -268,7 +273,7 @@ const SymIntExpr *SymbolManager::getSymIntExpr(const SymExpr *lhs,
268273
SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos);
269274

270275
if (!data) {
271-
data = new (BPAlloc) SymIntExpr(lhs, op, v, t);
276+
data = new (BPAlloc) SymIntExpr(lhs, op, v, t, BPAlloc.getBytesAllocated());
272277
DataSet.InsertNode(data, InsertPos);
273278
}
274279

@@ -284,7 +289,8 @@ const IntSymExpr *SymbolManager::getIntSymExpr(APSIntPtr lhs,
284289
SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos);
285290

286291
if (!data) {
287-
data = new (BPAlloc) IntSymExpr(lhs, op, rhs, t);
292+
data =
293+
new (BPAlloc) IntSymExpr(lhs, op, rhs, t, BPAlloc.getBytesAllocated());
288294
DataSet.InsertNode(data, InsertPos);
289295
}
290296

@@ -301,7 +307,8 @@ const SymSymExpr *SymbolManager::getSymSymExpr(const SymExpr *lhs,
301307
SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos);
302308

303309
if (!data) {
304-
data = new (BPAlloc) SymSymExpr(lhs, op, rhs, t);
310+
data =
311+
new (BPAlloc) SymSymExpr(lhs, op, rhs, t, BPAlloc.getBytesAllocated());
305312
DataSet.InsertNode(data, InsertPos);
306313
}
307314

@@ -316,7 +323,8 @@ const UnarySymExpr *SymbolManager::getUnarySymExpr(const SymExpr *Operand,
316323
void *InsertPos;
317324
SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos);
318325
if (!data) {
319-
data = new (BPAlloc) UnarySymExpr(Operand, Opc, T);
326+
data = new (BPAlloc)
327+
UnarySymExpr(Operand, Opc, T, BPAlloc.getBytesAllocated());
320328
DataSet.InsertNode(data, InsertPos);
321329
}
322330

0 commit comments

Comments
 (0)