Skip to content

Commit 2275b19

Browse files
committed
Reapply "[clang][analyzer] Stable order for SymbolRef-keyed containers"
This reverts commit a106ad0.
1 parent a2b9058 commit 2275b19

File tree

11 files changed

+149
-85
lines changed

11 files changed

+149
-85
lines changed

clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ namespace ento {
2525

2626
class MemRegion;
2727

28+
using SymbolID = unsigned;
29+
2830
/// Symbolic value. These values used to capture symbolic execution of
2931
/// the program.
3032
class SymExpr : public llvm::FoldingSetNode {
@@ -39,9 +41,19 @@ class SymExpr : public llvm::FoldingSetNode {
3941

4042
private:
4143
Kind K;
44+
/// A unique identifier for this symbol.
45+
///
46+
/// It is useful for SymbolData to easily differentiate multiple symbols, but
47+
/// also for "ephemeral" symbols, such as binary operations, because this id
48+
/// can be used for arranging constraints or equivalence classes instead of
49+
/// unstable pointer values.
50+
///
51+
/// Note, however, that it can't be used in Profile because SymbolManager
52+
/// needs to compute Profile before allocating SymExpr.
53+
const SymbolID Sym;
4254

4355
protected:
44-
SymExpr(Kind k) : K(k) {}
56+
SymExpr(Kind k, SymbolID Sym) : K(k), Sym(Sym) {}
4557

4658
static bool isValidTypeForSymbol(QualType T) {
4759
// FIXME: Depending on whether we choose to deprecate structural symbols,
@@ -56,6 +68,14 @@ class SymExpr : public llvm::FoldingSetNode {
5668

5769
Kind getKind() const { return K; }
5870

71+
/// Get a unique identifier for this symbol.
72+
/// The ID is unique across all SymExprs in a SymbolManager.
73+
/// They reflect the allocation order of these SymExprs,
74+
/// and are likely stable across runs.
75+
/// Used as a key in SymbolRef containers and as part of identity
76+
/// for SymbolData, e.g. SymbolConjured with ID = 7 is "conj_$7".
77+
SymbolID getSymbolID() const { return Sym; }
78+
5979
virtual void dump() const;
6080

6181
virtual void dumpToStream(raw_ostream &os) const {}
@@ -112,28 +132,21 @@ inline raw_ostream &operator<<(raw_ostream &os,
112132

113133
using SymbolRef = const SymExpr *;
114134
using SymbolRefSmallVectorTy = SmallVector<SymbolRef, 2>;
115-
using SymbolID = unsigned;
116135

117136
/// A symbol representing data which can be stored in a memory location
118137
/// (region).
119138
class SymbolData : public SymExpr {
120-
const SymbolID Sym;
121-
122139
void anchor() override;
123140

124141
protected:
125-
SymbolData(Kind k, SymbolID sym) : SymExpr(k), Sym(sym) {
126-
assert(classof(this));
127-
}
142+
SymbolData(Kind k, SymbolID sym) : SymExpr(k, sym) { assert(classof(this)); }
128143

129144
public:
130145
~SymbolData() override = default;
131146

132147
/// Get a string representation of the kind of the region.
133148
virtual StringRef getKindStr() const = 0;
134149

135-
SymbolID getSymbolID() const { return Sym; }
136-
137150
unsigned computeComplexity() const override {
138151
return 1;
139152
};

clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h

Lines changed: 78 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "llvm/ADT/DenseMap.h"
2626
#include "llvm/ADT/DenseSet.h"
2727
#include "llvm/ADT/FoldingSet.h"
28+
#include "llvm/ADT/ImmutableSet.h"
2829
#include "llvm/ADT/iterator_range.h"
2930
#include "llvm/Support/Allocator.h"
3031
#include <cassert>
@@ -43,15 +44,16 @@ class StoreManager;
4344
class SymbolRegionValue : public SymbolData {
4445
const TypedValueRegion *R;
4546

46-
public:
47+
friend class SymExprAllocator;
4748
SymbolRegionValue(SymbolID sym, const TypedValueRegion *r)
4849
: SymbolData(SymbolRegionValueKind, sym), R(r) {
4950
assert(r);
5051
assert(isValidTypeForSymbol(r->getValueType()));
5152
}
5253

54+
public:
5355
LLVM_ATTRIBUTE_RETURNS_NONNULL
54-
const TypedValueRegion* getRegion() const { return R; }
56+
const TypedValueRegion *getRegion() const { return R; }
5557

5658
static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) {
5759
profile.AddInteger((unsigned) SymbolRegionValueKind);
@@ -84,7 +86,7 @@ class SymbolConjured : public SymbolData {
8486
const LocationContext *LCtx;
8587
const void *SymbolTag;
8688

87-
public:
89+
friend class SymExprAllocator;
8890
SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx,
8991
QualType t, unsigned count, const void *symbolTag)
9092
: SymbolData(SymbolConjuredKind, sym), S(s), T(t), Count(count),
@@ -98,6 +100,7 @@ class SymbolConjured : public SymbolData {
98100
assert(isValidTypeForSymbol(t));
99101
}
100102

103+
public:
101104
/// It might return null.
102105
const Stmt *getStmt() const { return S; }
103106
unsigned getCount() const { return Count; }
@@ -137,14 +140,15 @@ class SymbolDerived : public SymbolData {
137140
SymbolRef parentSymbol;
138141
const TypedValueRegion *R;
139142

140-
public:
143+
friend class SymExprAllocator;
141144
SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r)
142145
: SymbolData(SymbolDerivedKind, sym), parentSymbol(parent), R(r) {
143146
assert(parent);
144147
assert(r);
145148
assert(isValidTypeForSymbol(r->getValueType()));
146149
}
147150

151+
public:
148152
LLVM_ATTRIBUTE_RETURNS_NONNULL
149153
SymbolRef getParentSymbol() const { return parentSymbol; }
150154
LLVM_ATTRIBUTE_RETURNS_NONNULL
@@ -180,12 +184,13 @@ class SymbolDerived : public SymbolData {
180184
class SymbolExtent : public SymbolData {
181185
const SubRegion *R;
182186

183-
public:
187+
friend class SymExprAllocator;
184188
SymbolExtent(SymbolID sym, const SubRegion *r)
185189
: SymbolData(SymbolExtentKind, sym), R(r) {
186190
assert(r);
187191
}
188192

193+
public:
189194
LLVM_ATTRIBUTE_RETURNS_NONNULL
190195
const SubRegion *getRegion() const { return R; }
191196

@@ -222,7 +227,7 @@ class SymbolMetadata : public SymbolData {
222227
unsigned Count;
223228
const void *Tag;
224229

225-
public:
230+
friend class SymExprAllocator;
226231
SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t,
227232
const LocationContext *LCtx, unsigned count, const void *tag)
228233
: SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), LCtx(LCtx),
@@ -234,6 +239,7 @@ class SymbolMetadata : public SymbolData {
234239
assert(tag);
235240
}
236241

242+
public:
237243
LLVM_ATTRIBUTE_RETURNS_NONNULL
238244
const MemRegion *getRegion() const { return R; }
239245

@@ -286,15 +292,16 @@ class SymbolCast : public SymExpr {
286292
/// The type of the result.
287293
QualType ToTy;
288294

289-
public:
290-
SymbolCast(const SymExpr *In, QualType From, QualType To)
291-
: SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) {
295+
friend class SymExprAllocator;
296+
SymbolCast(SymbolID Sym, const SymExpr *In, QualType From, QualType To)
297+
: SymExpr(SymbolCastKind, Sym), Operand(In), FromTy(From), ToTy(To) {
292298
assert(In);
293299
assert(isValidTypeForSymbol(From));
294300
// FIXME: GenericTaintChecker creates symbols of void type.
295301
// Otherwise, 'To' should also be a valid type.
296302
}
297303

304+
public:
298305
unsigned computeComplexity() const override {
299306
if (Complexity == 0)
300307
Complexity = 1 + Operand->computeComplexity();
@@ -332,9 +339,10 @@ class UnarySymExpr : public SymExpr {
332339
UnaryOperator::Opcode Op;
333340
QualType T;
334341

335-
public:
336-
UnarySymExpr(const SymExpr *In, UnaryOperator::Opcode Op, QualType T)
337-
: SymExpr(UnarySymExprKind), Operand(In), Op(Op), T(T) {
342+
friend class SymExprAllocator;
343+
UnarySymExpr(SymbolID Sym, const SymExpr *In, UnaryOperator::Opcode Op,
344+
QualType T)
345+
: SymExpr(UnarySymExprKind, Sym), Operand(In), Op(Op), T(T) {
338346
// Note, some unary operators are modeled as a binary operator. E.g. ++x is
339347
// modeled as x + 1.
340348
assert((Op == UO_Minus || Op == UO_Not) && "non-supported unary expression");
@@ -345,6 +353,7 @@ class UnarySymExpr : public SymExpr {
345353
assert(!Loc::isLocType(T) && "unary symbol should be nonloc");
346354
}
347355

356+
public:
348357
unsigned computeComplexity() const override {
349358
if (Complexity == 0)
350359
Complexity = 1 + Operand->computeComplexity();
@@ -381,8 +390,8 @@ class BinarySymExpr : public SymExpr {
381390
QualType T;
382391

383392
protected:
384-
BinarySymExpr(Kind k, BinaryOperator::Opcode op, QualType t)
385-
: SymExpr(k), Op(op), T(t) {
393+
BinarySymExpr(SymbolID Sym, Kind k, BinaryOperator::Opcode op, QualType t)
394+
: SymExpr(k, Sym), Op(op), T(t) {
386395
assert(classof(this));
387396
// Binary expressions are results of arithmetic. Pointer arithmetic is not
388397
// handled by binary expressions, but it is instead handled by applying
@@ -425,14 +434,15 @@ class BinarySymExprImpl : public BinarySymExpr {
425434
LHSTYPE LHS;
426435
RHSTYPE RHS;
427436

428-
public:
429-
BinarySymExprImpl(LHSTYPE lhs, BinaryOperator::Opcode op, RHSTYPE rhs,
430-
QualType t)
431-
: BinarySymExpr(ClassKind, op, t), LHS(lhs), RHS(rhs) {
437+
friend class SymExprAllocator;
438+
BinarySymExprImpl(SymbolID Sym, LHSTYPE lhs, BinaryOperator::Opcode op,
439+
RHSTYPE rhs, QualType t)
440+
: BinarySymExpr(Sym, ClassKind, op, t), LHS(lhs), RHS(rhs) {
432441
assert(getPointer(lhs));
433442
assert(getPointer(rhs));
434443
}
435444

445+
public:
436446
void dumpToStream(raw_ostream &os) const override {
437447
dumpToStreamImpl(os, LHS);
438448
dumpToStreamImpl(os, getOpcode());
@@ -478,6 +488,21 @@ using IntSymExpr = BinarySymExprImpl<APSIntPtr, const SymExpr *,
478488
using SymSymExpr = BinarySymExprImpl<const SymExpr *, const SymExpr *,
479489
SymExpr::Kind::SymSymExprKind>;
480490

491+
class SymExprAllocator {
492+
SymbolID NextSymbolID = 0;
493+
llvm::BumpPtrAllocator &Alloc;
494+
495+
public:
496+
explicit SymExprAllocator(llvm::BumpPtrAllocator &Alloc) : Alloc(Alloc) {}
497+
498+
template <class SymT, typename... ArgsT> SymT *make(ArgsT &&...Args) {
499+
return new (Alloc) SymT(nextID(), std::forward<ArgsT>(Args)...);
500+
}
501+
502+
private:
503+
SymbolID nextID() { return NextSymbolID++; }
504+
};
505+
481506
class SymbolManager {
482507
using DataSetTy = llvm::FoldingSet<SymExpr>;
483508
using SymbolDependTy =
@@ -489,15 +514,14 @@ class SymbolManager {
489514
/// alive as long as the key is live.
490515
SymbolDependTy SymbolDependencies;
491516

492-
unsigned SymbolCounter = 0;
493-
llvm::BumpPtrAllocator& BPAlloc;
517+
SymExprAllocator Alloc;
494518
BasicValueFactory &BV;
495519
ASTContext &Ctx;
496520

497521
public:
498522
SymbolManager(ASTContext &ctx, BasicValueFactory &bv,
499-
llvm::BumpPtrAllocator& bpalloc)
500-
: SymbolDependencies(16), BPAlloc(bpalloc), BV(bv), Ctx(ctx) {}
523+
llvm::BumpPtrAllocator &bpalloc)
524+
: SymbolDependencies(16), Alloc(bpalloc), BV(bv), Ctx(ctx) {}
501525

502526
static bool canSymbolicate(QualType T);
503527

@@ -687,4 +711,36 @@ class SymbolVisitor {
687711

688712
} // namespace clang
689713

714+
// Override the default definition that would use pointer values of SymbolRefs
715+
// to order them, which is unstable due to ASLR.
716+
// Use the SymbolID instead which reflect the order in which the symbols were
717+
// allocated. This is usually stable across runs leading to the stability of
718+
// ConstraintMap and other containers using SymbolRef as keys.
719+
template <>
720+
struct ::llvm::ImutContainerInfo<clang::ento::SymbolRef>
721+
: public ImutProfileInfo<clang::ento::SymbolRef> {
722+
using value_type = clang::ento::SymbolRef;
723+
using value_type_ref = clang::ento::SymbolRef;
724+
using key_type = value_type;
725+
using key_type_ref = value_type_ref;
726+
using data_type = bool;
727+
using data_type_ref = bool;
728+
729+
static key_type_ref KeyOfValue(value_type_ref D) { return D; }
730+
static data_type_ref DataOfValue(value_type_ref) { return true; }
731+
732+
static bool isEqual(clang::ento::SymbolRef LHS, clang::ento::SymbolRef RHS) {
733+
return LHS->getSymbolID() == RHS->getSymbolID();
734+
}
735+
736+
static bool isLess(clang::ento::SymbolRef LHS, clang::ento::SymbolRef RHS) {
737+
return LHS->getSymbolID() < RHS->getSymbolID();
738+
}
739+
740+
// This might seem redundant, but it is required because of the way
741+
// ImmutableSet is implemented through AVLTree:
742+
// same as ImmutableMap, but with a non-informative "data".
743+
static bool isDataEqual(data_type_ref, data_type_ref) { return true; }
744+
};
745+
690746
#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H

0 commit comments

Comments
 (0)