Skip to content

Commit 3e18b5a

Browse files
authored
[LifetimeSafety] Associate origins to all l-valued expressions (#156896)
This patch refactors the C++ lifetime safety analysis to implement a more consistent model for tracking borrows. The central idea is to make loan creation a consequence of referencing a variable, while making loan propagation dependent on the type's semantics. This change introduces a more uniform model for tracking borrows from non-pointer types: * Centralised Loan Creation: A Loan is now created for every `DeclRefExpr` that refers to a **non-pointer type** (e.g., `std::string`, `int`). This correctly models that any use of an **gl-value** is a borrow of its storage, replacing the previous heuristic-based loan creation. * The address-of operator (&) no longer creates loans. Instead, it propagates the origin (and thus the loans) of its sub-expression. This is guarded to exclude expressions that are already pointer types, deferring the complexity of pointers-to-pointers. **Future Work: Multi-Origin Model** This patch deliberately defers support for creating loans on references to pointer-type expressions (e.g., `&my_pointer`). The current single-origin model is unable to distinguish between a loan to the pointer variable itself (its storage) and a loan to the object it points to. The future plan is to move to a multi-origin model where a type has a "list of origins" governed by its level of indirection, which will allow the analysis to track these distinct lifetimes separately. Once this more advanced model is in place, the restriction can be lifted, and all `DeclRefExpr` nodes, regardless of type, can uniformly create a loan, making the analysis consistent.
1 parent e1aa2df commit 3e18b5a

File tree

2 files changed

+269
-169
lines changed

2 files changed

+269
-169
lines changed

clang/lib/Analysis/LifetimeSafety.cpp

Lines changed: 113 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,11 @@ struct Loan {
5050

5151
Loan(LoanID id, AccessPath path, const Expr *IssueExpr)
5252
: ID(id), Path(path), IssueExpr(IssueExpr) {}
53+
54+
void dump(llvm::raw_ostream &OS) const {
55+
OS << ID << " (Path: ";
56+
OS << Path.D->getNameAsString() << ")";
57+
}
5358
};
5459

5560
/// An Origin is a symbolic identifier that represents the set of possible
@@ -120,17 +125,19 @@ class OriginManager {
120125

121126
// TODO: Mark this method as const once we remove the call to getOrCreate.
122127
OriginID get(const Expr &E) {
123-
// Origin of DeclRefExpr is that of the declaration it refers to.
128+
auto It = ExprToOriginID.find(&E);
129+
if (It != ExprToOriginID.end())
130+
return It->second;
131+
// If the expression itself has no specific origin, and it's a reference
132+
// to a declaration, its origin is that of the declaration it refers to.
133+
// For pointer types, where we don't pre-emptively create an origin for the
134+
// DeclRefExpr itself.
124135
if (const auto *DRE = dyn_cast<DeclRefExpr>(&E))
125136
return get(*DRE->getDecl());
126-
auto It = ExprToOriginID.find(&E);
127137
// TODO: This should be an assert(It != ExprToOriginID.end()). The current
128138
// implementation falls back to getOrCreate to avoid crashing on
129139
// yet-unhandled pointer expressions, creating an empty origin for them.
130-
if (It == ExprToOriginID.end())
131-
return getOrCreate(E);
132-
133-
return It->second;
140+
return getOrCreate(E);
134141
}
135142

136143
OriginID get(const ValueDecl &D) {
@@ -149,10 +156,6 @@ class OriginManager {
149156
if (It != ExprToOriginID.end())
150157
return It->second;
151158

152-
if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) {
153-
// Origin of DeclRefExpr is that of the declaration it refers to.
154-
return getOrCreate(*DRE->getDecl());
155-
}
156159
OriginID NewID = getNextOriginID();
157160
addOrigin(NewID, E);
158161
ExprToOriginID[&E] = NewID;
@@ -235,7 +238,8 @@ class Fact {
235238
return nullptr;
236239
}
237240

238-
virtual void dump(llvm::raw_ostream &OS, const OriginManager &) const {
241+
virtual void dump(llvm::raw_ostream &OS, const LoanManager &,
242+
const OriginManager &) const {
239243
OS << "Fact (Kind: " << static_cast<int>(K) << ")\n";
240244
}
241245
};
@@ -250,8 +254,11 @@ class IssueFact : public Fact {
250254
IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {}
251255
LoanID getLoanID() const { return LID; }
252256
OriginID getOriginID() const { return OID; }
253-
void dump(llvm::raw_ostream &OS, const OriginManager &OM) const override {
254-
OS << "Issue (LoanID: " << getLoanID() << ", ToOrigin: ";
257+
void dump(llvm::raw_ostream &OS, const LoanManager &LM,
258+
const OriginManager &OM) const override {
259+
OS << "Issue (";
260+
LM.getLoan(getLoanID()).dump(OS);
261+
OS << ", ToOrigin: ";
255262
OM.dump(getOriginID(), OS);
256263
OS << ")\n";
257264
}
@@ -270,8 +277,11 @@ class ExpireFact : public Fact {
270277
LoanID getLoanID() const { return LID; }
271278
SourceLocation getExpiryLoc() const { return ExpiryLoc; }
272279

273-
void dump(llvm::raw_ostream &OS, const OriginManager &OM) const override {
274-
OS << "Expire (LoanID: " << getLoanID() << ")\n";
280+
void dump(llvm::raw_ostream &OS, const LoanManager &LM,
281+
const OriginManager &) const override {
282+
OS << "Expire (";
283+
LM.getLoan(getLoanID()).dump(OS);
284+
OS << ")\n";
275285
}
276286
};
277287

@@ -288,7 +298,8 @@ class AssignOriginFact : public Fact {
288298
: Fact(Kind::AssignOrigin), OIDDest(OIDDest), OIDSrc(OIDSrc) {}
289299
OriginID getDestOriginID() const { return OIDDest; }
290300
OriginID getSrcOriginID() const { return OIDSrc; }
291-
void dump(llvm::raw_ostream &OS, const OriginManager &OM) const override {
301+
void dump(llvm::raw_ostream &OS, const LoanManager &,
302+
const OriginManager &OM) const override {
292303
OS << "AssignOrigin (Dest: ";
293304
OM.dump(getDestOriginID(), OS);
294305
OS << ", Src: ";
@@ -307,7 +318,8 @@ class ReturnOfOriginFact : public Fact {
307318

308319
ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {}
309320
OriginID getReturnedOriginID() const { return OID; }
310-
void dump(llvm::raw_ostream &OS, const OriginManager &OM) const override {
321+
void dump(llvm::raw_ostream &OS, const LoanManager &,
322+
const OriginManager &OM) const override {
311323
OS << "ReturnOfOrigin (";
312324
OM.dump(getReturnedOriginID(), OS);
313325
OS << ")\n";
@@ -333,10 +345,11 @@ class UseFact : public Fact {
333345
void markAsWritten() { IsWritten = true; }
334346
bool isWritten() const { return IsWritten; }
335347

336-
void dump(llvm::raw_ostream &OS, const OriginManager &OM) const override {
348+
void dump(llvm::raw_ostream &OS, const LoanManager &,
349+
const OriginManager &OM) const override {
337350
OS << "Use (";
338351
OM.dump(getUsedOrigin(OM), OS);
339-
OS << " " << (isWritten() ? "Write" : "Read") << ")\n";
352+
OS << ", " << (isWritten() ? "Write" : "Read") << ")\n";
340353
}
341354
};
342355

@@ -353,7 +366,8 @@ class TestPointFact : public Fact {
353366

354367
StringRef getAnnotation() const { return Annotation; }
355368

356-
void dump(llvm::raw_ostream &OS, const OriginManager &) const override {
369+
void dump(llvm::raw_ostream &OS, const LoanManager &,
370+
const OriginManager &) const override {
357371
OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n";
358372
}
359373
};
@@ -392,7 +406,7 @@ class FactManager {
392406
if (It != BlockToFactsMap.end()) {
393407
for (const Fact *F : It->second) {
394408
llvm::dbgs() << " ";
395-
F->dump(llvm::dbgs(), OriginMgr);
409+
F->dump(llvm::dbgs(), LoanMgr, OriginMgr);
396410
}
397411
}
398412
llvm::dbgs() << " End of Block\n";
@@ -438,12 +452,31 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> {
438452
void VisitDeclStmt(const DeclStmt *DS) {
439453
for (const Decl *D : DS->decls())
440454
if (const auto *VD = dyn_cast<VarDecl>(D))
441-
if (hasOrigin(VD->getType()))
455+
if (hasOrigin(VD))
442456
if (const Expr *InitExpr = VD->getInit())
443457
addAssignOriginFact(*VD, *InitExpr);
444458
}
445459

446-
void VisitDeclRefExpr(const DeclRefExpr *DRE) { handleUse(DRE); }
460+
void VisitDeclRefExpr(const DeclRefExpr *DRE) {
461+
handleUse(DRE);
462+
// For non-pointer/non-view types, a reference to the variable's storage
463+
// is a borrow. We create a loan for it.
464+
// For pointer/view types, we stick to the existing model for now and do
465+
// not create an extra origin for the l-value expression itself.
466+
467+
// TODO: A single origin for a `DeclRefExpr` for a pointer or view type is
468+
// not sufficient to model the different levels of indirection. The current
469+
// single-origin model cannot distinguish between a loan to the variable's
470+
// storage and a loan to what it points to. A multi-origin model would be
471+
// required for this.
472+
if (!isPointerType(DRE->getType())) {
473+
if (const Loan *L = createLoan(DRE)) {
474+
OriginID ExprOID = FactMgr.getOriginMgr().getOrCreate(*DRE);
475+
CurrentBlockFacts.push_back(
476+
FactMgr.createFact<IssueFact>(L->ID, ExprOID));
477+
}
478+
}
479+
}
447480

448481
void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) {
449482
/// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized
@@ -452,38 +485,31 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> {
452485
}
453486

454487
void VisitImplicitCastExpr(const ImplicitCastExpr *ICE) {
455-
if (!hasOrigin(ICE->getType()))
488+
if (!hasOrigin(ICE))
456489
return;
457490
// An ImplicitCastExpr node itself gets an origin, which flows from the
458491
// origin of its sub-expression (after stripping its own parens/casts).
459-
// TODO: Consider if this is actually useful in practice. Alternatively, we
460-
// could directly use the sub-expression's OriginID instead of creating a
461-
// new one.
462492
addAssignOriginFact(*ICE, *ICE->getSubExpr());
463493
}
464494

465495
void VisitUnaryOperator(const UnaryOperator *UO) {
466496
if (UO->getOpcode() == UO_AddrOf) {
467497
const Expr *SubExpr = UO->getSubExpr();
468-
if (const auto *DRE = dyn_cast<DeclRefExpr>(SubExpr)) {
469-
if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
470-
// Check if it's a local variable.
471-
if (VD->hasLocalStorage()) {
472-
OriginID OID = FactMgr.getOriginMgr().getOrCreate(*UO);
473-
AccessPath AddrOfLocalVarPath(VD);
474-
const Loan &L =
475-
FactMgr.getLoanMgr().addLoan(AddrOfLocalVarPath, UO);
476-
CurrentBlockFacts.push_back(
477-
FactMgr.createFact<IssueFact>(L.ID, OID));
478-
}
479-
}
480-
}
498+
// Taking address of a pointer-type expression is not yet supported and
499+
// will be supported in multi-origin model.
500+
if (isPointerType(SubExpr->getType()))
501+
return;
502+
// The origin of an address-of expression (e.g., &x) is the origin of
503+
// its sub-expression (x). This fact will cause the dataflow analysis
504+
// to propagate any loans held by the sub-expression's origin to the
505+
// origin of this UnaryOperator expression.
506+
addAssignOriginFact(*UO, *SubExpr);
481507
}
482508
}
483509

484510
void VisitReturnStmt(const ReturnStmt *RS) {
485511
if (const Expr *RetExpr = RS->getRetValue()) {
486-
if (hasOrigin(RetExpr->getType())) {
512+
if (hasOrigin(RetExpr)) {
487513
OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr);
488514
CurrentBlockFacts.push_back(
489515
FactMgr.createFact<ReturnOfOriginFact>(OID));
@@ -506,20 +532,6 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> {
506532
// expression.
507533
if (VisitTestPoint(FCE))
508534
return;
509-
// Visit as normal otherwise.
510-
Base::VisitCXXFunctionalCastExpr(FCE);
511-
}
512-
513-
private:
514-
// Check if a type has an origin.
515-
bool hasOrigin(QualType QT) { return QT->isPointerOrReferenceType(); }
516-
517-
template <typename Destination, typename Source>
518-
void addAssignOriginFact(const Destination &D, const Source &S) {
519-
OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D);
520-
OriginID SrcOID = FactMgr.getOriginMgr().get(S);
521-
CurrentBlockFacts.push_back(
522-
FactMgr.createFact<AssignOriginFact>(DestOID, SrcOID));
523535
}
524536

525537
void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) {
@@ -544,6 +556,41 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> {
544556
}
545557
}
546558

559+
private:
560+
static bool isPointerType(QualType QT) {
561+
return QT->isPointerOrReferenceType();
562+
}
563+
564+
// Check if a type has an origin.
565+
static bool hasOrigin(const Expr *E) {
566+
return E->isGLValue() || isPointerType(E->getType());
567+
}
568+
569+
static bool hasOrigin(const VarDecl *VD) {
570+
return isPointerType(VD->getType());
571+
}
572+
573+
/// Creates a loan for the storage path of a given declaration reference.
574+
/// This function should be called whenever a DeclRefExpr represents a borrow.
575+
/// \param DRE The declaration reference expression that initiates the borrow.
576+
/// \return The new Loan on success, nullptr otherwise.
577+
const Loan *createLoan(const DeclRefExpr *DRE) {
578+
if (const auto *VD = dyn_cast<ValueDecl>(DRE->getDecl())) {
579+
AccessPath Path(VD);
580+
// The loan is created at the location of the DeclRefExpr.
581+
return &FactMgr.getLoanMgr().addLoan(Path, DRE);
582+
}
583+
return nullptr;
584+
}
585+
586+
template <typename Destination, typename Source>
587+
void addAssignOriginFact(const Destination &D, const Source &S) {
588+
OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D);
589+
OriginID SrcOID = FactMgr.getOriginMgr().get(S);
590+
CurrentBlockFacts.push_back(
591+
FactMgr.createFact<AssignOriginFact>(DestOID, SrcOID));
592+
}
593+
547594
/// Checks if the expression is a `void("__lifetime_test_point_...")` cast.
548595
/// If so, creates a `TestPointFact` and returns true.
549596
bool VisitTestPoint(const CXXFunctionalCastExpr *FCE) {
@@ -566,25 +613,26 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> {
566613
}
567614

568615
void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr) {
616+
if (!hasOrigin(LHSExpr))
617+
return;
569618
// Find the underlying variable declaration for the left-hand side.
570619
if (const auto *DRE_LHS =
571620
dyn_cast<DeclRefExpr>(LHSExpr->IgnoreParenImpCasts())) {
572621
markUseAsWrite(DRE_LHS);
573622
if (const auto *VD_LHS = dyn_cast<ValueDecl>(DRE_LHS->getDecl()))
574-
if (hasOrigin(LHSExpr->getType()))
575-
// We are interested in assignments like `ptr1 = ptr2` or `ptr = &var`
576-
// LHS must be a pointer/reference type that can be an origin.
577-
// RHS must also represent an origin (either another pointer/ref or an
578-
// address-of).
579-
addAssignOriginFact(*VD_LHS, *RHSExpr);
623+
// We are interested in assignments like `ptr1 = ptr2` or `ptr = &var`.
624+
// LHS must be a pointer/reference type that can be an origin. RHS must
625+
// also represent an origin (either another pointer/ref or an
626+
// address-of).
627+
addAssignOriginFact(*VD_LHS, *RHSExpr);
580628
}
581629
}
582630

583-
// A DeclRefExpr is a use of the referenced decl. It is checked for
584-
// use-after-free unless it is being written to (e.g. on the left-hand side
585-
// of an assignment).
631+
// A DeclRefExpr will be treated as a use of the referenced decl. It will be
632+
// checked for use-after-free unless it is later marked as being written to
633+
// (e.g. on the left-hand side of an assignment).
586634
void handleUse(const DeclRefExpr *DRE) {
587-
if (hasOrigin(DRE->getType())) {
635+
if (isPointerType(DRE->getType())) {
588636
UseFact *UF = FactMgr.createFact<UseFact>(DRE);
589637
CurrentBlockFacts.push_back(UF);
590638
assert(!UseFacts.contains(DRE));

0 commit comments

Comments
 (0)