-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[LifetimeSafety] Reorganize code into modular components #162474
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[LifetimeSafety] Reorganize code into modular components #162474
Conversation
✅ With the latest revision this PR passed the C/C++ code formatter. |
663d9c3
to
76f9e5c
Compare
Apply changes from code browser
Apply changes from code browser
Apply changes from code browser
Apply suggested changes
Apply changes from code browser
f31d078
to
1aeea3f
Compare
Apply changes from code browser
1aeea3f
to
98bb91d
Compare
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-analysis Author: Utkarsh Saxena (usx95) ChangesRestructure the C++ Lifetime Safety Analysis into modular components with clear separation of concerns. This PR reorganizes the C++ Lifetime Safety Analysis code by:
The code functionality remains the same, but is now better organized with clearer interfaces between components. Patch is 150.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162474.diff 27 Files Affected:
diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml
index c49fd1dd3cc7f..efdc42d349195 100644
--- a/.github/new-prs-labeler.yml
+++ b/.github/new-prs-labeler.yml
@@ -1096,8 +1096,8 @@ clang:openmp:
- llvm/test/Transforms/OpenMP/**
clang:temporal-safety:
- - clang/include/clang/Analysis/Analyses/LifetimeSafety*
- - clang/lib/Analysis/LifetimeSafety*
+ - clang/include/clang/Analysis/Analyses/LifetimeSafety/**
+ - clang/lib/Analysis/LifetimeSafety/**
- clang/unittests/Analysis/LifetimeSafety*
- clang/test/Sema/*lifetime-safety*
- clang/test/Sema/*lifetime-analysis*
diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h
new file mode 100644
index 0000000000000..b059e00d7bdaf
--- /dev/null
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h
@@ -0,0 +1,38 @@
+//===- Checker.h - C++ Lifetime Safety Analysis -*----------- C++-*-=========//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the entry point for lifetime checking, which detects
+// use-after-free errors by checking if live origins hold loans that have
+// expired.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H
+#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H
+
+#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/Reporter.h"
+
+namespace clang {
+namespace lifetimes {
+namespace internal {
+
+/// Runs the lifetime checker, which detects use-after-free errors by
+/// examining loan expiration points and checking if any live origins hold
+/// the expired loan.
+void runLifetimeChecker(LoanPropagationAnalysis &LoanPropagation,
+ LiveOriginAnalysis &LiveOrigins, FactManager &FactMgr,
+ AnalysisDeclContext &ADC,
+ LifetimeSafetyReporter *Reporter);
+
+} // namespace internal
+} // namespace lifetimes
+} // namespace clang
+
+#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H
diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Dataflow.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Dataflow.h
new file mode 100644
index 0000000000000..e818b26739e18
--- /dev/null
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Dataflow.h
@@ -0,0 +1,190 @@
+//===- Dataflow.h - Generic Dataflow Analysis Framework --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a generic, policy-based driver for dataflow analyses.
+// It provides a flexible framework that combines the dataflow runner and
+// transfer functions, allowing derived classes to implement specific analyses
+// by defining their lattice, join, and transfer functions.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H
+#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H
+
+#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TimeProfiler.h"
+#include <optional>
+
+namespace clang::lifetimes {
+namespace internal {
+
+enum class Direction { Forward, Backward };
+
+/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific
+/// `Fact`. identified by a lifetime-related event (`Fact`).
+///
+/// A `ProgramPoint` has "after" semantics: it represents the location
+/// immediately after its corresponding `Fact`.
+using ProgramPoint = const Fact *;
+
+/// A generic, policy-based driver for dataflow analyses. It combines
+/// the dataflow runner and the transferer logic into a single class hierarchy.
+///
+/// The derived class is expected to provide:
+/// - A `Lattice` type.
+/// - `StringRef getAnalysisName() const`
+/// - `Lattice getInitialState();` The initial state of the analysis.
+/// - `Lattice join(Lattice, Lattice);` Merges states from multiple CFG paths.
+/// - `Lattice transfer(Lattice, const FactType&);` Defines how a single
+/// lifetime-relevant `Fact` transforms the lattice state. Only overloads
+/// for facts relevant to the analysis need to be implemented.
+///
+/// \tparam Derived The CRTP derived class that implements the specific
+/// analysis.
+/// \tparam LatticeType The dataflow lattice used by the analysis.
+/// \tparam Dir The direction of the analysis (Forward or Backward).
+/// TODO: Maybe use the dataflow framework! The framework might need changes
+/// to support the current comparison done at block-entry.
+template <typename Derived, typename LatticeType, Direction Dir>
+class DataflowAnalysis {
+public:
+ using Lattice = LatticeType;
+ using Base = DataflowAnalysis<Derived, Lattice, Dir>;
+
+private:
+ const CFG &Cfg;
+ AnalysisDeclContext &AC;
+
+ /// The dataflow state before a basic block is processed.
+ llvm::DenseMap<const CFGBlock *, Lattice> InStates;
+ /// The dataflow state after a basic block is processed.
+ llvm::DenseMap<const CFGBlock *, Lattice> OutStates;
+ /// The dataflow state at a Program Point.
+ /// In a forward analysis, this is the state after the Fact at that point has
+ /// been applied, while in a backward analysis, it is the state before.
+ llvm::DenseMap<ProgramPoint, Lattice> PerPointStates;
+
+ static constexpr bool isForward() { return Dir == Direction::Forward; }
+
+protected:
+ FactManager &AllFacts;
+
+ explicit DataflowAnalysis(const CFG &C, AnalysisDeclContext &AC,
+ FactManager &F)
+ : Cfg(C), AC(AC), AllFacts(F) {}
+
+public:
+ void run() {
+ Derived &D = static_cast<Derived &>(*this);
+ llvm::TimeTraceScope Time(D.getAnalysisName());
+
+ using Worklist =
+ std::conditional_t<Dir == Direction::Forward, ForwardDataflowWorklist,
+ BackwardDataflowWorklist>;
+ Worklist W(Cfg, AC);
+
+ const CFGBlock *Start = isForward() ? &Cfg.getEntry() : &Cfg.getExit();
+ InStates[Start] = D.getInitialState();
+ W.enqueueBlock(Start);
+
+ while (const CFGBlock *B = W.dequeue()) {
+ Lattice StateIn = *getInState(B);
+ Lattice StateOut = transferBlock(B, StateIn);
+ OutStates[B] = StateOut;
+ for (const CFGBlock *AdjacentB : isForward() ? B->succs() : B->preds()) {
+ if (!AdjacentB)
+ continue;
+ std::optional<Lattice> OldInState = getInState(AdjacentB);
+ Lattice NewInState =
+ !OldInState ? StateOut : D.join(*OldInState, StateOut);
+ // Enqueue the adjacent block if its in-state has changed or if we have
+ // never seen it.
+ if (!OldInState || NewInState != *OldInState) {
+ InStates[AdjacentB] = NewInState;
+ W.enqueueBlock(AdjacentB);
+ }
+ }
+ }
+ }
+
+protected:
+ Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); }
+
+ std::optional<Lattice> getInState(const CFGBlock *B) const {
+ auto It = InStates.find(B);
+ if (It == InStates.end())
+ return std::nullopt;
+ return It->second;
+ }
+
+ Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); }
+
+ void dump() const {
+ const Derived *D = static_cast<const Derived *>(this);
+ llvm::dbgs() << "==========================================\n";
+ llvm::dbgs() << D->getAnalysisName() << " results:\n";
+ llvm::dbgs() << "==========================================\n";
+ const CFGBlock &B = isForward() ? Cfg.getExit() : Cfg.getEntry();
+ getOutState(&B).dump(llvm::dbgs());
+ }
+
+private:
+ /// Computes the state at one end of a block by applying all its facts
+ /// sequentially to a given state from the other end.
+ Lattice transferBlock(const CFGBlock *Block, Lattice State) {
+ auto Facts = AllFacts.getFacts(Block);
+ if constexpr (isForward()) {
+ for (const Fact *F : Facts) {
+ State = transferFact(State, F);
+ PerPointStates[F] = State;
+ }
+ } else {
+ for (const Fact *F : llvm::reverse(Facts)) {
+ // In backward analysis, capture the state before applying the fact.
+ PerPointStates[F] = State;
+ State = transferFact(State, F);
+ }
+ }
+ return State;
+ }
+
+ Lattice transferFact(Lattice In, const Fact *F) {
+ assert(F);
+ Derived *D = static_cast<Derived *>(this);
+ switch (F->getKind()) {
+ case Fact::Kind::Issue:
+ return D->transfer(In, *F->getAs<IssueFact>());
+ case Fact::Kind::Expire:
+ return D->transfer(In, *F->getAs<ExpireFact>());
+ case Fact::Kind::OriginFlow:
+ return D->transfer(In, *F->getAs<OriginFlowFact>());
+ case Fact::Kind::ReturnOfOrigin:
+ return D->transfer(In, *F->getAs<ReturnOfOriginFact>());
+ case Fact::Kind::Use:
+ return D->transfer(In, *F->getAs<UseFact>());
+ case Fact::Kind::TestPoint:
+ return D->transfer(In, *F->getAs<TestPointFact>());
+ }
+ llvm_unreachable("Unknown fact kind");
+ }
+
+public:
+ Lattice transfer(Lattice In, const IssueFact &) { return In; }
+ Lattice transfer(Lattice In, const ExpireFact &) { return In; }
+ Lattice transfer(Lattice In, const OriginFlowFact &) { return In; }
+ Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; }
+ Lattice transfer(Lattice In, const UseFact &) { return In; }
+ Lattice transfer(Lattice In, const TestPointFact &) { return In; }
+};
+} // namespace internal
+} // namespace clang::lifetimes
+#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H
diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h
new file mode 100644
index 0000000000000..5cfc1ed67c406
--- /dev/null
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h
@@ -0,0 +1,272 @@
+//===- Facts.h - Lifetime Analysis Facts and Fact Manager ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines Facts, which are atomic lifetime-relevant events (such as
+// loan issuance, loan expiration, origin flow, and use), and the FactManager,
+// which manages the storage and retrieval of facts for each CFG block.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H
+#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H
+
+#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h"
+#include "clang/Analysis/Analyses/PostOrderCFGView.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Analysis/CFG.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include <cstdint>
+
+namespace clang::lifetimes {
+namespace internal {
+/// An abstract base class for a single, atomic lifetime-relevant event.
+class Fact {
+
+public:
+ enum class Kind : uint8_t {
+ /// A new loan is issued from a borrow expression (e.g., &x).
+ Issue,
+ /// A loan expires as its underlying storage is freed (e.g., variable goes
+ /// out of scope).
+ Expire,
+ /// An origin is propagated from a source to a destination (e.g., p = q).
+ /// This can also optionally kill the destination origin before flowing into
+ /// it. Otherwise, the source's loan set is merged into the destination's
+ /// loan set.
+ OriginFlow,
+ /// An origin escapes the function by flowing into the return value.
+ ReturnOfOrigin,
+ /// An origin is used (eg. appears as l-value expression like DeclRefExpr).
+ Use,
+ /// A marker for a specific point in the code, for testing.
+ TestPoint,
+ };
+
+private:
+ Kind K;
+
+protected:
+ Fact(Kind K) : K(K) {}
+
+public:
+ virtual ~Fact() = default;
+ Kind getKind() const { return K; }
+
+ template <typename T> const T *getAs() const {
+ if (T::classof(this))
+ return static_cast<const T *>(this);
+ return nullptr;
+ }
+
+ virtual void dump(llvm::raw_ostream &OS, const LoanManager &,
+ const OriginManager &) const {
+ OS << "Fact (Kind: " << static_cast<int>(K) << ")\n";
+ }
+};
+
+/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific
+/// `Fact`. identified by a lifetime-related event (`Fact`).
+///
+/// A `ProgramPoint` has "after" semantics: it represents the location
+/// immediately after its corresponding `Fact`.
+using ProgramPoint = const Fact *;
+
+class IssueFact : public Fact {
+ LoanID LID;
+ OriginID OID;
+
+public:
+ static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; }
+
+ IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {}
+ LoanID getLoanID() const { return LID; }
+ OriginID getOriginID() const { return OID; }
+ void dump(llvm::raw_ostream &OS, const LoanManager &LM,
+ const OriginManager &OM) const override {
+ OS << "Issue (";
+ LM.getLoan(getLoanID()).dump(OS);
+ OS << ", ToOrigin: ";
+ OM.dump(getOriginID(), OS);
+ OS << ")\n";
+ }
+};
+
+class ExpireFact : public Fact {
+ LoanID LID;
+ SourceLocation ExpiryLoc;
+
+public:
+ static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; }
+
+ ExpireFact(LoanID LID, SourceLocation ExpiryLoc)
+ : Fact(Kind::Expire), LID(LID), ExpiryLoc(ExpiryLoc) {}
+
+ LoanID getLoanID() const { return LID; }
+ SourceLocation getExpiryLoc() const { return ExpiryLoc; }
+
+ void dump(llvm::raw_ostream &OS, const LoanManager &LM,
+ const OriginManager &) const override {
+ OS << "Expire (";
+ LM.getLoan(getLoanID()).dump(OS);
+ OS << ")\n";
+ }
+};
+
+class OriginFlowFact : public Fact {
+ OriginID OIDDest;
+ OriginID OIDSrc;
+ // True if the destination origin should be killed (i.e., its current loans
+ // cleared) before the source origin's loans are flowed into it.
+ bool KillDest;
+
+public:
+ static bool classof(const Fact *F) {
+ return F->getKind() == Kind::OriginFlow;
+ }
+
+ OriginFlowFact(OriginID OIDDest, OriginID OIDSrc, bool KillDest)
+ : Fact(Kind::OriginFlow), OIDDest(OIDDest), OIDSrc(OIDSrc),
+ KillDest(KillDest) {}
+
+ OriginID getDestOriginID() const { return OIDDest; }
+ OriginID getSrcOriginID() const { return OIDSrc; }
+ bool getKillDest() const { return KillDest; }
+
+ void dump(llvm::raw_ostream &OS, const LoanManager &,
+ const OriginManager &OM) const override {
+ OS << "OriginFlow (Dest: ";
+ OM.dump(getDestOriginID(), OS);
+ OS << ", Src: ";
+ OM.dump(getSrcOriginID(), OS);
+ OS << (getKillDest() ? "" : ", Merge");
+ OS << ")\n";
+ }
+};
+
+class ReturnOfOriginFact : public Fact {
+ OriginID OID;
+
+public:
+ static bool classof(const Fact *F) {
+ return F->getKind() == Kind::ReturnOfOrigin;
+ }
+
+ ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {}
+ OriginID getReturnedOriginID() const { return OID; }
+ void dump(llvm::raw_ostream &OS, const LoanManager &,
+ const OriginManager &OM) const override {
+ OS << "ReturnOfOrigin (";
+ OM.dump(getReturnedOriginID(), OS);
+ OS << ")\n";
+ }
+};
+
+class UseFact : public Fact {
+ const Expr *UseExpr;
+ // True if this use is a write operation (e.g., left-hand side of assignment).
+ // Write operations are exempted from use-after-free checks.
+ bool IsWritten = false;
+
+public:
+ static bool classof(const Fact *F) { return F->getKind() == Kind::Use; }
+
+ UseFact(const Expr *UseExpr) : Fact(Kind::Use), UseExpr(UseExpr) {}
+
+ OriginID getUsedOrigin(const OriginManager &OM) const {
+ // TODO: Remove const cast and make OriginManager::get as const.
+ return const_cast<OriginManager &>(OM).get(*UseExpr);
+ }
+ const Expr *getUseExpr() const { return UseExpr; }
+ void markAsWritten() { IsWritten = true; }
+ bool isWritten() const { return IsWritten; }
+
+ void dump(llvm::raw_ostream &OS, const LoanManager &,
+ const OriginManager &OM) const override {
+ OS << "Use (";
+ OM.dump(getUsedOrigin(OM), OS);
+ OS << ", " << (isWritten() ? "Write" : "Read") << ")\n";
+ }
+};
+
+/// A dummy-fact used to mark a specific point in the code for testing.
+/// It is generated by recognizing a `void("__lifetime_test_point_...")` cast.
+class TestPointFact : public Fact {
+ StringRef Annotation;
+
+public:
+ static bool classof(const Fact *F) { return F->getKind() == Kind::TestPoint; }
+
+ explicit TestPointFact(StringRef Annotation)
+ : Fact(Kind::TestPoint), Annotation(Annotation) {}
+
+ StringRef getAnnotation() const { return Annotation; }
+
+ void dump(llvm::raw_ostream &OS, const LoanManager &,
+ const OriginManager &) const override {
+ OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n";
+ }
+};
+
+class FactManager {
+public:
+ llvm::ArrayRef<const Fact *> getFacts(const CFGBlock *B) const {
+ auto It = BlockToFactsMap.find(B);
+ if (It != BlockToFactsMap.end())
+ return It->second;
+ return {};
+ }
+
+ void addBlockFacts(const CFGBlock *B, llvm::ArrayRef<Fact *> NewFacts) {
+ if (!NewFacts.empty())
+ BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end());
+ }
+
+ template <typename FactType, typename... Args>
+ FactType *createFact(Args &&...args) {
+ void *Mem = FactAllocator.Allocate<FactType>();
+ return new (Mem) FactType(std::forward<Args>(args)...);
+ }
+
+ void dump(const CFG &Cfg, AnalysisDeclContext &AC) const {
+ llvm::dbgs() << "==========================================\n";
+ llvm::dbgs() << " Lifetime Analysis Facts:\n";
+ llvm::dbgs() << "==========================================\n";
+ if (const Decl *D = AC.getDecl())
+ if (const auto *ND = dyn_cast<NamedDecl>(D))
+ llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n";
+ // Print blocks in the order as they appear in code for a stable ordering.
+ for (const CFGBlock *B : *AC.getAnalysis<PostOrderCFGView>()) {
+ llvm::dbgs() << " Block B" << B->getBlockID() << ":\n";
+ auto It = BlockToFactsMap.find(B);
+ if (It != BlockToFactsMap.end()) {
+ for (const Fact *F : It->second) {
+ llvm::dbgs() << " ";
+ F->dump(llvm::dbgs(), LoanMgr, OriginMgr);
+ }
+ }
+ llvm::dbgs() << " End of Block\n";
+ }
+ }
+
+ LoanManager &getLoanMgr() { return LoanMgr; }
+ const LoanManager &getLoanMgr() const { return LoanMgr; }
+ OriginManager &getOriginMgr() { return OriginMgr; }
+ const OriginManager &getOriginMgr() const { return OriginMgr; }
+
+private:
+ LoanManager LoanMgr;
+ OriginManager OriginMgr;
+ llvm::DenseMap<const clang::CFGBlock *, llvm::SmallVector<const Fact *>>
+ BlockToFactsMap;
+ llvm::BumpPtrAllocator FactAllocator;
+};
+} // namespace internal
+} // namespace clang::lifetimes
+
+#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H
diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h
new file mode 100644
index 0000000000000..a904f3dc64a06
--- /dev/null
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h
@@ -0,0 +1,108 @@
+//===- FactsGenerator.h - Lifetime Facts Generation -------------*- C++ -*-=...
[truncated]
|
@llvm/pr-subscribers-clang-temporal-safety Author: Utkarsh Saxena (usx95) ChangesRestructure the C++ Lifetime Safety Analysis into modular components with clear separation of concerns. This PR reorganizes the C++ Lifetime Safety Analysis code by:
The code functionality remains the same, but is now better organized with clearer interfaces between components. Patch is 150.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162474.diff 27 Files Affected:
diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml
index c49fd1dd3cc7f..efdc42d349195 100644
--- a/.github/new-prs-labeler.yml
+++ b/.github/new-prs-labeler.yml
@@ -1096,8 +1096,8 @@ clang:openmp:
- llvm/test/Transforms/OpenMP/**
clang:temporal-safety:
- - clang/include/clang/Analysis/Analyses/LifetimeSafety*
- - clang/lib/Analysis/LifetimeSafety*
+ - clang/include/clang/Analysis/Analyses/LifetimeSafety/**
+ - clang/lib/Analysis/LifetimeSafety/**
- clang/unittests/Analysis/LifetimeSafety*
- clang/test/Sema/*lifetime-safety*
- clang/test/Sema/*lifetime-analysis*
diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h
new file mode 100644
index 0000000000000..b059e00d7bdaf
--- /dev/null
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h
@@ -0,0 +1,38 @@
+//===- Checker.h - C++ Lifetime Safety Analysis -*----------- C++-*-=========//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the entry point for lifetime checking, which detects
+// use-after-free errors by checking if live origins hold loans that have
+// expired.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H
+#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H
+
+#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/Reporter.h"
+
+namespace clang {
+namespace lifetimes {
+namespace internal {
+
+/// Runs the lifetime checker, which detects use-after-free errors by
+/// examining loan expiration points and checking if any live origins hold
+/// the expired loan.
+void runLifetimeChecker(LoanPropagationAnalysis &LoanPropagation,
+ LiveOriginAnalysis &LiveOrigins, FactManager &FactMgr,
+ AnalysisDeclContext &ADC,
+ LifetimeSafetyReporter *Reporter);
+
+} // namespace internal
+} // namespace lifetimes
+} // namespace clang
+
+#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H
diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Dataflow.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Dataflow.h
new file mode 100644
index 0000000000000..e818b26739e18
--- /dev/null
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Dataflow.h
@@ -0,0 +1,190 @@
+//===- Dataflow.h - Generic Dataflow Analysis Framework --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a generic, policy-based driver for dataflow analyses.
+// It provides a flexible framework that combines the dataflow runner and
+// transfer functions, allowing derived classes to implement specific analyses
+// by defining their lattice, join, and transfer functions.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H
+#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H
+
+#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TimeProfiler.h"
+#include <optional>
+
+namespace clang::lifetimes {
+namespace internal {
+
+enum class Direction { Forward, Backward };
+
+/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific
+/// `Fact`. identified by a lifetime-related event (`Fact`).
+///
+/// A `ProgramPoint` has "after" semantics: it represents the location
+/// immediately after its corresponding `Fact`.
+using ProgramPoint = const Fact *;
+
+/// A generic, policy-based driver for dataflow analyses. It combines
+/// the dataflow runner and the transferer logic into a single class hierarchy.
+///
+/// The derived class is expected to provide:
+/// - A `Lattice` type.
+/// - `StringRef getAnalysisName() const`
+/// - `Lattice getInitialState();` The initial state of the analysis.
+/// - `Lattice join(Lattice, Lattice);` Merges states from multiple CFG paths.
+/// - `Lattice transfer(Lattice, const FactType&);` Defines how a single
+/// lifetime-relevant `Fact` transforms the lattice state. Only overloads
+/// for facts relevant to the analysis need to be implemented.
+///
+/// \tparam Derived The CRTP derived class that implements the specific
+/// analysis.
+/// \tparam LatticeType The dataflow lattice used by the analysis.
+/// \tparam Dir The direction of the analysis (Forward or Backward).
+/// TODO: Maybe use the dataflow framework! The framework might need changes
+/// to support the current comparison done at block-entry.
+template <typename Derived, typename LatticeType, Direction Dir>
+class DataflowAnalysis {
+public:
+ using Lattice = LatticeType;
+ using Base = DataflowAnalysis<Derived, Lattice, Dir>;
+
+private:
+ const CFG &Cfg;
+ AnalysisDeclContext &AC;
+
+ /// The dataflow state before a basic block is processed.
+ llvm::DenseMap<const CFGBlock *, Lattice> InStates;
+ /// The dataflow state after a basic block is processed.
+ llvm::DenseMap<const CFGBlock *, Lattice> OutStates;
+ /// The dataflow state at a Program Point.
+ /// In a forward analysis, this is the state after the Fact at that point has
+ /// been applied, while in a backward analysis, it is the state before.
+ llvm::DenseMap<ProgramPoint, Lattice> PerPointStates;
+
+ static constexpr bool isForward() { return Dir == Direction::Forward; }
+
+protected:
+ FactManager &AllFacts;
+
+ explicit DataflowAnalysis(const CFG &C, AnalysisDeclContext &AC,
+ FactManager &F)
+ : Cfg(C), AC(AC), AllFacts(F) {}
+
+public:
+ void run() {
+ Derived &D = static_cast<Derived &>(*this);
+ llvm::TimeTraceScope Time(D.getAnalysisName());
+
+ using Worklist =
+ std::conditional_t<Dir == Direction::Forward, ForwardDataflowWorklist,
+ BackwardDataflowWorklist>;
+ Worklist W(Cfg, AC);
+
+ const CFGBlock *Start = isForward() ? &Cfg.getEntry() : &Cfg.getExit();
+ InStates[Start] = D.getInitialState();
+ W.enqueueBlock(Start);
+
+ while (const CFGBlock *B = W.dequeue()) {
+ Lattice StateIn = *getInState(B);
+ Lattice StateOut = transferBlock(B, StateIn);
+ OutStates[B] = StateOut;
+ for (const CFGBlock *AdjacentB : isForward() ? B->succs() : B->preds()) {
+ if (!AdjacentB)
+ continue;
+ std::optional<Lattice> OldInState = getInState(AdjacentB);
+ Lattice NewInState =
+ !OldInState ? StateOut : D.join(*OldInState, StateOut);
+ // Enqueue the adjacent block if its in-state has changed or if we have
+ // never seen it.
+ if (!OldInState || NewInState != *OldInState) {
+ InStates[AdjacentB] = NewInState;
+ W.enqueueBlock(AdjacentB);
+ }
+ }
+ }
+ }
+
+protected:
+ Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); }
+
+ std::optional<Lattice> getInState(const CFGBlock *B) const {
+ auto It = InStates.find(B);
+ if (It == InStates.end())
+ return std::nullopt;
+ return It->second;
+ }
+
+ Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); }
+
+ void dump() const {
+ const Derived *D = static_cast<const Derived *>(this);
+ llvm::dbgs() << "==========================================\n";
+ llvm::dbgs() << D->getAnalysisName() << " results:\n";
+ llvm::dbgs() << "==========================================\n";
+ const CFGBlock &B = isForward() ? Cfg.getExit() : Cfg.getEntry();
+ getOutState(&B).dump(llvm::dbgs());
+ }
+
+private:
+ /// Computes the state at one end of a block by applying all its facts
+ /// sequentially to a given state from the other end.
+ Lattice transferBlock(const CFGBlock *Block, Lattice State) {
+ auto Facts = AllFacts.getFacts(Block);
+ if constexpr (isForward()) {
+ for (const Fact *F : Facts) {
+ State = transferFact(State, F);
+ PerPointStates[F] = State;
+ }
+ } else {
+ for (const Fact *F : llvm::reverse(Facts)) {
+ // In backward analysis, capture the state before applying the fact.
+ PerPointStates[F] = State;
+ State = transferFact(State, F);
+ }
+ }
+ return State;
+ }
+
+ Lattice transferFact(Lattice In, const Fact *F) {
+ assert(F);
+ Derived *D = static_cast<Derived *>(this);
+ switch (F->getKind()) {
+ case Fact::Kind::Issue:
+ return D->transfer(In, *F->getAs<IssueFact>());
+ case Fact::Kind::Expire:
+ return D->transfer(In, *F->getAs<ExpireFact>());
+ case Fact::Kind::OriginFlow:
+ return D->transfer(In, *F->getAs<OriginFlowFact>());
+ case Fact::Kind::ReturnOfOrigin:
+ return D->transfer(In, *F->getAs<ReturnOfOriginFact>());
+ case Fact::Kind::Use:
+ return D->transfer(In, *F->getAs<UseFact>());
+ case Fact::Kind::TestPoint:
+ return D->transfer(In, *F->getAs<TestPointFact>());
+ }
+ llvm_unreachable("Unknown fact kind");
+ }
+
+public:
+ Lattice transfer(Lattice In, const IssueFact &) { return In; }
+ Lattice transfer(Lattice In, const ExpireFact &) { return In; }
+ Lattice transfer(Lattice In, const OriginFlowFact &) { return In; }
+ Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; }
+ Lattice transfer(Lattice In, const UseFact &) { return In; }
+ Lattice transfer(Lattice In, const TestPointFact &) { return In; }
+};
+} // namespace internal
+} // namespace clang::lifetimes
+#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H
diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h
new file mode 100644
index 0000000000000..5cfc1ed67c406
--- /dev/null
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h
@@ -0,0 +1,272 @@
+//===- Facts.h - Lifetime Analysis Facts and Fact Manager ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines Facts, which are atomic lifetime-relevant events (such as
+// loan issuance, loan expiration, origin flow, and use), and the FactManager,
+// which manages the storage and retrieval of facts for each CFG block.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H
+#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H
+
+#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h"
+#include "clang/Analysis/Analyses/PostOrderCFGView.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Analysis/CFG.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include <cstdint>
+
+namespace clang::lifetimes {
+namespace internal {
+/// An abstract base class for a single, atomic lifetime-relevant event.
+class Fact {
+
+public:
+ enum class Kind : uint8_t {
+ /// A new loan is issued from a borrow expression (e.g., &x).
+ Issue,
+ /// A loan expires as its underlying storage is freed (e.g., variable goes
+ /// out of scope).
+ Expire,
+ /// An origin is propagated from a source to a destination (e.g., p = q).
+ /// This can also optionally kill the destination origin before flowing into
+ /// it. Otherwise, the source's loan set is merged into the destination's
+ /// loan set.
+ OriginFlow,
+ /// An origin escapes the function by flowing into the return value.
+ ReturnOfOrigin,
+ /// An origin is used (eg. appears as l-value expression like DeclRefExpr).
+ Use,
+ /// A marker for a specific point in the code, for testing.
+ TestPoint,
+ };
+
+private:
+ Kind K;
+
+protected:
+ Fact(Kind K) : K(K) {}
+
+public:
+ virtual ~Fact() = default;
+ Kind getKind() const { return K; }
+
+ template <typename T> const T *getAs() const {
+ if (T::classof(this))
+ return static_cast<const T *>(this);
+ return nullptr;
+ }
+
+ virtual void dump(llvm::raw_ostream &OS, const LoanManager &,
+ const OriginManager &) const {
+ OS << "Fact (Kind: " << static_cast<int>(K) << ")\n";
+ }
+};
+
+/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific
+/// `Fact`. identified by a lifetime-related event (`Fact`).
+///
+/// A `ProgramPoint` has "after" semantics: it represents the location
+/// immediately after its corresponding `Fact`.
+using ProgramPoint = const Fact *;
+
+class IssueFact : public Fact {
+ LoanID LID;
+ OriginID OID;
+
+public:
+ static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; }
+
+ IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {}
+ LoanID getLoanID() const { return LID; }
+ OriginID getOriginID() const { return OID; }
+ void dump(llvm::raw_ostream &OS, const LoanManager &LM,
+ const OriginManager &OM) const override {
+ OS << "Issue (";
+ LM.getLoan(getLoanID()).dump(OS);
+ OS << ", ToOrigin: ";
+ OM.dump(getOriginID(), OS);
+ OS << ")\n";
+ }
+};
+
+class ExpireFact : public Fact {
+ LoanID LID;
+ SourceLocation ExpiryLoc;
+
+public:
+ static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; }
+
+ ExpireFact(LoanID LID, SourceLocation ExpiryLoc)
+ : Fact(Kind::Expire), LID(LID), ExpiryLoc(ExpiryLoc) {}
+
+ LoanID getLoanID() const { return LID; }
+ SourceLocation getExpiryLoc() const { return ExpiryLoc; }
+
+ void dump(llvm::raw_ostream &OS, const LoanManager &LM,
+ const OriginManager &) const override {
+ OS << "Expire (";
+ LM.getLoan(getLoanID()).dump(OS);
+ OS << ")\n";
+ }
+};
+
+class OriginFlowFact : public Fact {
+ OriginID OIDDest;
+ OriginID OIDSrc;
+ // True if the destination origin should be killed (i.e., its current loans
+ // cleared) before the source origin's loans are flowed into it.
+ bool KillDest;
+
+public:
+ static bool classof(const Fact *F) {
+ return F->getKind() == Kind::OriginFlow;
+ }
+
+ OriginFlowFact(OriginID OIDDest, OriginID OIDSrc, bool KillDest)
+ : Fact(Kind::OriginFlow), OIDDest(OIDDest), OIDSrc(OIDSrc),
+ KillDest(KillDest) {}
+
+ OriginID getDestOriginID() const { return OIDDest; }
+ OriginID getSrcOriginID() const { return OIDSrc; }
+ bool getKillDest() const { return KillDest; }
+
+ void dump(llvm::raw_ostream &OS, const LoanManager &,
+ const OriginManager &OM) const override {
+ OS << "OriginFlow (Dest: ";
+ OM.dump(getDestOriginID(), OS);
+ OS << ", Src: ";
+ OM.dump(getSrcOriginID(), OS);
+ OS << (getKillDest() ? "" : ", Merge");
+ OS << ")\n";
+ }
+};
+
+class ReturnOfOriginFact : public Fact {
+ OriginID OID;
+
+public:
+ static bool classof(const Fact *F) {
+ return F->getKind() == Kind::ReturnOfOrigin;
+ }
+
+ ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {}
+ OriginID getReturnedOriginID() const { return OID; }
+ void dump(llvm::raw_ostream &OS, const LoanManager &,
+ const OriginManager &OM) const override {
+ OS << "ReturnOfOrigin (";
+ OM.dump(getReturnedOriginID(), OS);
+ OS << ")\n";
+ }
+};
+
+class UseFact : public Fact {
+ const Expr *UseExpr;
+ // True if this use is a write operation (e.g., left-hand side of assignment).
+ // Write operations are exempted from use-after-free checks.
+ bool IsWritten = false;
+
+public:
+ static bool classof(const Fact *F) { return F->getKind() == Kind::Use; }
+
+ UseFact(const Expr *UseExpr) : Fact(Kind::Use), UseExpr(UseExpr) {}
+
+ OriginID getUsedOrigin(const OriginManager &OM) const {
+ // TODO: Remove const cast and make OriginManager::get as const.
+ return const_cast<OriginManager &>(OM).get(*UseExpr);
+ }
+ const Expr *getUseExpr() const { return UseExpr; }
+ void markAsWritten() { IsWritten = true; }
+ bool isWritten() const { return IsWritten; }
+
+ void dump(llvm::raw_ostream &OS, const LoanManager &,
+ const OriginManager &OM) const override {
+ OS << "Use (";
+ OM.dump(getUsedOrigin(OM), OS);
+ OS << ", " << (isWritten() ? "Write" : "Read") << ")\n";
+ }
+};
+
+/// A dummy-fact used to mark a specific point in the code for testing.
+/// It is generated by recognizing a `void("__lifetime_test_point_...")` cast.
+class TestPointFact : public Fact {
+ StringRef Annotation;
+
+public:
+ static bool classof(const Fact *F) { return F->getKind() == Kind::TestPoint; }
+
+ explicit TestPointFact(StringRef Annotation)
+ : Fact(Kind::TestPoint), Annotation(Annotation) {}
+
+ StringRef getAnnotation() const { return Annotation; }
+
+ void dump(llvm::raw_ostream &OS, const LoanManager &,
+ const OriginManager &) const override {
+ OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n";
+ }
+};
+
+class FactManager {
+public:
+ llvm::ArrayRef<const Fact *> getFacts(const CFGBlock *B) const {
+ auto It = BlockToFactsMap.find(B);
+ if (It != BlockToFactsMap.end())
+ return It->second;
+ return {};
+ }
+
+ void addBlockFacts(const CFGBlock *B, llvm::ArrayRef<Fact *> NewFacts) {
+ if (!NewFacts.empty())
+ BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end());
+ }
+
+ template <typename FactType, typename... Args>
+ FactType *createFact(Args &&...args) {
+ void *Mem = FactAllocator.Allocate<FactType>();
+ return new (Mem) FactType(std::forward<Args>(args)...);
+ }
+
+ void dump(const CFG &Cfg, AnalysisDeclContext &AC) const {
+ llvm::dbgs() << "==========================================\n";
+ llvm::dbgs() << " Lifetime Analysis Facts:\n";
+ llvm::dbgs() << "==========================================\n";
+ if (const Decl *D = AC.getDecl())
+ if (const auto *ND = dyn_cast<NamedDecl>(D))
+ llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n";
+ // Print blocks in the order as they appear in code for a stable ordering.
+ for (const CFGBlock *B : *AC.getAnalysis<PostOrderCFGView>()) {
+ llvm::dbgs() << " Block B" << B->getBlockID() << ":\n";
+ auto It = BlockToFactsMap.find(B);
+ if (It != BlockToFactsMap.end()) {
+ for (const Fact *F : It->second) {
+ llvm::dbgs() << " ";
+ F->dump(llvm::dbgs(), LoanMgr, OriginMgr);
+ }
+ }
+ llvm::dbgs() << " End of Block\n";
+ }
+ }
+
+ LoanManager &getLoanMgr() { return LoanMgr; }
+ const LoanManager &getLoanMgr() const { return LoanMgr; }
+ OriginManager &getOriginMgr() { return OriginMgr; }
+ const OriginManager &getOriginMgr() const { return OriginMgr; }
+
+private:
+ LoanManager LoanMgr;
+ OriginManager OriginMgr;
+ llvm::DenseMap<const clang::CFGBlock *, llvm::SmallVector<const Fact *>>
+ BlockToFactsMap;
+ llvm::BumpPtrAllocator FactAllocator;
+};
+} // namespace internal
+} // namespace clang::lifetimes
+
+#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H
diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h
new file mode 100644
index 0000000000000..a904f3dc64a06
--- /dev/null
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h
@@ -0,0 +1,108 @@
+//===- FactsGenerator.h - Lifetime Facts Generation -------------*- C++ -*-=...
[truncated]
|
Restructure the C++ Lifetime Safety Analysis into modular components with clear separation of concerns.
This PR reorganizes the C++ Lifetime Safety Analysis code by:
LifetimeSafety.cpp
(1500+ lines) into multiple smaller, focused filesLifetimeSafety
directory with a clean component structureChecker.h/cpp
: Core lifetime checking logicDataflow.h
: Generic dataflow analysis frameworkFacts.h
: Lifetime-relevant events and fact managementFactsGenerator.h/cpp
: AST traversal for fact generationLiveOrigins.h/cpp
: Backward dataflow analysis for origin livenessLoanPropagation.h/cpp
: Forward dataflow analysis for loan trackingLoans.h
: Loan and access path definitionsOrigins.h
: Origin managementReporter.h
: Interface for reporting lifetime violationsUtils.h
: Common utilities for the analysisThe code functionality remains the same, but is now better organized with clearer interfaces between components.