From fe7a36e3fb6bf1965a94a97f2d231572a2fc281f Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Wed, 13 Nov 2024 15:12:55 -0800 Subject: [PATCH 01/11] [lld][ELF] Add --why-live flag (inspired by Mach-O) This prints the stack of reasons that symbols that match the given glob(s) survived GC. It has no effect unless section GC occurs. A symbol may be live intrinsically, because it is referenced by another symbol or section, or because it is part of a live section. Sections have similar reasons. This implementation does not require -ffunction-sections or -fdata-sections to produce readable results, although it does tend to work better with them (as does GC). --- lld/ELF/Config.h | 1 + lld/ELF/Driver.cpp | 9 +++ lld/ELF/MarkLive.cpp | 125 +++++++++++++++++++++++++++++++++---- lld/ELF/Options.td | 6 ++ lld/test/ELF/why-live.s | 132 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 260 insertions(+), 13 deletions(-) create mode 100644 lld/test/ELF/why-live.s diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index e07c7dd4ca1b6..03b3cd4771f49 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -226,6 +226,7 @@ struct Config { llvm::StringRef thinLTOCacheDir; llvm::StringRef thinLTOIndexOnlyArg; llvm::StringRef whyExtract; + llvm::SmallVector whyLive; llvm::StringRef cmseInputLib; llvm::StringRef cmseOutputLib; ReportPolicy zBtiReport = ReportPolicy::None; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 3e7e05746483a..4555a85a4d216 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1545,6 +1545,15 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { ctx.arg.warnSymbolOrdering = args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true); ctx.arg.whyExtract = args.getLastArgValue(OPT_why_extract); + for (opt::Arg *arg : args.filtered(OPT_why_live)) { + StringRef value(arg->getValue()); + if (Expected pat = GlobPattern::create(arg->getValue())) { + ctx.arg.whyLive.emplace_back(std::move(*pat)); + } else { + ErrAlways(ctx) << 
arg->getSpelling() << ": " << pat.takeError(); + continue; + } + } ctx.arg.zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true); ctx.arg.zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true); ctx.arg.zForceBti = hasZOption(args, "force-bti"); diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp index b6c22884d9176..8e9e385bc26dc 100644 --- a/lld/ELF/MarkLive.cpp +++ b/lld/ELF/MarkLive.cpp @@ -29,9 +29,11 @@ #include "Target.h" #include "lld/Common/CommonLinkerContext.h" #include "lld/Common/Strings.h" +#include "llvm/ADT/DenseMapInfoVariant.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Object/ELF.h" #include "llvm/Support/TimeProfiler.h" +#include #include using namespace llvm; @@ -42,6 +44,10 @@ using namespace lld; using namespace lld::elf; namespace { + +// Something that can be the most proximate reason that something else is alive. +typedef std::variant LiveReason; + template class MarkLive { public: MarkLive(Ctx &ctx, unsigned partition) : ctx(ctx), partition(partition) {} @@ -50,7 +56,10 @@ template class MarkLive { void moveToMain(); private: - void enqueue(InputSectionBase *sec, uint64_t offset); + void enqueue(InputSectionBase *sec, uint64_t offset = 0, + Symbol *sym = nullptr, + std::optional reason = std::nullopt); + void printWhyLive(Symbol *s) const; void markSymbol(Symbol *sym); void mark(); @@ -70,6 +79,12 @@ template class MarkLive { // There are normally few input sections whose names are valid C // identifiers, so we just store a SmallVector instead of a multimap. DenseMap> cNamedSections; + + // The most proximate reason that something is live. If something doesn't have + // a recorded reason, it is either dead, intrinsically live, or an + // unreferenced symbol in a live section. (These cases are trivially + // detectable and need not be stored.) 
+ DenseMap whyLive; }; } // namespace @@ -101,6 +116,12 @@ void MarkLive::resolveReloc(InputSectionBase &sec, RelTy &rel, Symbol &sym = sec.file->getRelocTargetSym(rel); sym.used = true; + LiveReason reason; + if (!ctx.arg.whyLive.empty()) { + Defined *reasonSym = sec.getEnclosingSymbol(rel.r_offset); + reason = reasonSym ? LiveReason(reasonSym) : LiveReason(&sec); + } + if (auto *d = dyn_cast(&sym)) { auto *relSec = dyn_cast_or_null(d->section); if (!relSec) @@ -119,17 +140,29 @@ void MarkLive::resolveReloc(InputSectionBase &sec, RelTy &rel, // group/SHF_LINK_ORDER rules (b) if the associated text section should be // discarded, marking the LSDA will unnecessarily retain the text section. if (!(fromFDE && ((relSec->flags & (SHF_EXECINSTR | SHF_LINK_ORDER)) || - relSec->nextInSectionGroup))) - enqueue(relSec, offset); + relSec->nextInSectionGroup))) { + Symbol *canonicalSym = d; + if (!ctx.arg.whyLive.empty() && d->isSection()) { + if (Symbol *s = relSec->getEnclosingSymbol(offset)) + canonicalSym = s; + else + canonicalSym = nullptr; + } + enqueue(relSec, offset, canonicalSym, reason); + } return; } - if (auto *ss = dyn_cast(&sym)) - if (!ss->isWeak()) + if (auto *ss = dyn_cast(&sym)) { + if (!ss->isWeak()) { cast(ss->file)->isNeeded = true; + if (!ctx.arg.whyLive.empty()) + whyLive.try_emplace(&sym, reason); + } + } for (InputSectionBase *sec : cNamedSections.lookup(sym.getName())) - enqueue(sec, 0); + enqueue(sec, 0, nullptr, reason); } // The .eh_frame section is an unfortunate special case. @@ -187,7 +220,8 @@ static bool isReserved(InputSectionBase *sec) { } template -void MarkLive::enqueue(InputSectionBase *sec, uint64_t offset) { +void MarkLive::enqueue(InputSectionBase *sec, uint64_t offset, + Symbol *sym, std::optional reason) { // Usually, a whole section is marked as live or dead, but in mergeable // (splittable) sections, each piece of data has independent liveness bit. // So we explicitly tell it which offset is in use. 
@@ -201,15 +235,71 @@ void MarkLive::enqueue(InputSectionBase *sec, uint64_t offset) { return; sec->partition = sec->partition ? 1 : partition; + if (!ctx.arg.whyLive.empty() && reason) { + if (sym) { + // If a specific symbol is referenced, that makes it alive. It may in turn + // make its section alive. + whyLive.try_emplace(sym, *reason); + whyLive.try_emplace(sec, sym); + } else { + // Otherwise, the reference generically makes the section live. + whyLive.try_emplace(sec, *reason); + } + } + // Add input section to the queue. if (InputSection *s = dyn_cast(sec)) queue.push_back(s); } +// Print the stack of reasons that the given symbol is live. +template void MarkLive::printWhyLive(Symbol *s) const { + // Skip dead symbols. A symbol is dead if it belongs to a dead section. + if (auto *d = dyn_cast(s)) { + auto *reason = dyn_cast_or_null(d->section); + if (reason && !reason->isLive()) + return; + } + + auto msg = Msg(ctx); + msg << "live symbol: " << toStr(ctx, *s); + + LiveReason cur = s; + while (true) { + auto it = whyLive.find(cur); + // If there is a specific reason this object is live... + if (it != whyLive.end()) { + cur = it->second; + } else { + // This object is live, but it has no tracked reason. It is either + // intrinsically live or an unreferenced symbol in a live section. Return + // in the first case. + if (!std::holds_alternative(cur)) + return; + auto *d = dyn_cast(std::get(cur)); + if (!d) + return; + auto *reason = dyn_cast_or_null(d->section); + if (!reason) + return; + cur = LiveReason{reason}; + } + + msg << "\n>>> kept live by "; + if (std::holds_alternative(cur)) { + auto *s = std::get(cur); + msg << toStr(ctx, *s); + } else { + auto *s = std::get(cur); + msg << toStr(ctx, s); + } + } +} + template void MarkLive::markSymbol(Symbol *sym) { if (auto *d = dyn_cast_or_null(sym)) if (auto *isec = dyn_cast_or_null(d->section)) - enqueue(isec, d->value); + enqueue(isec, d->value, sym); } // This is the main function of the garbage collector. 
@@ -256,7 +346,7 @@ template void MarkLive::run() { } for (InputSectionBase *sec : ctx.inputSections) { if (sec->flags & SHF_GNU_RETAIN) { - enqueue(sec, 0); + enqueue(sec, 0, nullptr, std::nullopt); continue; } if (sec->flags & SHF_LINK_ORDER) @@ -295,7 +385,7 @@ template void MarkLive::run() { // Preserve special sections and those which are specified in linker // script KEEP command. if (isReserved(sec) || ctx.script->shouldKeep(sec)) { - enqueue(sec, 0); + enqueue(sec); } else if ((!ctx.arg.zStartStopGC || sec->name.starts_with("__libc_")) && isValidCIdentifier(sec->name)) { // As a workaround for glibc libc.a before 2.34 @@ -323,11 +413,20 @@ template void MarkLive::mark() { resolveReloc(sec, rel, false); for (InputSectionBase *isec : sec.dependentSections) - enqueue(isec, 0); + enqueue(isec, 0, nullptr, &sec); // Mark the next group member. if (sec.nextInSectionGroup) - enqueue(sec.nextInSectionGroup, 0); + enqueue(sec.nextInSectionGroup, 0, nullptr, &sec); + } + + if (!ctx.arg.whyLive.empty()) { + for (Symbol *sym : ctx.symtab->getSymbols()) { + if (llvm::any_of(ctx.arg.whyLive, [sym](const llvm::GlobPattern &pat) { + return pat.match(sym->getName()); + })) + printWhyLive(sym); + } } } @@ -353,7 +452,7 @@ template void MarkLive::moveToMain() { continue; if (ctx.symtab->find(("__start_" + sec->name).str()) || ctx.symtab->find(("__stop_" + sec->name).str())) - enqueue(sec, 0); + enqueue(sec); } mark(); diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index b3b12a0646875..feb24edc6f43a 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -572,6 +572,12 @@ defm wrap : Eq<"wrap", "Redirect symbol references to __wrap_symbol and " "__real_symbol references to symbol">, MetaVarName<"">; +defm why_live + : EEq<"why-live", + "Report a chain of references preventing garbage collection for " + "each symbol matching ">, + MetaVarName<"">; + def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"