diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index e07c7dd4ca1b6..03b3cd4771f49 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -226,6 +226,7 @@ struct Config { llvm::StringRef thinLTOCacheDir; llvm::StringRef thinLTOIndexOnlyArg; llvm::StringRef whyExtract; + llvm::SmallVector whyLive; llvm::StringRef cmseInputLib; llvm::StringRef cmseOutputLib; ReportPolicy zBtiReport = ReportPolicy::None; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 3e7e05746483a..4555a85a4d216 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1545,6 +1545,15 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { ctx.arg.warnSymbolOrdering = args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true); ctx.arg.whyExtract = args.getLastArgValue(OPT_why_extract); + for (opt::Arg *arg : args.filtered(OPT_why_live)) { + StringRef value(arg->getValue()); + if (Expected pat = GlobPattern::create(arg->getValue())) { + ctx.arg.whyLive.emplace_back(std::move(*pat)); + } else { + ErrAlways(ctx) << arg->getSpelling() << ": " << pat.takeError(); + continue; + } + } ctx.arg.zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true); ctx.arg.zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true); ctx.arg.zForceBti = hasZOption(args, "force-bti"); diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp index b6c22884d9176..afebc4464c909 100644 --- a/lld/ELF/MarkLive.cpp +++ b/lld/ELF/MarkLive.cpp @@ -29,9 +29,11 @@ #include "Target.h" #include "lld/Common/CommonLinkerContext.h" #include "lld/Common/Strings.h" +#include "llvm/ADT/DenseMapInfoVariant.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Object/ELF.h" #include "llvm/Support/TimeProfiler.h" +#include #include using namespace llvm; @@ -42,16 +44,29 @@ using namespace lld; using namespace lld::elf; namespace { -template class MarkLive { +using SecOffset = std::pair; + +// Something that can have an independent reason for being live. 
+using LiveItem = std::variant; + +// The most proximate reason that something is live. +struct LiveReason { + std::optional item; + StringRef desc; +}; + +template class MarkLive { public: MarkLive(Ctx &ctx, unsigned partition) : ctx(ctx), partition(partition) {} void run(); void moveToMain(); + void printWhyLive(Symbol *s) const; private: - void enqueue(InputSectionBase *sec, uint64_t offset); - void markSymbol(Symbol *sym); + void enqueue(InputSectionBase *sec, uint64_t offset, Symbol *sym, + LiveReason reason); + void markSymbol(Symbol *sym, StringRef reason); void mark(); template @@ -70,6 +85,12 @@ template class MarkLive { // There are normally few input sections whose names are valid C // identifiers, so we just store a SmallVector instead of a multimap. DenseMap> cNamedSections; + + // The most proximate reason that something is live. This forms a DAG between + // LiveItems. Acyclicality is maintained by only admitting the first + // discovered reason for each LiveItem; this captures the acyclic region of + // the liveness graph around the GC roots. + DenseMap whyLive; }; } // namespace @@ -93,14 +114,18 @@ static uint64_t getAddend(Ctx &, InputSectionBase &sec, return rel.r_addend; } -template +template template -void MarkLive::resolveReloc(InputSectionBase &sec, RelTy &rel, - bool fromFDE) { +void MarkLive::resolveReloc(InputSectionBase &sec, + RelTy &rel, bool fromFDE) { // If a symbol is referenced in a live section, it is used. Symbol &sym = sec.file->getRelocTargetSym(rel); sym.used = true; + LiveReason reason; + if (TrackWhyLive) + reason = {SecOffset(&sec, rel.r_offset), "referenced by"}; + if (auto *d = dyn_cast(&sym)) { auto *relSec = dyn_cast_or_null(d->section); if (!relSec) @@ -119,17 +144,33 @@ void MarkLive::resolveReloc(InputSectionBase &sec, RelTy &rel, // group/SHF_LINK_ORDER rules (b) if the associated text section should be // discarded, marking the LSDA will unnecessarily retain the text section. 
if (!(fromFDE && ((relSec->flags & (SHF_EXECINSTR | SHF_LINK_ORDER)) || - relSec->nextInSectionGroup))) - enqueue(relSec, offset); + relSec->nextInSectionGroup))) { + Symbol *canonicalSym = d; + if (TrackWhyLive && d->isSection()) { + // This is expensive, so ideally this would be deferred until it's known + // whether this reference contributes to a printed whyLive chain, but + // that determination cannot be made without knowing the enclosing + // symbol. + if (Symbol *s = relSec->getEnclosingSymbol(offset)) + canonicalSym = s; + else + canonicalSym = nullptr; + } + enqueue(relSec, offset, canonicalSym, reason); + } return; } - if (auto *ss = dyn_cast(&sym)) - if (!ss->isWeak()) + if (auto *ss = dyn_cast(&sym)) { + if (!ss->isWeak()) { cast(ss->file)->isNeeded = true; + if (TrackWhyLive) + whyLive.try_emplace(&sym, reason); + } + } for (InputSectionBase *sec : cNamedSections.lookup(sym.getName())) - enqueue(sec, 0); + enqueue(sec, /*offset=*/0, /*sym=*/nullptr, reason); } // The .eh_frame section is an unfortunate special case. @@ -146,10 +187,10 @@ void MarkLive::resolveReloc(InputSectionBase &sec, RelTy &rel, // A possible improvement would be to fully process .eh_frame in the middle of // the gc pass. With that we would be able to also gc some sections holding // LSDAs and personality functions if we found that they were unused. 
-template +template template -void MarkLive::scanEhFrameSection(EhInputSection &eh, - ArrayRef rels) { +void MarkLive::scanEhFrameSection(EhInputSection &eh, + ArrayRef rels) { for (const EhSectionPiece &cie : eh.cies) if (cie.firstRelocation != unsigned(-1)) resolveReloc(eh, rels[cie.firstRelocation], false); @@ -186,8 +227,10 @@ static bool isReserved(InputSectionBase *sec) { } } -template -void MarkLive::enqueue(InputSectionBase *sec, uint64_t offset) { +template +void MarkLive::enqueue(InputSectionBase *sec, + uint64_t offset, Symbol *sym, + LiveReason reason) { // Usually, a whole section is marked as live or dead, but in mergeable // (splittable) sections, each piece of data has independent liveness bit. // So we explicitly tell it which offset is in use. @@ -201,28 +244,101 @@ void MarkLive::enqueue(InputSectionBase *sec, uint64_t offset) { return; sec->partition = sec->partition ? 1 : partition; + if (TrackWhyLive) { + if (sym) { + // If a specific symbol is referenced, that keeps it live. The symbol then + // keeps its section live. + whyLive.try_emplace(sym, reason); + whyLive.try_emplace(sec, LiveReason{sym, "contained live symbol"}); + } else { + // Otherwise, the reference generically keeps the section live. + whyLive.try_emplace(sec, reason); + } + } + // Add input section to the queue. if (InputSection *s = dyn_cast(sec)) queue.push_back(s); } -template void MarkLive::markSymbol(Symbol *sym) { +// Print the stack of reasons that the given symbol is live. +template +void MarkLive::printWhyLive(Symbol *s) const { + // Skip dead symbols. A symbol is dead if it belongs to a dead section. 
+ if (auto *d = dyn_cast(s)) { + auto *sec = dyn_cast_or_null(d->section); + if (sec && !sec->isLive()) + return; + } + + auto msg = Msg(ctx); + + const auto printSymbol = [&](Symbol *s) { + msg << s->file << ":(" << s << ')'; + }; + + msg << "live symbol: "; + printSymbol(s); + + LiveItem cur = s; + while (true) { + auto it = whyLive.find(cur); + LiveReason reason; + // If there is a specific reason this item is live... + if (it != whyLive.end()) { + reason = it->second; + } else { + // This item is live, but it has no tracked reason. It must be an + // unreferenced symbol in a live section or a symbol with no section. + InputSectionBase *sec = nullptr; + if (auto *d = dyn_cast(std::get(cur))) + sec = dyn_cast_or_null(d->section); + reason = sec ? LiveReason{sec, "in live section"} + : LiveReason{std::nullopt, "no section"}; + } + + if (!reason.item) { + msg << " (" << reason.desc << ')'; + break; + } + + msg << "\n>>> " << reason.desc << ": "; + // The reason may not yet have been resolved to a symbol; do so now. + if (std::holds_alternative(*reason.item)) { + const auto &so = std::get(*reason.item); + InputSectionBase *sec = so.first; + Defined *sym = sec->getEnclosingSymbol(so.second); + cur = sym ? LiveItem(sym) : LiveItem(sec); + } else { + cur = *reason.item; + } + + if (std::holds_alternative(cur)) + printSymbol(std::get(cur)); + else + msg << std::get(cur); + } +} + +template +void MarkLive::markSymbol(Symbol *sym, StringRef reason) { if (auto *d = dyn_cast_or_null(sym)) if (auto *isec = dyn_cast_or_null(d->section)) - enqueue(isec, d->value); + enqueue(isec, d->value, sym, {std::nullopt, reason}); } // This is the main function of the garbage collector. // Starting from GC-root sections, this function visits all reachable // sections to set their "Live" bits. -template void MarkLive::run() { +template +void MarkLive::run() { // Add GC root symbols. 
// Preserve externally-visible symbols if the symbols defined by this // file can interpose other ELF file's symbols at runtime. for (Symbol *sym : ctx.symtab->getSymbols()) if (sym->isExported && sym->partition == partition) - markSymbol(sym); + markSymbol(sym, "externally visible symbol; may interpose"); // If this isn't the main partition, that's all that we need to preserve. if (partition != 1) { @@ -230,16 +346,16 @@ template void MarkLive::run() { return; } - markSymbol(ctx.symtab->find(ctx.arg.entry)); - markSymbol(ctx.symtab->find(ctx.arg.init)); - markSymbol(ctx.symtab->find(ctx.arg.fini)); + markSymbol(ctx.symtab->find(ctx.arg.entry), "entry point"); + markSymbol(ctx.symtab->find(ctx.arg.init), "initializer function"); + markSymbol(ctx.symtab->find(ctx.arg.fini), "finalizer function"); for (StringRef s : ctx.arg.undefined) - markSymbol(ctx.symtab->find(s)); + markSymbol(ctx.symtab->find(s), "undefined command line flag"); for (StringRef s : ctx.script->referencedSymbols) - markSymbol(ctx.symtab->find(s)); + markSymbol(ctx.symtab->find(s), "referenced by linker script"); for (auto [symName, _] : ctx.symtab->cmseSymMap) { - markSymbol(ctx.symtab->cmseSymMap[symName].sym); - markSymbol(ctx.symtab->cmseSymMap[symName].acleSeSym); + markSymbol(ctx.symtab->cmseSymMap[symName].sym, "ARM CMSE symbol"); + markSymbol(ctx.symtab->cmseSymMap[symName].acleSeSym, "ARM CMSE symbol"); } // Mark .eh_frame sections as live because there are usually no relocations @@ -256,7 +372,7 @@ template void MarkLive::run() { } for (InputSectionBase *sec : ctx.inputSections) { if (sec->flags & SHF_GNU_RETAIN) { - enqueue(sec, 0); + enqueue(sec, /*offset=*/0, /*sym=*/nullptr, {std::nullopt, "retained"}); continue; } if (sec->flags & SHF_LINK_ORDER) @@ -294,8 +410,11 @@ template void MarkLive::run() { // Preserve special sections and those which are specified in linker // script KEEP command. 
- if (isReserved(sec) || ctx.script->shouldKeep(sec)) { - enqueue(sec, 0); + if (isReserved(sec)) { + enqueue(sec, /*offset=*/0, /*sym=*/nullptr, {std::nullopt, "reserved"}); + } else if (ctx.script->shouldKeep(sec)) { + enqueue(sec, /*offset=*/0, /*sym=*/nullptr, + {std::nullopt, "KEEP in linker script"}); } else if ((!ctx.arg.zStartStopGC || sec->name.starts_with("__libc_")) && isValidCIdentifier(sec->name)) { // As a workaround for glibc libc.a before 2.34 @@ -307,9 +426,26 @@ template void MarkLive::run() { } mark(); + + if (TrackWhyLive) { + const auto handleSym = [&](Symbol *sym) { + if (llvm::any_of(ctx.arg.whyLive, [sym](const llvm::GlobPattern &pat) { + return pat.match(sym->getName()); + })) + printWhyLive(sym); + }; + + for (Symbol *sym : ctx.symtab->getSymbols()) + handleSym(sym); + for (ELFFileBase *file : ctx.objectFiles) + for (Symbol *sym : file->getSymbols()) + if (sym->isLocal()) + handleSym(sym); + } } -template void MarkLive::mark() { +template +void MarkLive::mark() { // Mark all reachable sections. while (!queue.empty()) { InputSectionBase &sec = *queue.pop_back_val(); @@ -323,11 +459,13 @@ template void MarkLive::mark() { resolveReloc(sec, rel, false); for (InputSectionBase *isec : sec.dependentSections) - enqueue(isec, 0); + enqueue(isec, /*offset=*/0, /*sym=*/nullptr, + {&sec, "depended on by section"}); // Mark the next group member. if (sec.nextInSectionGroup) - enqueue(sec.nextInSectionGroup, 0); + enqueue(sec.nextInSectionGroup, /*offset=*/0, /*sym=*/nullptr, + {&sec, "in section group with"}); } } @@ -340,20 +478,21 @@ template void MarkLive::mark() { // We also need to move sections whose names are C identifiers that are referred // to from __start_/__stop_ symbols because there will only be one set of // symbols for the whole program. 
-template void MarkLive::moveToMain() { +template +void MarkLive::moveToMain() { for (ELFFileBase *file : ctx.objectFiles) for (Symbol *s : file->getSymbols()) if (auto *d = dyn_cast(s)) if ((d->type == STT_GNU_IFUNC || d->type == STT_TLS) && d->section && d->section->isLive()) - markSymbol(s); + markSymbol(s, /*reason=*/{}); for (InputSectionBase *sec : ctx.inputSections) { if (!sec->isLive() || !isValidCIdentifier(sec->name)) continue; if (ctx.symtab->find(("__start_" + sec->name).str()) || ctx.symtab->find(("__stop_" + sec->name).str())) - enqueue(sec, 0); + enqueue(sec, /*offset=*/0, /*sym=*/nullptr, /*reason=*/{}); } mark(); @@ -379,13 +518,16 @@ template void elf::markLive(Ctx &ctx) { // Follow the graph to mark all live sections. for (unsigned i = 1, e = ctx.partitions.size(); i <= e; ++i) - MarkLive(ctx, i).run(); + if (ctx.arg.whyLive.empty()) + MarkLive(ctx, i).run(); + else + MarkLive(ctx, i).run(); // If we have multiple partitions, some sections need to live in the main // partition even if they were allocated to a loadable partition. Move them // there now. if (ctx.partitions.size() != 1) - MarkLive(ctx, 1).moveToMain(); + MarkLive(ctx, 1).moveToMain(); // Report garbage-collected sections. 
if (ctx.arg.printGcSections) diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index b3b12a0646875..62d8f49acde39 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -568,6 +568,12 @@ defm whole_archive: B<"whole-archive", def why_extract: JJ<"why-extract=">, HelpText<"Print to a file about why archive members are extracted">; +defm why_live + : EEq<"why-live", + "Report a chain of references preventing garbage collection for " + "each symbol matching ">, + MetaVarName<"">; + defm wrap : Eq<"wrap", "Redirect symbol references to __wrap_symbol and " "__real_symbol references to symbol">, MetaVarName<"">; diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 431b694f7bce1..43ed6fa73589b 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -35,6 +35,10 @@ ELF Improvements not provided in the command line by the user and cannot be inferred from inputs. +* ``--why-live=<glob>`` prints for each symbol matching ``<glob>`` a chain of + items that kept it live during garbage collection. This is inspired by the + Mach-O LLD feature of the same name. + Breaking changes ---------------- diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 index b5c1816ce6e5f..7b2650637cb10 100644 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 @@ -755,6 +755,8 @@ Report unresolved symbols as warnings. Force load of all members in a static library. .It Fl -why-extract Ns = Ns Ar file Print to a file about why archive members are extracted. +.It Fl -why-live Ns = Ns Ar glob +Report a chain of references preventing garbage collection for each symbol matching the glob. 
.It Fl -wrap Ns = Ns Ar symbol Redirect .Ar symbol diff --git a/lld/test/ELF/why-live.test b/lld/test/ELF/why-live.test new file mode 100644 index 0000000000000..d8f8cc7b6db64 --- /dev/null +++ b/lld/test/ELF/why-live.test @@ -0,0 +1,161 @@ +# REQUIRES: x86 + +# RUN: rm -rf %t && split-file %s %t && cd %t +# RUN: llvm-mc -n -filetype=obj -triple=x86_64 -o shared.o shared.s +# RUN: ld.lld -shared shared.o -o a.so +# RUN: llvm-mc -n -filetype=obj -triple=x86_64 -o a.o a.s + +#--- shared.s +.globl test_shared +.section .test_shared,"ax",@progbits +test_shared: + jmp test_shared + +#--- a.s +## Simple live section +.globl _start +.section ._start,"ax",@progbits +_start: + jmp test_simple + .quad .Lanonymous + .quad .Lanonymous_within_symbol + jmp test_shared + .quad test_local +.size _start, .-_start + +.globl test_simple +.section .test_simple,"ax",@progbits +test_simple: + jmp test_simple + jmp test_from_unsized + +# RUN: ld.lld a.o a.so --gc-sections --why-live=test_simple | FileCheck %s --check-prefix=SIMPLE + +# SIMPLE: live symbol: a.o:(test_simple) +# SIMPLE-NEXT: >>> referenced by: a.o:(_start) (entry point) +# SIMPLE-EMPTY: + +## Live only by being a member of .test_simple +.globl test_incidental +test_incidental: + jmp test_incidental + +# RUN: ld.lld a.o a.so --gc-sections --why-live=test_incidental | FileCheck %s --check-prefix=INCIDENTAL + +# INCIDENTAL: live symbol: a.o:(test_incidental) +# INCIDENTAL-NEXT: >>> in live section: a.o:(.test_simple) +# INCIDENTAL-NEXT: >>> contained live symbol: a.o:(test_simple) +# INCIDENTAL-NEXT: >>> referenced by: a.o:(_start) (entry point) +# INCIDENTAL-EMPTY: + +## Reached from a reference in section .test_simple directly, since test_simple is an unsized symbol. 
+.globl test_from_unsized +.section .test_from_unsized,"ax",@progbits +test_from_unsized: + jmp test_from_unsized + +# RUN: ld.lld a.o a.so --gc-sections --why-live=test_from_unsized | FileCheck %s --check-prefix=FROM-UNSIZED + +# FROM-UNSIZED: live symbol: a.o:(test_from_unsized) +# FROM-UNSIZED-NEXT: >>> referenced by: a.o:(.test_simple) +# FROM-UNSIZED-NEXT: >>> contained live symbol: a.o:(test_simple) +# FROM-UNSIZED-NEXT: >>> referenced by: a.o:(_start) (entry point) +# FROM-UNSIZED-EMPTY: + +## Symbols in dead sections are dead and not reported. +.globl test_dead +.section .test_dead,"ax",@progbits +test_dead: + jmp test_dead + +# RUN: ld.lld a.o a.so --gc-sections --why-live=test_dead | count 0 + +## Undefined symbols are considered live, since they are not in dead sections. + +# RUN: ld.lld a.o a.so --gc-sections --why-live=test_undef -u test_undef | FileCheck %s --check-prefix=UNDEFINED + +# UNDEFINED: live symbol: :(test_undef) (no section) +# UNDEFINED-EMPTY: + +## Defined symbols without input section parents are live. +.globl test_absolute +test_absolute = 1234 + +# RUN: ld.lld a.o a.so --gc-sections --why-live=test_absolute | FileCheck %s --check-prefix=ABSOLUTE + +# ABSOLUTE: live symbol: a.o:(test_absolute) (no section) +# ABSOLUTE-EMPTY: + +## Retained sections are intrinsically live, and they make contained symbols live. +.globl test_retained +.section .test_retained,"axR",@progbits +test_retained: + jmp test_retained + +# RUN: ld.lld a.o a.so --gc-sections --why-live=test_retained | FileCheck %s --check-prefix=RETAINED + +# RETAINED: live symbol: a.o:(test_retained) +# RETAINED-NEXT: >>> in live section: a.o:(.test_retained) (retained) +# RETAINED-EMPTY: + +## Relocs that reference offsets from sections (e.g., from anonymous symbols) are considered to point to the section if no enclosing symbol exists. 
+ +.globl test_section_offset +.section .test_section_offset,"ax",@progbits +test_section_offset: + jmp test_section_offset +.Lanonymous: + jmp test_section_offset + +# RUN: ld.lld a.o a.so --gc-sections --why-live=test_section_offset | FileCheck %s --check-prefix=SECTION-OFFSET + +# SECTION-OFFSET: live symbol: a.o:(test_section_offset) +# SECTION-OFFSET-NEXT: >>> in live section: a.o:(.test_section_offset) +# SECTION-OFFSET-NEXT: >>> referenced by: a.o:(_start) (entry point) +# SECTION-OFFSET-EMPTY: + +## Relocs that reference offsets from sections (e.g., from anonymous symbols) are considered to point to the enclosing symbol if one exists. + +.globl test_section_offset_within_symbol +.section .test_section_offset_within_symbol,"ax",@progbits +test_section_offset_within_symbol: + jmp test_section_offset_within_symbol +.Lanonymous_within_symbol: + jmp test_section_offset_within_symbol +.size test_section_offset_within_symbol, .-test_section_offset_within_symbol + +# RUN: ld.lld a.o a.so --gc-sections --why-live=test_section_offset_within_symbol | FileCheck %s --check-prefix=SECTION-OFFSET-WITHIN-SYMBOL + +# SECTION-OFFSET-WITHIN-SYMBOL: live symbol: a.o:(test_section_offset_within_symbol) +# SECTION-OFFSET-WITHIN-SYMBOL-NEXT: >>> referenced by: a.o:(_start) (entry point) +# SECTION-OFFSET-WITHIN-SYMBOL-EMPTY: + +## Local symbols can be queried just like global symbols. + +.section .test_local,"ax",@progbits +test_local: + jmp test_local +.size test_local, .-test_local + +# RUN: ld.lld a.o a.so --gc-sections --why-live=test_local | FileCheck %s --check-prefix=LOCAL + +# LOCAL: live symbol: a.o:(test_local) +# LOCAL-NEXT: >>> referenced by: a.o:(_start) (entry point) +# LOCAL-EMPTY: + +## Shared symbols + +# RUN: ld.lld a.o a.so --gc-sections --why-live=test_shared | FileCheck %s --check-prefix=SHARED + +# SHARED: live symbol: a.so:(test_shared) +# SHARED-NEXT: >>> referenced by: a.o:(_start) (entry point) +# SHARED-EMPTY: + +## Globs match multiple cases. 
Multiple --why-live flags union. + +# RUN: ld.lld a.o a.so --gc-sections --why-live="test_se*" --why-live="test_se*" | FileCheck %s --check-prefix=MULTIPLE +# RUN: ld.lld a.o a.so --gc-sections --why-live=test_section_offset --why-live=test_section_offset_within_symbol | FileCheck %s --check-prefix=MULTIPLE + +# MULTIPLE-DAG: live symbol: a.o:(test_section_offset) +# MULTIPLE-DAG: live symbol: a.o:(test_section_offset_within_symbol) +# MULTIPLE-NOT: live symbol