-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[lld][ELF] Merge equivalent symbols found during ICF #139493
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 4 commits
faa11ed
e4041b7
adef51a
6b435f9
6c41c38
3aaeac5
0067993
82a9d4b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -77,9 +77,12 @@ | |
| #include "InputFiles.h" | ||
| #include "LinkerScript.h" | ||
| #include "OutputSections.h" | ||
| #include "Relocations.h" | ||
| #include "SymbolTable.h" | ||
| #include "Symbols.h" | ||
| #include "SyntheticSections.h" | ||
| #include "Target.h" | ||
| #include "llvm/ADT/EquivalenceClasses.h" | ||
| #include "llvm/BinaryFormat/ELF.h" | ||
| #include "llvm/Object/ELF.h" | ||
| #include "llvm/Support/Parallel.h" | ||
|
|
@@ -104,18 +107,23 @@ template <class ELFT> class ICF { | |
| void segregate(size_t begin, size_t end, uint32_t eqClassBase, bool constant); | ||
|
|
||
| template <class RelTy> | ||
| bool constantEq(const InputSection *a, Relocs<RelTy> relsA, | ||
| const InputSection *b, Relocs<RelTy> relsB); | ||
| bool constantEq(InputSection *a, Relocs<RelTy> relsA, InputSection *b, | ||
| Relocs<RelTy> relsB); | ||
|
|
||
| template <class RelTy> | ||
| bool variableEq(const InputSection *a, Relocs<RelTy> relsA, | ||
| const InputSection *b, Relocs<RelTy> relsB); | ||
| bool variableEq(InputSection *a, Relocs<RelTy> relsA, InputSection *b, | ||
| Relocs<RelTy> relsB); | ||
|
|
||
| bool equalsConstant(const InputSection *a, const InputSection *b); | ||
| bool equalsVariable(const InputSection *a, const InputSection *b); | ||
| bool equalsConstant(InputSection *a, InputSection *b); | ||
| bool equalsVariable(InputSection *a, InputSection *b); | ||
|
|
||
| size_t findBoundary(size_t begin, size_t end); | ||
|
|
||
| // A relocation with side-effects is considered non-trivial. Eg: relocation | ||
| // creates GOT entry or TLS slot. | ||
| template <class RelTy> | ||
| bool isTrivialRelocation(InputSection *a, Symbol &s, RelTy reloc); | ||
|
|
||
| void forEachClassRange(size_t begin, size_t end, | ||
| llvm::function_ref<void(size_t, size_t)> fn); | ||
|
|
||
|
|
@@ -234,11 +242,30 @@ void ICF<ELFT>::segregate(size_t begin, size_t end, uint32_t eqClassBase, | |
| } | ||
| } | ||
|
|
||
| template <class ELFT> | ||
| template <class RelTy> | ||
| bool ICF<ELFT>::isTrivialRelocation(InputSection *a, Symbol &s, RelTy reloc) { | ||
| OffsetGetter getter(*a); | ||
| uint64_t offset = getter.get(ctx, reloc.r_offset); | ||
| RelExpr expr = ctx.target->getRelExpr(reloc.getType(ctx.arg.isMips64EL), s, | ||
| a->content().data() + offset); | ||
|
|
||
| if (needsGot(expr) || needsTls(s, expr)) | ||
| return false; | ||
| return true; | ||
| } | ||
|
|
||
| // Two symbols referenced by relocations can be merged together safely | ||
| // when their addends are same. | ||
| static bool canMergeSymbols(uint64_t addA, uint64_t addB) { | ||
| return addA == addB; | ||
| } | ||
|
|
||
| // Compare two lists of relocations. | ||
| template <class ELFT> | ||
| template <class RelTy> | ||
| bool ICF<ELFT>::constantEq(const InputSection *secA, Relocs<RelTy> ra, | ||
| const InputSection *secB, Relocs<RelTy> rb) { | ||
| bool ICF<ELFT>::constantEq(InputSection *secA, Relocs<RelTy> ra, | ||
| InputSection *secB, Relocs<RelTy> rb) { | ||
| if (ra.size() != rb.size()) | ||
| return false; | ||
| auto rai = ra.begin(), rae = ra.end(), rbi = rb.begin(); | ||
|
|
@@ -286,9 +313,14 @@ bool ICF<ELFT>::constantEq(const InputSection *secA, Relocs<RelTy> ra, | |
| // Relocations referring to InputSections are constant-equal if their | ||
| // section offsets are equal. | ||
| if (isa<InputSection>(da->section)) { | ||
| if (da->value + addA == db->value + addB) | ||
| if (da->value + addA == db->value + addB) { | ||
| // For non-trivial relocations, if we cannot merge symbols together, | ||
| // we must not merge sections either. | ||
| if (!isTrivialRelocation(secA, sa, *rai) && | ||
| !canMergeSymbols(addA, addB)) | ||
| return false; | ||
| continue; | ||
| return false; | ||
| } | ||
| } | ||
|
|
||
| // Relocations referring to MergeInputSections are constant-equal if their | ||
|
|
@@ -314,7 +346,7 @@ bool ICF<ELFT>::constantEq(const InputSection *secA, Relocs<RelTy> ra, | |
| // Compare "non-moving" part of two InputSections, namely everything | ||
| // except relocation targets. | ||
| template <class ELFT> | ||
| bool ICF<ELFT>::equalsConstant(const InputSection *a, const InputSection *b) { | ||
| bool ICF<ELFT>::equalsConstant(InputSection *a, InputSection *b) { | ||
| if (a->flags != b->flags || a->getSize() != b->getSize() || | ||
| a->content() != b->content()) | ||
| return false; | ||
|
|
@@ -333,12 +365,35 @@ bool ICF<ELFT>::equalsConstant(const InputSection *a, const InputSection *b) { | |
| : constantEq(a, ra.relas, b, rb.relas); | ||
| } | ||
|
|
||
| template <class ELFT, class RelTy> | ||
| static SmallVector<std::pair<Symbol *, uint64_t>> | ||
| getReloc(const InputSection *sec, Relocs<RelTy> relocs) { | ||
| SmallVector<std::pair<Symbol *, uint64_t>> syms; | ||
| for (auto ri = relocs.begin(), re = relocs.end(); ri != re; ++ri) { | ||
| Symbol &sym = sec->file->getRelocTargetSym(*ri); | ||
| syms.emplace_back(&sym, getAddend<ELFT>(*ri)); | ||
| } | ||
| return syms; | ||
| } | ||
|
|
||
| template <class ELFT> | ||
| static SmallVector<std::pair<Symbol *, uint64_t>> | ||
| getRelocTargetSyms(const InputSection *sec) { | ||
| const RelsOrRelas<ELFT> rel = sec->template relsOrRelas<ELFT>(); | ||
| if (rel.areRelocsCrel()) | ||
| return getReloc<ELFT>(sec, rel.crels); | ||
| if (rel.areRelocsRel()) | ||
| return getReloc<ELFT>(sec, rel.rels); | ||
|
|
||
| return getReloc<ELFT>(sec, rel.relas); | ||
| } | ||
|
|
||
| // Compare two lists of relocations. Returns true if all pairs of | ||
| // relocations point to the same section in terms of ICF. | ||
| template <class ELFT> | ||
| template <class RelTy> | ||
| bool ICF<ELFT>::variableEq(const InputSection *secA, Relocs<RelTy> ra, | ||
| const InputSection *secB, Relocs<RelTy> rb) { | ||
| bool ICF<ELFT>::variableEq(InputSection *secA, Relocs<RelTy> ra, | ||
| InputSection *secB, Relocs<RelTy> rb) { | ||
| assert(ra.size() == rb.size()); | ||
|
|
||
| auto rai = ra.begin(), rae = ra.end(), rbi = rb.begin(); | ||
|
|
@@ -352,6 +407,15 @@ bool ICF<ELFT>::variableEq(const InputSection *secA, Relocs<RelTy> ra, | |
| auto *da = cast<Defined>(&sa); | ||
| auto *db = cast<Defined>(&sb); | ||
|
|
||
| // Prevent sections containing local symbols from merging into sections with | ||
| // global symbols, or vice-versa. This is to prevent local-global symbols | ||
| // getting merged into each other (done later in ICF). We do this as | ||
| // post-ICF passes cannot handle duplicates when iterating over local | ||
| // symbols. There are also assertions that prevent this. | ||
| if ((!da->isGlobal() || !db->isGlobal()) && | ||
|
||
| !isTrivialRelocation(secA, sa, *rai)) | ||
| return false; | ||
|
|
||
| // We already dealt with absolute and non-InputSection symbols in | ||
| // constantEq, and for InputSections we have already checked everything | ||
| // except the equivalence class. | ||
|
|
@@ -375,7 +439,7 @@ bool ICF<ELFT>::variableEq(const InputSection *secA, Relocs<RelTy> ra, | |
|
|
||
| // Compare "moving" part of two InputSections, namely relocation targets. | ||
| template <class ELFT> | ||
| bool ICF<ELFT>::equalsVariable(const InputSection *a, const InputSection *b) { | ||
| bool ICF<ELFT>::equalsVariable(InputSection *a, InputSection *b) { | ||
| const RelsOrRelas<ELFT> ra = a->template relsOrRelas<ELFT>(); | ||
| const RelsOrRelas<ELFT> rb = b->template relsOrRelas<ELFT>(); | ||
| if (ra.areRelocsCrel() || rb.areRelocsCrel()) | ||
|
|
@@ -537,14 +601,28 @@ template <class ELFT> void ICF<ELFT>::run() { | |
| auto print = [&ctx = ctx]() -> ELFSyncStream { | ||
| return {ctx, ctx.arg.printIcfSections ? DiagLevel::Msg : DiagLevel::None}; | ||
| }; | ||
|
|
||
| EquivalenceClasses<Symbol *> symbolEquivalence; | ||
| // Merge sections by the equivalence class. | ||
| // Merge symbols identified as equivalent during ICF. | ||
| forEachClassRange(0, sections.size(), [&](size_t begin, size_t end) { | ||
| if (end - begin == 1) | ||
| return; | ||
| print() << "selected section " << sections[begin]; | ||
| SmallVector<std::pair<Symbol *, uint64_t>> syms = | ||
| getRelocTargetSyms<ELFT>(sections[begin]); | ||
| for (size_t i = begin + 1; i < end; ++i) { | ||
| print() << " removing identical section " << sections[i]; | ||
| sections[begin]->replace(sections[i]); | ||
| SmallVector<std::pair<Symbol *, uint64_t>> replacedSyms = | ||
| getRelocTargetSyms<ELFT>(sections[i]); | ||
| assert(syms.size() == replacedSyms.size() && | ||
| "Should have same number of syms!"); | ||
| for (size_t i = 0; i < syms.size(); i++) { | ||
| if (!canMergeSymbols(syms[i].second, replacedSyms[i].second)) | ||
| continue; | ||
| symbolEquivalence.unionSets(syms[i].first, replacedSyms[i].first); | ||
| } | ||
|
|
||
| // At this point we know sections merged are fully identical and hence | ||
| // we want to remove duplicate implicit dependencies such as link order | ||
|
|
@@ -563,11 +641,26 @@ template <class ELFT> void ICF<ELFT>::run() { | |
| d->folded = true; | ||
| } | ||
| }; | ||
| for (Symbol *sym : ctx.symtab->getSymbols()) | ||
| for (Symbol *sym : ctx.symtab->getSymbols()) { | ||
| fold(sym); | ||
| auto it = symbolEquivalence.findLeader(sym); | ||
| if (it != symbolEquivalence.member_end() && *it != sym) { | ||
| print() << "redirecting '" << sym->getName() << "' in symtab to '" | ||
| << (*it)->getName() << "'"; | ||
| ctx.symtab->redirect(sym, *it); | ||
| } | ||
| } | ||
| parallelForEach(ctx.objectFiles, [&](ELFFileBase *file) { | ||
| for (Symbol *sym : file->getLocalSymbols()) | ||
| fold(sym); | ||
| for (Symbol *&sym : file->getMutableGlobalSymbols()) { | ||
| auto it = symbolEquivalence.findLeader(sym); | ||
| if (it != symbolEquivalence.member_end() && *it != sym) { | ||
| print() << "redirecting '" << sym->getName() << "' to '" | ||
| << (*it)->getName() << "'"; | ||
| sym = *it; | ||
| } | ||
| } | ||
| }); | ||
|
|
||
| // InputSectionDescription::sections is populated by processSectionCommands(). | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This change was not in #134342 . I have tried to understand why the logic is so complex.
Looks like https://reviews.llvm.org/D34094 , while supporting MergeInputSection, also introduced the untested condition for InputSection
I feel that we should switch to the safer
da->value == db->value && addA == addB, which likely doesn't lose anything (I believe it's difficult for clang emitted code to utilize the relaxed condition.)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think
da->value + addA == db->value + addBis more readable. I can read it as if the address resolves to the same place, sections are considered same. It also takes care of section folding for more cases. Changing it toda->value == db->value && addA == addBwould exclude some cases.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The excluded cases are likely negligible. This condition is for InputSection (non merge string), where symbols defined not at offset 0 in -ffunction-sections -fdata-sections code are extremely rare. I don't know a case LLVM codegen would do this.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The chromium build that made us revert the original change did have many sections that were being merged due to this property and @zmodem tried my second commit in this PR (e4041b7) separately and noticed a binary size regression that was unacceptable to them. This comment on that Chromium bug is relevant: ~~
https://b.corp.google.com/issues/415810137#comment44(https://issues.chromium.org/issues/415810137#comment44)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you insist, I propose to do that in a separate PR so that these changes are not affected by any potential revert.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If it helps break disagreement, I lean towards @MaskRay 's suggestion to use
da->value == db->value && addA == addBas the condition (same value, same addend). Maybe it misses something, but it's conceptually simpler even if it misses some case.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I corrected the link in my previous comment to a public one. Apologies I didn't notice that I pasted an internal link to the Chromium bug.
Using
da->value == db->value && addA == addBmay seem trivial but it misses cases that increases the size of chromium builds to unacceptable levels. As I mentioned in my previous comment, @zmodem had tried it already and noticed a large size regression.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I may be misremembering, but IIRC the only size impact we complained about was from the suggested workaround of using
--icf=safe.I don't remember evaluating the size impact of this PR. We just tried it and confirmed it works (https://crbug.com/415810137#comment50), waited for it to get through review, and reverted the original change as the fix seemed to be taking a while.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm. Thanks for clarification. I was under the impression that after my comment here (https://issues.chromium.org/issues/415810137#comment50), you tried it and confirmed it works but the size regression was not acceptable to you.
I am okay simplifying the condition here after this clarification.