From 9a3d581261f82ce8ad53d5a001f8cdbbea6e2a89 Mon Sep 17 00:00:00 2001 From: Pranav Kant Date: Tue, 22 Apr 2025 01:41:36 +0000 Subject: [PATCH 1/3] [lld][ICF] Prevent merging two sections when they point to non-globals --- lld/ELF/ICF.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp index 849f6bdd445f9..cc2e768cd18cb 100644 --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -375,6 +375,18 @@ bool ICF::variableEq(const InputSection *secA, Relocs ra, auto *da = cast(&sa); auto *db = cast(&sb); + // Merging sections here also means that we would mark corresponding + // relocation target symbols as equivalent, done later in ICF during section + // folding. To preserve correctness for such symbol equivalence (see + // GH#129122 for details), we also have to disable section merging here: + // 1. We don't merge local symbols into global symbols, or vice-versa. There + // are post-icf passes that assert on this behavior. + // 2. We also don't merge two local symbols together. There are post-icf + // passes that expect to see no duplicates when iterating over local + // symbols. + if (!da->isGlobal() || !db->isGlobal()) + return false; + // We already dealt with absolute and non-InputSection symbols in // constantEq, and for InputSections we have already checked everything // except the equivalence class. From 4e8735792a4c964f6edfc7d2d1aa4b0b8bed2e6c Mon Sep 17 00:00:00 2001 From: Pranav Kant Date: Tue, 22 Apr 2025 02:19:19 +0000 Subject: [PATCH 2/3] Tighter condition to prevent ICF --- lld/ELF/ICF.cpp | 21 ++++++++++++++++++++- lld/test/ELF/icf-ineligible.s | 5 ++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp index cc2e768cd18cb..68f8d88920718 100644 --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -77,6 +77,7 @@ #include "InputFiles.h" #include "LinkerScript.h" #include "OutputSections.h" +#include "Relocations.h" #include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" @@ -356,6 +357,22 @@ static SmallVector getRelocTargetSyms(const InputSection *sec) { return getReloc(sec, rel.relas); } +// Checks if relocation has semantics beyond just the offset. We identify +// such relocations to prevent ICF to preserve correctness. +static bool isTrivialRelocationType(uint16_t emachine, RelType type) { + if (emachine == EM_AARCH64) { + switch (type) { + case R_AARCH64_GOT_LD_PREL19: + case R_AARCH64_LD64_GOTOFF_LO15: + case R_AARCH64_ADR_GOT_PAGE: + case R_AARCH64_LD64_GOT_LO12_NC: + case R_AARCH64_LD64_GOTPAGE_LO15: + return false; + } + } + return true; +} + // Compare two lists of relocations. Returns true if all pairs of // relocations point to the same section in terms of ICF. template @@ -384,7 +401,9 @@ bool ICF::variableEq(const InputSection *secA, Relocs ra, // 2. We also don't merge two local symbols together. There are post-icf // passes that expect to see no duplicates when iterating over local // symbols. - if (!da->isGlobal() || !db->isGlobal()) + if ((da->isGlobal() != db->isGlobal()) && + !isTrivialRelocationType(ctx.arg.emachine, + rai->getType(ctx.arg.isMips64EL))) return false; // We already dealt with absolute and non-InputSection symbols in diff --git a/lld/test/ELF/icf-ineligible.s b/lld/test/ELF/icf-ineligible.s index bc63f9f35beb5..168a3f07b4285 100644 --- a/lld/test/ELF/icf-ineligible.s +++ b/lld/test/ELF/icf-ineligible.s @@ -8,9 +8,8 @@ # CHECK: selected section {{.*}}:(.text.f1) # CHECK: removing identical section {{.*}}:(.text.f2) -# CHECK: removing identical section {{.*}}:(.text.f3) -# CHECK: selected section {{.*}}:(.text.f4) -# CHECK: removing identical section {{.*}}:(.text.f5) +# CHECK: redirecting 'd_alias' in symtab to 'd' +# CHECK: redirecting 'd_alias' to 'd' .globl d, d_alias, fu, f1, f2, f3, f4, f5 From 7efab7dca72d34f62212e6e158cabdac535ab2be Mon Sep 17 00:00:00 2001 From: Pranav Kant Date: Thu, 24 Apr 2025 00:09:03 +0000 Subject: [PATCH 3/3] Peter review --- lld/ELF/ICF.cpp | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp index 68f8d88920718..361058b59127a 100644 --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -357,8 +357,9 @@ static SmallVector getRelocTargetSyms(const InputSection *sec) { return getReloc(sec, rel.relas); } -// Checks if relocation has semantics beyond just the offset. We identify -// such relocations to prevent ICF to preserve correctness. +// A non-trivial relocation should ideally not be split by Machine outliner +// but it's not illegal to split it in which case ICF shouldn't fold outlined +// functions containing these relocations. static bool isTrivialRelocationType(uint16_t emachine, RelType type) { if (emachine == EM_AARCH64) { switch (type) { @@ -367,6 +368,35 @@ static bool isTrivialRelocationType(uint16_t emachine, RelType type) { case R_AARCH64_ADR_GOT_PAGE: case R_AARCH64_LD64_GOT_LO12_NC: case R_AARCH64_LD64_GOTPAGE_LO15: + case R_AARCH64_TLSIE_MOVW_GOTTPREL_G1: + case R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC: + case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: + case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: + case R_AARCH64_TLSIE_LD_GOTTPREL_PREL19: + case R_AARCH64_TLSDESC_LD_PREL19: + case R_AARCH64_TLSDESC_ADR_PREL21: + case R_AARCH64_TLSDESC_ADR_PAGE21: + case R_AARCH64_TLSDESC_LD64_LO12: + case R_AARCH64_TLSDESC_ADD_LO12: + case R_AARCH64_TLSDESC_OFF_G1: + case R_AARCH64_TLSDESC_OFF_G0_NC: + case R_AARCH64_AUTH_MOVW_GOTOFF_G0: + case R_AARCH64_AUTH_MOVW_GOTOFF_G0_NC: + case R_AARCH64_AUTH_MOVW_GOTOFF_G1: + case R_AARCH64_AUTH_MOVW_GOTOFF_G1_NC: + case R_AARCH64_AUTH_MOVW_GOTOFF_G2: + case R_AARCH64_AUTH_MOVW_GOTOFF_G2_NC: + case R_AARCH64_AUTH_MOVW_GOTOFF_G3: + case R_AARCH64_AUTH_GOT_LD_PREL19: + case R_AARCH64_AUTH_LD64_GOTOFF_LO15: + case R_AARCH64_AUTH_ADR_GOT_PAGE: + case R_AARCH64_AUTH_LD64_GOT_LO12_NC: + case R_AARCH64_AUTH_LD64_GOTPAGE_LO15: + case R_AARCH64_AUTH_GOT_ADD_LO12_NC: + case R_AARCH64_AUTH_GOT_ADR_PREL_LO21: + case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21: + case R_AARCH64_AUTH_TLSDESC_LD64_LO12: + case R_AARCH64_AUTH_TLSDESC_ADD_LO12: return false; } }