diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp index 7b31378c3781e..d87fb42dcf19a 100644 --- a/lld/MachO/ICF.cpp +++ b/lld/MachO/ICF.cpp @@ -408,11 +408,35 @@ void ICF::run() { // When using safe_thunks, ensure that we first sort by icfEqClass and // then by keepUnique (descending). This guarantees that within an // equivalence class, the keepUnique inputs are always first. - if (config->icfLevel == ICFLevel::safe_thunks) - if (a->icfEqClass[0] == b->icfEqClass[0]) - return a->keepUnique > b->keepUnique; + if (a->icfEqClass[0] == b->icfEqClass[0]) { + if (config->icfLevel == ICFLevel::safe_thunks) { + if (a->keepUnique != b->keepUnique) + return a->keepUnique > b->keepUnique; + } + // Within each equivalence class, sort by symbol name for determinism. + // If a section has an external symbol at offset 0, use that name. + // If either section doesn't have such a symbol, retain the original + // order. + auto getExternalSymbolName = + [](const ConcatInputSection *isec) -> std::optional { + for (const Symbol *sym : isec->symbols) { + if (const auto *d = dyn_cast(sym)) { + if (d->value == 0 && d->isExternal()) + return d->getName(); + } + } + return std::nullopt; + }; + auto nameA = getExternalSymbolName(a); + auto nameB = getExternalSymbolName(b); + // For determinism, define a strict ordering only when both sections + // have a primary symbol. Otherwise, they are considered equivalent + // (the expression returns false), preserving the stable sort order. + return nameA && nameB && *nameA < *nameB; + } return a->icfEqClass[0] < b->icfEqClass[0]; }); + forEachClass([&](size_t begin, size_t end) { segregate(begin, end, &ICF::equalsConstant); }); diff --git a/lld/test/MachO/arm64-thunks.s b/lld/test/MachO/arm64-thunks.s index cd3842895c47e..79e4350f4ca5e 100644 --- a/lld/test/MachO/arm64-thunks.s +++ b/lld/test/MachO/arm64-thunks.s @@ -40,7 +40,7 @@ # OBJC: _objc_msgSend$bar: # OBJC: _objc_msgSend$foo: -# MAP: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_low_addr +# MAP: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_first_addr # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _a # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _b # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _c @@ -58,12 +58,12 @@ # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _b.thunk.0 # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _h # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _main -# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_high_addr +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_second_addr # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _c.thunk.0 # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _d.thunk.1 # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _e.thunk.1 # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _f.thunk.1 -# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_low_addr.thunk.0 +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_first_addr.thunk.0 # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _z @@ -243,14 +243,14 @@ _objc_msgSend: .subsections_via_symbols .addrsig -.addrsig_sym _fold_func_low_addr -.addrsig_sym _fold_func_high_addr +.addrsig_sym _fold_func_first_addr +.addrsig_sym _fold_func_second_addr .text -.globl _fold_func_low_addr +.globl _fold_func_first_addr .p2align 2 -_fold_func_low_addr: +_fold_func_first_addr: add x0, x0, x0 add x1, x0, x1 add x2, x0, x2 @@ -383,16 +383,16 @@ _main: bl _f bl _g bl _h - bl _fold_func_low_addr - bl _fold_func_high_addr + bl _fold_func_first_addr + bl _fold_func_second_addr bl ___nan bl _objc_msgSend$foo bl _objc_msgSend$bar ret -.globl _fold_func_high_addr +.globl _fold_func_second_addr .p2align 2 -_fold_func_high_addr: +_fold_func_second_addr: add x0, x0, x0 add x1, x0, x1 add x2, x0, x2 diff --git a/lld/test/MachO/icf-name-order.s b/lld/test/MachO/icf-name-order.s new file mode 100644 index 0000000000000..856f944b217f0 --- /dev/null +++ b/lld/test/MachO/icf-name-order.s @@ -0,0 +1,44 @@ +# REQUIRES: aarch64 +# RUN: rm -rf %t; split-file %s %t + +# This test verifies ICF deterministically chooses root functions based on symbol names +# regardless of input object file order, with all three identical functions being folded together +# and _a chosen as the root due to lexicographic ordering. + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/b.s -o %t/b.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/c.s -o %t/c.o + +# RUN: %lld -dylib -arch arm64 -lSystem -o %t/abc --icf=all -map %t/abc.txt %t/a.o %t/b.o %t/c.o +# RUN: %lld -dylib -arch arm64 -lSystem -o %t/bac --icf=all -map %t/bac.txt %t/b.o %t/a.o %t/c.o +# RUN: %lld -dylib -arch arm64 -lSystem -o %t/cba --icf=all -map %t/cba.txt %t/c.o %t/b.o %t/a.o + +# RUN: cat %t/abc.txt | FileCheck %s +# RUN: cat %t/bac.txt | FileCheck %s +# RUN: cat %t/cba.txt | FileCheck %s + +# CHECK: Symbols: +# CHECK: [[#%X,ADDR:]] 0x00000008 {{.*}} _a +# CHECK-NEXT: [[#ADDR]] 0x00000000 {{.*}} _b +# CHECK-NEXT: [[#ADDR]] 0x00000000 {{.*}} _c + +#--- a.s +.section __TEXT,__text,regular,pure_instructions + .globl _a +_a: + mov x0, 100 + ret + +#--- b.s +.section __TEXT,__text,regular,pure_instructions + .globl _b +_b: + mov x0, 100 + ret + +#--- c.s +.section __TEXT,__text,regular,pure_instructions + .globl _c +_c: + mov x0, 100 + ret diff --git a/lld/test/MachO/icf-safe-thunks.ll b/lld/test/MachO/icf-safe-thunks.ll index 12f1e81bdf3e8..de15a067e6a5c 100644 --- a/lld/test/MachO/icf-safe-thunks.ll +++ b/lld/test/MachO/icf-safe-thunks.ll @@ -27,8 +27,8 @@ ; CHECK-ARM64: _func_call_thunked_1_nomerge: ; CHECK-ARM64-NEXT: stp x29 ; -; CHECK-ARM64: _func_call_thunked_2_nomerge: -; CHECK-ARM64-NEXT: _func_call_thunked_2_merge: +; CHECK-ARM64: _func_call_thunked_2_first: +; CHECK-ARM64-NEXT: _func_call_thunked_2_second: ; CHECK-ARM64-NEXT: stp x29 ; ; CHECK-ARM64: _call_all_funcs: @@ -53,8 +53,8 @@ ; CHECK-ARM64-MAP-NEXT: 0x00000000 [ 2] _func_3identical_v2_canmerge ; CHECK-ARM64-MAP-NEXT: 0x00000000 [ 2] _func_3identical_v3_canmerge ; CHECK-ARM64-MAP-NEXT: 0x00000020 [ 2] _func_call_thunked_1_nomerge -; CHECK-ARM64-MAP-NEXT: 0x00000020 [ 2] _func_call_thunked_2_nomerge -; CHECK-ARM64-MAP-NEXT: 0x00000000 [ 2] _func_call_thunked_2_merge +; CHECK-ARM64-MAP-NEXT: 0x00000020 [ 2] _func_call_thunked_2_first +; CHECK-ARM64-MAP-NEXT: 0x00000000 [ 2] _func_call_thunked_2_second ; CHECK-ARM64-MAP-NEXT: 0x00000034 [ 2] _call_all_funcs ; CHECK-ARM64-MAP-NEXT: 0x00000050 [ 2] _take_func_addr ; CHECK-ARM64-MAP-NEXT: 0x00000004 [ 2] _func_2identical_v2 @@ -93,12 +93,12 @@ ATTR void func_call_thunked_1_nomerge() { g_val = 77; } -ATTR void func_call_thunked_2_nomerge() { +ATTR void func_call_thunked_2_first() { func_2identical_v2(); g_val = 77; } -ATTR void func_call_thunked_2_merge() { +ATTR void func_call_thunked_2_second() { func_2identical_v2(); g_val = 77; } @@ -205,14 +205,14 @@ define void @func_call_thunked_1_nomerge() local_unnamed_addr #1 { } ; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp uwtable(sync) -define void @func_call_thunked_2_nomerge() local_unnamed_addr #1 { +define void @func_call_thunked_2_first() local_unnamed_addr #1 { tail call void @func_2identical_v2() store volatile i8 77, ptr @g_val, align 1, !tbaa !4 ret void } ; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp uwtable(sync) -define void @func_call_thunked_2_merge() local_unnamed_addr #1 { +define void @func_call_thunked_2_second() local_unnamed_addr #1 { tail call void @func_2identical_v2() store volatile i8 77, ptr @g_val, align 1, !tbaa !4 ret void diff --git a/lld/test/MachO/map-file.s b/lld/test/MachO/map-file.s index aa9fff9938eb2..acdfcb04a2245 100644 --- a/lld/test/MachO/map-file.s +++ b/lld/test/MachO/map-file.s @@ -110,8 +110,8 @@ ## folded symbols but not folded cstrings; we print both. # ICF: Symbols: -# ICF-DAG: 0x[[#%X,FOO:]] 0x00000000 [ 3] __ZTIN3foo3bar4MethE -# ICF-DAG: 0x[[#FOO]] 0x00000001 [ 2] _bar +# ICF-DAG: 0x[[#%X,FOO:]] 0x00000001 [ 3] __ZTIN3foo3bar4MethE +# ICF-DAG: 0x[[#FOO]] 0x00000000 [ 2] _bar # ICF-DAG: 0x[[#%X,HIWORLD:]] 0x0000000E [ 4] literal string: Hello world!\n # ICF-DAG: 0x[[#%X,HIWORLD]] 0x00000000 [ 4] literal string: Hello world!\n