Skip to content

Commit d02f446

Browse files
committed
[lld-macho] Choose ICF root function deterministically based on symbol names
Our codebase has global function merging and outlining which enables many ICF instances, but small code changes often pick different root functions which confuses developers. This change improves determinism in ICF root function selection by sorting equivalent functions based on their primary symbol names, preferring external symbols at offset 0 over local symbols, with section names as fallback, ensuring more consistent and predictable folding results across builds.
1 parent 73b24d2 commit d02f446

File tree

5 files changed

+99
-24
lines changed

5 files changed

+99
-24
lines changed

lld/MachO/ICF.cpp

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -408,9 +408,40 @@ void ICF::run() {
408408
// When using safe_thunks, ensure that we first sort by icfEqClass and
409409
// then by keepUnique (descending). This guarantees that within an
410410
// equivalence class, the keepUnique inputs are always first.
411-
if (config->icfLevel == ICFLevel::safe_thunks)
412-
if (a->icfEqClass[0] == b->icfEqClass[0])
413-
return a->keepUnique > b->keepUnique;
411+
if (a->icfEqClass[0] == b->icfEqClass[0]) {
412+
if (config->icfLevel == ICFLevel::safe_thunks) {
413+
if (a->keepUnique != b->keepUnique)
414+
return a->keepUnique > b->keepUnique;
415+
}
416+
// Within each equivalence class, sort by primary (user) symbol name
417+
// for determinism. Prefer the first non-local (external) symbol at
418+
// offset 0.
419+
auto getPrimarySymbolName = [](const ConcatInputSection *isec) {
420+
const Symbol *firstLocalSymAtZero = nullptr;
421+
422+
// Single pass through symbols to find the best candidate
423+
for (const Symbol *sym : isec->symbols) {
424+
if (auto *d = dyn_cast<Defined>(sym)) {
425+
if (d->value == 0) {
426+
// If we find an external symbol at offset 0, return it
427+
// immediately
428+
if (d->isExternal())
429+
return d->getName();
430+
// Otherwise, remember the first local symbol at offset 0
431+
if (!firstLocalSymAtZero)
432+
firstLocalSymAtZero = sym;
433+
}
434+
}
435+
}
436+
// Return the local symbol at offset 0 if we found one
437+
if (firstLocalSymAtZero)
438+
return firstLocalSymAtZero->getName();
439+
440+
// Fallback: use section name
441+
return isec->getName();
442+
};
443+
return getPrimarySymbolName(a) < getPrimarySymbolName(b);
444+
}
414445
return a->icfEqClass[0] < b->icfEqClass[0];
415446
});
416447
forEachClass([&](size_t begin, size_t end) {

lld/test/MachO/arm64-thunks.s

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
# OBJC: _objc_msgSend$bar:
4141
# OBJC: _objc_msgSend$foo:
4242

43-
# MAP: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_low_addr
43+
# MAP: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_first_addr
4444
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _a
4545
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _b
4646
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _c
@@ -58,12 +58,12 @@
5858
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _b.thunk.0
5959
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _h
6060
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _main
61-
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_high_addr
61+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_second_addr
6262
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _c.thunk.0
6363
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _d.thunk.1
6464
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _e.thunk.1
6565
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _f.thunk.1
66-
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_low_addr.thunk.0
66+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_first_addr.thunk.0
6767
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _z
6868

6969

@@ -243,14 +243,14 @@ _objc_msgSend:
243243
.subsections_via_symbols
244244

245245
.addrsig
246-
.addrsig_sym _fold_func_low_addr
247-
.addrsig_sym _fold_func_high_addr
246+
.addrsig_sym _fold_func_first_addr
247+
.addrsig_sym _fold_func_second_addr
248248

249249
.text
250250

251-
.globl _fold_func_low_addr
251+
.globl _fold_func_first_addr
252252
.p2align 2
253-
_fold_func_low_addr:
253+
_fold_func_first_addr:
254254
add x0, x0, x0
255255
add x1, x0, x1
256256
add x2, x0, x2
@@ -383,16 +383,16 @@ _main:
383383
bl _f
384384
bl _g
385385
bl _h
386-
bl _fold_func_low_addr
387-
bl _fold_func_high_addr
386+
bl _fold_func_first_addr
387+
bl _fold_func_second_addr
388388
bl ___nan
389389
bl _objc_msgSend$foo
390390
bl _objc_msgSend$bar
391391
ret
392392

393-
.globl _fold_func_high_addr
393+
.globl _fold_func_second_addr
394394
.p2align 2
395-
_fold_func_high_addr:
395+
_fold_func_second_addr:
396396
add x0, x0, x0
397397
add x1, x0, x1
398398
add x2, x0, x2

lld/test/MachO/icf-name-order.s

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# REQUIRES: aarch64
2+
# RUN: rm -rf %t; split-file %s %t
3+
4+
# This test verifies ICF deterministically chooses root functions based on symbol names
5+
# regardless of input object file order, with all three identical functions being folded together
6+
# and _a chosen as the root due to lexicographic ordering.
7+
8+
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o
9+
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/b.s -o %t/b.o
10+
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/c.s -o %t/c.o
11+
12+
# RUN: %lld -dylib -arch arm64 -lSystem -o %t/abc --icf=all -map %t/abc.txt %t/a.o %t/b.o %t/c.o
13+
# RUN: %lld -dylib -arch arm64 -lSystem -o %t/bac --icf=all -map %t/bac.txt %t/b.o %t/a.o %t/c.o
14+
# RUN: %lld -dylib -arch arm64 -lSystem -o %t/cba --icf=all -map %t/cba.txt %t/c.o %t/b.o %t/a.o
15+
16+
# RUN: cat %t/abc.txt | FileCheck %s
17+
# RUN: cat %t/bac.txt | FileCheck %s
18+
# RUN: cat %t/cba.txt | FileCheck %s
19+
20+
# CHECK: Symbols:
21+
# CHECK: [[#%X,ADDR:]] 0x00000008 {{.*}} _a
22+
# CHECK-NEXT: [[#ADDR]] 0x00000000 {{.*}} _b
23+
# CHECK-NEXT: [[#ADDR]] 0x00000000 {{.*}} _c
24+
25+
#--- a.s
26+
.section __TEXT,__text,regular,pure_instructions
27+
.globl _a
28+
_a:
29+
mov x0, 100
30+
ret
31+
32+
#--- b.s
33+
.section __TEXT,__text,regular,pure_instructions
34+
.globl _b
35+
_b:
36+
mov x0, 100
37+
ret
38+
39+
#--- c.s
40+
.section __TEXT,__text,regular,pure_instructions
41+
.globl _c
42+
_c:
43+
mov x0, 100
44+
ret

lld/test/MachO/icf-safe-thunks.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@
2727
; CHECK-ARM64: _func_call_thunked_1_nomerge:
2828
; CHECK-ARM64-NEXT: stp x29
2929
;
30-
; CHECK-ARM64: _func_call_thunked_2_nomerge:
31-
; CHECK-ARM64-NEXT: _func_call_thunked_2_merge:
30+
; CHECK-ARM64: _func_call_thunked_2_first:
31+
; CHECK-ARM64-NEXT: _func_call_thunked_2_second:
3232
; CHECK-ARM64-NEXT: stp x29
3333
;
3434
; CHECK-ARM64: _call_all_funcs:
@@ -53,8 +53,8 @@
5353
; CHECK-ARM64-MAP-NEXT: 0x00000000 [ 2] _func_3identical_v2_canmerge
5454
; CHECK-ARM64-MAP-NEXT: 0x00000000 [ 2] _func_3identical_v3_canmerge
5555
; CHECK-ARM64-MAP-NEXT: 0x00000020 [ 2] _func_call_thunked_1_nomerge
56-
; CHECK-ARM64-MAP-NEXT: 0x00000020 [ 2] _func_call_thunked_2_nomerge
57-
; CHECK-ARM64-MAP-NEXT: 0x00000000 [ 2] _func_call_thunked_2_merge
56+
; CHECK-ARM64-MAP-NEXT: 0x00000020 [ 2] _func_call_thunked_2_first
57+
; CHECK-ARM64-MAP-NEXT: 0x00000000 [ 2] _func_call_thunked_2_second
5858
; CHECK-ARM64-MAP-NEXT: 0x00000034 [ 2] _call_all_funcs
5959
; CHECK-ARM64-MAP-NEXT: 0x00000050 [ 2] _take_func_addr
6060
; CHECK-ARM64-MAP-NEXT: 0x00000004 [ 2] _func_2identical_v2
@@ -93,12 +93,12 @@ ATTR void func_call_thunked_1_nomerge() {
9393
g_val = 77;
9494
}
9595

96-
ATTR void func_call_thunked_2_nomerge() {
96+
ATTR void func_call_thunked_2_first() {
9797
func_2identical_v2();
9898
g_val = 77;
9999
}
100100

101-
ATTR void func_call_thunked_2_merge() {
101+
ATTR void func_call_thunked_2_second() {
102102
func_2identical_v2();
103103
g_val = 77;
104104
}
@@ -205,14 +205,14 @@ define void @func_call_thunked_1_nomerge() local_unnamed_addr #1 {
205205
}
206206

207207
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp uwtable(sync)
208-
define void @func_call_thunked_2_nomerge() local_unnamed_addr #1 {
208+
define void @func_call_thunked_2_first() local_unnamed_addr #1 {
209209
tail call void @func_2identical_v2()
210210
store volatile i8 77, ptr @g_val, align 1, !tbaa !4
211211
ret void
212212
}
213213

214214
; Function Attrs: mustprogress nofree noinline norecurse nounwind ssp uwtable(sync)
215-
define void @func_call_thunked_2_merge() local_unnamed_addr #1 {
215+
define void @func_call_thunked_2_second() local_unnamed_addr #1 {
216216
tail call void @func_2identical_v2()
217217
store volatile i8 77, ptr @g_val, align 1, !tbaa !4
218218
ret void

lld/test/MachO/map-file.s

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,8 @@
110110
## folded symbols but not folded cstrings; we print both.
111111

112112
# ICF: Symbols:
113-
# ICF-DAG: 0x[[#%X,FOO:]] 0x00000000 [ 3] __ZTIN3foo3bar4MethE
114-
# ICF-DAG: 0x[[#FOO]] 0x00000001 [ 2] _bar
113+
# ICF-DAG: 0x[[#%X,FOO:]] 0x00000001 [ 3] __ZTIN3foo3bar4MethE
114+
# ICF-DAG: 0x[[#FOO]] 0x00000000 [ 2] _bar
115115
# ICF-DAG: 0x[[#%X,HIWORLD:]] 0x0000000E [ 4] literal string: Hello world!\n
116116
# ICF-DAG: 0x[[#%X,HIWORLD]] 0x00000000 [ 4] literal string: Hello world!\n
117117

0 commit comments

Comments
 (0)