Skip to content

Commit 79fff6a

Browse files
authored
[lld][BP] Avoid ordering ICF'ed sections (#126327)
ICF runs before BPSectionOrderer. When a section is ICF'ed, it seems that the original sections are marked as not live, but are still kept around. Prior to this patch, those ICF'ed sections would be passed to BP and ordered, only to be skipped when writing the output. Now, these sections are no longer passed to BP, saving runtime and possibly improving BP's output. In a large binary, I found that the number of sections ordered using BP decreased, and the number of duplicate sections decreased drastically, as expected.

```
Functions for startup: 50755 -> 50520
Functions for compression: 165734 -> 105328
Duplicate functions: 1827231 -> 55230
```
1 parent 55f3df8 commit 79fff6a

File tree

4 files changed

+87
-19
lines changed

4 files changed

+87
-19
lines changed

lld/ELF/BPSectionOrderer.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,11 @@ DenseMap<const InputSectionBase *, int> elf::runBalancedPartitioning(
7575
auto *d = dyn_cast<Defined>(&sym);
7676
if (!d)
7777
return;
78-
auto *sec = dyn_cast_or_null<InputSectionBase>(d->section);
79-
if (!sec || sec->size == 0 || !orderer.secToSym.try_emplace(sec, d).second)
78+
auto *sec = dyn_cast_or_null<InputSection>(d->section);
79+
// Skip empty, discarded, ICF folded sections. Skipping ICF folded sections
80+
// reduces duplicate detection work in BPSectionOrderer.
81+
if (!sec || sec->size == 0 || !sec->isLive() || sec->repl != sec ||
82+
!orderer.secToSym.try_emplace(sec, d).second)
8083
return;
8184
rootSymbolToSectionIdxs[CachedHashStringRef(getRootSymbol(sym.getName()))]
8285
.insert(sections.size());

lld/MachO/BPSectionOrderer.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,10 @@ DenseMap<const InputSection *, int> lld::macho::runBalancedPartitioning(
117117
auto *isec = subsec.isec;
118118
if (!isec || isec->data.empty())
119119
continue;
120+
// ConcatInputSections are entirely live or dead, so the offset is
121+
// irrelevant.
122+
if (isa<ConcatInputSection>(isec) && !isec->isLive(0))
123+
continue;
120124
size_t idx = sections.size();
121125
sections.emplace_back(isec);
122126
for (auto *sym : BPOrdererMachO::getSymbols(*isec)) {

lld/test/ELF/bp-section-orderer.s

Lines changed: 68 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# NOTE: Code has been autogenerated by utils/update_test_body.py
12
# REQUIRES: aarch64
23
# RUN: rm -rf %t && split-file %s %t && cd %t
34

@@ -18,37 +19,40 @@
1819

1920
# RUN: llvm-mc -filetype=obj -triple=aarch64 a.s -o a.o
2021
# RUN: llvm-profdata merge a.proftext -o a.profdata
21-
# RUN: ld.lld a.o --irpgo-profile=a.profdata --bp-startup-sort=function --verbose-bp-section-orderer --icf=all 2>&1 | FileCheck %s --check-prefix=STARTUP-FUNC-ORDER
22+
# RUN: ld.lld a.o --irpgo-profile=a.profdata --bp-startup-sort=function --verbose-bp-section-orderer --icf=all --gc-sections 2>&1 | FileCheck %s --check-prefix=STARTUP-FUNC-ORDER
2223

2324
# STARTUP-FUNC-ORDER: Ordered 3 sections using balanced partitioning
2425
# STARTUP-FUNC-ORDER: Total area under the page fault curve: 3.
2526

2627
# RUN: ld.lld -o out.s a.o --irpgo-profile=a.profdata --bp-startup-sort=function
2728
# RUN: llvm-nm -jn out.s | tr '\n' , | FileCheck %s --check-prefix=STARTUP
28-
# STARTUP: s5,s4,s3,s2,s1,A,B,C,F,E,D,_start,d4,d3,d2,d1,{{$}}
29+
# STARTUP: s5,s4,s3,s2,s1,A,B,C,F,E,D,merged1,merged2,_start,d4,d3,d2,d1,{{$}}
2930

3031
# RUN: ld.lld -o out.os a.o --irpgo-profile=a.profdata --bp-startup-sort=function --symbol-ordering-file a.txt
3132
# RUN: llvm-nm -jn out.os | tr '\n' , | FileCheck %s --check-prefix=ORDER-STARTUP
32-
# ORDER-STARTUP: s2,s1,s5,s4,s3,A,F,E,D,B,C,_start,d3,d2,d4,d1,{{$}}
33+
# ORDER-STARTUP: s2,s1,s5,s4,s3,A,F,E,D,B,C,merged1,merged2,_start,d3,d2,d4,d1,{{$}}
3334

3435
# RUN: ld.lld -o out.cf a.o --verbose-bp-section-orderer --bp-compression-sort=function 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-FUNC
36+
# RUN: ld.lld -o out.cf.icf a.o --verbose-bp-section-orderer --bp-compression-sort=function --icf=all --gc-sections 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-ICF-FUNC
3537
# RUN: llvm-nm -jn out.cf | tr '\n' , | FileCheck %s --check-prefix=CFUNC
36-
# CFUNC: s5,s4,s3,s2,s1,F,C,E,D,B,A,_start,d4,d3,d2,d1,{{$}}
38+
# CFUNC: s5,s4,s3,s2,s1,A,F,merged1,merged2,C,E,D,B,_start,d4,d3,d2,d1,{{$}}
3739

3840
# RUN: ld.lld -o out.cd a.o --verbose-bp-section-orderer --bp-compression-sort=data 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-DATA
3941
# RUN: llvm-nm -jn out.cd | tr '\n' , | FileCheck %s --check-prefix=CDATA
40-
# CDATA: s5,s3,s4,s2,s1,F,C,E,D,B,A,_start,d4,d1,d3,d2,{{$}}
42+
# CDATA: s5,s3,s4,s2,s1,F,C,E,D,B,A,merged1,merged2,_start,d4,d1,d3,d2,{{$}}
4143

4244
# RUN: ld.lld -o out.cb a.o --verbose-bp-section-orderer --bp-compression-sort=both 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-BOTH
43-
# RUN: llvm-nm -jn out.cb | tr '\n' , | FileCheck %s --check-prefix=CDATA
45+
# RUN: llvm-nm -jn out.cb | tr '\n' , | FileCheck %s --check-prefix=CBOTH
46+
# CBOTH: s5,s3,s4,s2,s1,A,F,merged1,merged2,C,E,D,B,_start,d4,d1,d3,d2,{{$}}
4447

4548
# RUN: ld.lld -o out.cbs a.o --verbose-bp-section-orderer --bp-compression-sort=both --irpgo-profile=a.profdata --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-BOTH
4649
# RUN: llvm-nm -jn out.cbs | tr '\n' , | FileCheck %s --check-prefix=CBOTH-STARTUP
47-
# CBOTH-STARTUP: s5,s3,s4,s2,s1,A,B,C,F,E,D,_start,d4,d1,d3,d2,{{$}}
50+
# CBOTH-STARTUP: s5,s3,s4,s2,s1,A,B,C,F,E,D,merged1,merged2,_start,d4,d1,d3,d2,{{$}}
4851

49-
# BP-COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
52+
# BP-COMPRESSION-FUNC: Ordered 9 sections using balanced partitioning
53+
# BP-COMPRESSION-ICF-FUNC: Ordered 8 sections using balanced partitioning
5054
# BP-COMPRESSION-DATA: Ordered 9 sections using balanced partitioning
51-
# BP-COMPRESSION-BOTH: Ordered 16 sections using balanced partitioning
55+
# BP-COMPRESSION-BOTH: Ordered 18 sections using balanced partitioning
5256

5357
#--- a.proftext
5458
:ir
@@ -114,17 +118,24 @@ int d3[] = {5,6,7,8};
114118
int d2[] = {7,8,9,10};
115119
int d1[] = {3,4,5,6};
116120

121+
// used is to suppress compiler garbage collection in ELF; retain is to suppress linker garbage collection; used is not needed for non-internal linkage symbols
122+
// used is for both compiler/linker GC in Mach-O; retain is ignored for Mach-O
123+
#define RETAIN [[gnu::used,gnu::retain]]
124+
117125
int C(int a);
118126
int B(int a);
119127
void A();
120128

121129
int F(int a) { return C(a + 3); }
122-
int E(int a) { return C(a + 2); }
123-
int D(int a) { return B(a + 2); }
130+
RETAIN int E(int a) { return C(a + 2); }
131+
RETAIN int D(int a) { return B(a + 2); }
124132
int C(int a) { A(); return a + 2; }
125133
int B(int a) { A(); return a + 1; }
126134
void A() {}
127135

136+
RETAIN int merged1(int a) { return F(a + 101); }
137+
int merged2(int a) { return F(a + 101); }
138+
128139
int _start() { return 0; }
129140

130141
#--- gen
@@ -169,7 +180,7 @@ C: // @C
169180
.Lfunc_end1:
170181
.size C, .Lfunc_end1-C
171182
// -- End function
172-
.section .text.E,"ax",@progbits
183+
.section .text.E,"axR",@progbits
173184
.globl E // -- Begin function E
174185
.p2align 2
175186
.type E,@function
@@ -188,7 +199,7 @@ E: // @E
188199
.Lfunc_end2:
189200
.size E, .Lfunc_end2-E
190201
// -- End function
191-
.section .text.D,"ax",@progbits
202+
.section .text.D,"axR",@progbits
192203
.globl D // -- Begin function D
193204
.p2align 2
194205
.type D,@function
@@ -236,6 +247,44 @@ A: // @A
236247
.Lfunc_end5:
237248
.size A, .Lfunc_end5-A
238249
// -- End function
250+
.section .text.merged1,"axR",@progbits
251+
.globl merged1 // -- Begin function merged1
252+
.p2align 2
253+
.type merged1,@function
254+
merged1: // @merged1
255+
// %bb.0: // %entry
256+
sub sp, sp, #32
257+
stp x29, x30, [sp, #16] // 16-byte Folded Spill
258+
add x29, sp, #16
259+
stur w0, [x29, #-4]
260+
ldur w8, [x29, #-4]
261+
add w0, w8, #101
262+
bl F
263+
ldp x29, x30, [sp, #16] // 16-byte Folded Reload
264+
add sp, sp, #32
265+
ret
266+
.Lfunc_end6:
267+
.size merged1, .Lfunc_end6-merged1
268+
// -- End function
269+
.section .text.merged2,"ax",@progbits
270+
.globl merged2 // -- Begin function merged2
271+
.p2align 2
272+
.type merged2,@function
273+
merged2: // @merged2
274+
// %bb.0: // %entry
275+
sub sp, sp, #32
276+
stp x29, x30, [sp, #16] // 16-byte Folded Spill
277+
add x29, sp, #16
278+
stur w0, [x29, #-4]
279+
ldur w8, [x29, #-4]
280+
add w0, w8, #101
281+
bl F
282+
ldp x29, x30, [sp, #16] // 16-byte Folded Reload
283+
add sp, sp, #32
284+
ret
285+
.Lfunc_end7:
286+
.size merged2, .Lfunc_end7-merged2
287+
// -- End function
239288
.section .text._start,"ax",@progbits
240289
.globl _start // -- Begin function _start
241290
.p2align 2
@@ -244,8 +293,8 @@ _start: // @_start
244293
// %bb.0: // %entry
245294
mov w0, wzr
246295
ret
247-
.Lfunc_end6:
248-
.size _start, .Lfunc_end6-_start
296+
.Lfunc_end8:
297+
.size _start, .Lfunc_end8-_start
249298
// -- End function
250299
.type s5,@object // @s5
251300
.section .rodata.s5,"a",@progbits
@@ -330,6 +379,10 @@ d1:
330379

331380
.section ".note.GNU-stack","",@progbits
332381
.addrsig
382+
.addrsig_sym F
333383
.addrsig_sym C
384+
.addrsig_sym E
385+
.addrsig_sym D
334386
.addrsig_sym B
335387
.addrsig_sym A
388+
.addrsig_sym merged1

lld/test/MachO/bp-section-orderer.s

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,15 @@
4242
# RUN: %no-fatal-warnings-lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both --irpgo-profile-sort=%t/a.profdata 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
4343

4444
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-FUNC
45+
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=function --icf=all 2>&1 | FileCheck %s --check-prefix=COMPRESSION-ICF-FUNC
4546
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=data 2>&1 | FileCheck %s --check-prefix=COMPRESSION-DATA
4647
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=both 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
4748
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=both --irpgo-profile=%t/a.profdata --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
4849

49-
# COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
50+
# COMPRESSION-FUNC: Ordered 9 sections using balanced partitioning
51+
# COMPRESSION-ICF-FUNC: Ordered 7 sections using balanced partitioning
5052
# COMPRESSION-DATA: Ordered 7 sections using balanced partitioning
51-
# COMPRESSION-BOTH: Ordered 14 sections using balanced partitioning
53+
# COMPRESSION-BOTH: Ordered 16 sections using balanced partitioning
5254

5355
#--- a.s
5456
.text
@@ -78,6 +80,12 @@ F:
7880
add w0, w0, #3
7981
bl l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
8082
ret
83+
merged1:
84+
add w0, w0, #101
85+
ret
86+
merged2:
87+
add w0, w0, #101
88+
ret
8189

8290
.data
8391
s1:

0 commit comments

Comments
 (0)