Skip to content

Commit 0489682

Browse files
committed
Merging r360405:
------------------------------------------------------------------------ r360405 | maskray | 2019-05-09 22:51:00 -0700 (Thu, 09 May 2019) | 25 lines [PPC64] Define getThunkSectionSpacing() based on the range of R_PPC64_REL24 Suggested by Sean Fertile and Peter Smith. Thunk section spacing decreases the total number of thunks. I measured a decrease of 1% or less in some large programs, with no perceivable slowdown in link time. Override getThunkSectionSpacing() to enable it. 0x2000000 is the farthest point R_PPC64_REL24 can reach. I tried several numbers and found 0x2000000 works the best. Numbers near 0x2000000 work as well but let's just use the simpler number. As demonstrated by the updated tests, this essentially changes placement of most thunks to the end of the output section. We leverage this property to fix PR40740 reported by Alfredo Dal'Ava Júnior: The output section .init consists of input sections from several object files (crti.o crtbegin.o crtend.o crtn.o). Sections other than the last one do not have a terminator. With this patch, we create the thunk after the last .init input section and thus fix the issue. This is not foolproof but works quite well for such sections (with no terminator) in practice. Reviewed By: ruiu, sfertile Differential Revision: https://reviews.llvm.org/D61720 ------------------------------------------------------------------------ llvm-svn: 362274
1 parent f1cacab commit 0489682

9 files changed

+101
-59
lines changed

lld/ELF/Arch/PPC64.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ class PPC64 final : public TargetInfo {
113113
void writeGotHeader(uint8_t *Buf) const override;
114114
bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
115115
uint64_t BranchAddr, const Symbol &S) const override;
116+
uint32_t getThunkSectionSpacing() const override;
116117
bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
117118
RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
118119
RelExpr Expr) const override;
@@ -759,6 +760,14 @@ bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
759760
return !inBranchRange(Type, BranchAddr, S.getVA());
760761
}
761762

763+
uint32_t PPC64::getThunkSectionSpacing() const {
764+
// See comment in Arch/ARM.cpp for a more detailed explanation of
765+
// getThunkSectionSpacing(). For PPC64 we pick the constant here based on
766+
// R_PPC64_REL24, which is used by unconditional branch instructions.
767+
// 0x2000000 = (1 << (24 - 1)) * 4
768+
return 0x2000000;
769+
}
770+
762771
bool PPC64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
763772
int64_t Offset = Dst - Src;
764773
if (Type == R_PPC64_REL14)

lld/test/ELF/ppc64-bsymbolic-toc-restore.s

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ caller:
5353
# CHECK-LABEL: caller
5454
# CHECK: bl .+44
5555
# CHECK-NEXT: mr 31, 3
56-
# CHECK-NEXT: bl .-48
56+
# CHECK-NEXT: bl .+44
5757
# CHECK-NEXT: ld 2, 24(1)
5858
# CHECK-NEXT: add 3, 3, 31
5959
# CHECK-NEXT: addi 1, 1, 32
@@ -63,6 +63,6 @@ caller:
6363
# CHECK-EMPTY:
6464
# CHECK-NEXT: def:
6565
# CHECK-NEXT: addis 2, 12, 2
66-
# CHECK-NEXT: addi 2, 2, -32636
66+
# CHECK-NEXT: addi 2, 2, -32616
6767
# CHECK-NEXT: li 3, 55
6868
# CHECK-NEXT: blr

lld/test/ELF/ppc64-call-reach.s

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -65,27 +65,24 @@ test:
6565
# NEGOFFSET: 10010014: bl .-33554432
6666
# NEGOFFSET: 10010024: b .+33554432
6767

68+
# THUNK-LABEL: test:
69+
# THUNK: 10010014: bl .+20
70+
# THUNK: 10010024: b .+20
71+
6872
# .branch_lt[0]
6973
# THUNK-LABEL: __long_branch_callee:
70-
# THUNK-NEXT: 10010000: addis 12, 2, -1
74+
# THUNK-NEXT: 10010028: addis 12, 2, -1
7175
# THUNK-NEXT: ld 12, -32768(12)
7276
# THUNK-NEXT: mtctr 12
7377
# THUNK-NEXT: bctr
7478

7579
# .branch_lt[1]
7680
# THUNK-LABEL: __long_branch_tail_callee:
77-
# THUNK-NEXT: 10010010: addis 12, 2, -1
81+
# THUNK-NEXT: 10010038: addis 12, 2, -1
7882
# THUNK-NEXT: ld 12, -32760(12)
7983
# THUNK-NEXT: mtctr 12
8084
# THUNK-NEXT: bctr
8185

82-
# Each call now branches to a thunk, and although it is printed as positive
83-
# the offset is interpreted as a signed 26 bit value so 67108812 is actually
84-
# -52.
85-
# THUNK-LABEL: test:
86-
# THUNK: 10010034: bl .-52
87-
# THUNK: 10010044: b .+67108812
88-
8986
# The offset from the TOC to the .branch_lt section is (-1 << 16) - 32768.
9087
# Name Type Address Off Size
9188
# BRANCHLT: .branch_lt PROGBITS 0000000010020000 020000 000010

lld/test/ELF/ppc64-ifunc.s

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,21 @@
1515
# RUN: llvm-readelf -r %t | FileCheck --check-prefix=DYNREL %s
1616

1717
# NM-DAG: 0000000010028000 d .TOC.
18-
# NM-DAG: 0000000010010028 T ifunc
19-
# NM-DAG: 000000001001002c T ifunc2
18+
# NM-DAG: 0000000010010000 T ifunc
19+
# NM-DAG: 0000000010010004 T ifunc2
2020

2121
# SECTIONS: .plt NOBITS 0000000010030000
2222

23+
# __plt_ifunc - . = 0x10010020 - 0x10010010 = 16
24+
# __plt_ifunc2 - . = 0x10010044 - 0x10010018 = 28
25+
# CHECK: _start:
26+
# CHECK-NEXT: addis 2, 12, 1
27+
# CHECK-NEXT: addi 2, 2, 32760
28+
# CHECK-NEXT: 10010010: bl .+16
29+
# CHECK-NEXT: ld 2, 24(1)
30+
# CHECK-NEXT: 10010018: bl .+28
31+
# CHECK-NEXT: ld 2, 24(1)
32+
2333
# .plt[0] - .TOC. = 0x10030000 - 0x10028000 = (1<<16) - 32768
2434
# CHECK: __plt_ifunc:
2535
# CHECK-NEXT: std 2, 24(1)
@@ -36,19 +46,9 @@
3646
# CHECK-NEXT: mtctr 12
3747
# CHECK-NEXT: bctr
3848

39-
# __plt_ifunc - . = 0x10010000 - 0x10010038 = -56
40-
# __plt_ifunc2 - . = 0x10010014 - 0x10010040 = -44
41-
# CHECK: _start:
42-
# CHECK-NEXT: addis 2, 12, 1
43-
# CHECK-NEXT: addi 2, 2, 32720
44-
# CHECK-NEXT: 10010038: bl .-56
45-
# CHECK-NEXT: ld 2, 24(1)
46-
# CHECK-NEXT: 10010040: bl .-44
47-
# CHECK-NEXT: ld 2, 24(1)
48-
4949
# Check that we emit 2 R_PPC64_IRELATIVE.
50-
# DYNREL: R_PPC64_IRELATIVE 10010028
51-
# DYNREL: R_PPC64_IRELATIVE 1001002c
50+
# DYNREL: R_PPC64_IRELATIVE 10010000
51+
# DYNREL: R_PPC64_IRELATIVE 10010004
5252

5353
.type ifunc STT_GNU_IFUNC
5454
.globl ifunc

lld/test/ELF/ppc64-local-dynamic.s

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ k:
113113
// Dis: test:
114114
// Dis: addis 3, 2, 0
115115
// Dis-NEXT: addi 3, 3, -32760
116-
// Dis-NEXT: bl .-60
116+
// Dis-NEXT: bl .+60
117117
// Dis-NEXT: ld 2, 24(1)
118118
// Dis-NEXT: addis 3, 3, 0
119119
// Dis-NEXT: lwa 3, -32768(3)

lld/test/ELF/ppc64-long-branch-init.s

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# REQUIRES: ppc
2+
3+
# RUN: llvm-mc -filetype=obj -triple=powerpc64-pc-freebsd13.0 %s -o %t.o
4+
# RUN: ld.lld %t.o -o %t
5+
# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
6+
7+
## .init consists of sections from several object files. Sections other than the
8+
## last one do not have a terminator. Check we do not create a long branch stub
9+
## in the middle.
10+
## We currently use thunk section spacing to ensure the stub is at the end. This
11+
## is not foolproof but good enough to not break in practice.
12+
13+
# CHECK: Disassembly of section .init:
14+
# CHECK-LABEL: _init:
15+
# CHECK: blr
16+
# CHECK-EMPTY:
17+
# CHECK-LABEL: __long_branch_foo:
18+
19+
.globl foo
20+
foo:
21+
.space 0x2000000
22+
blr
23+
24+
.section .init,"ax",@progbits,unique,0
25+
.globl _init
26+
_init:
27+
stdu 1, -48(1)
28+
mflr 0
29+
std 0, 64(1)
30+
31+
.section .init,"ax",@progbits,unique,1
32+
bl foo
33+
nop
34+
35+
.section .init,"ax",@progbits,unique,2
36+
bl foo
37+
nop
38+
39+
.section .init,"ax",@progbits,unique,3
40+
ld 1, 0(1)
41+
ld 0, 16(1)
42+
mtlr 0
43+
blr

lld/test/ELF/ppc64-plt-stub.s

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,25 +4,26 @@
44
// RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o
55
// RUN: ld.lld -shared %t2.o -o %t2.so
66
// RUN: ld.lld %t.o %t2.so -o %t
7-
// RUN: llvm-objdump -d %t | FileCheck %s
7+
// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
88

99
// RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
1010
// RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o
1111
// RUN: ld.lld -shared %t2.o -o %t2.so
1212
// RUN: ld.lld %t.o %t2.so -o %t
13-
// RUN: llvm-objdump -d %t | FileCheck %s
13+
// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
1414

1515
// CHECK: Disassembly of section .text:
16-
// CHECK-NEXT: __plt_foo:
16+
// CHECK-NEXT: _start:
17+
// CHECK: 10010008: bl .+16
18+
19+
// CHECK-LABEL: 0000000010010018 __plt_foo:
1720
// CHECK-NEXT: std 2, 24(1)
1821
// CHECK-NEXT: addis 12, 2, 0
1922
// CHECK-NEXT: ld 12, 32560(12)
2023
// CHECK-NEXT: mtctr 12
2124
// CHECK-NEXT: bctr
2225

2326

24-
// CHECK: _start:
25-
// CHECK: bl .-40
2627
.text
2728
.abiversion 2
2829
.globl _start

lld/test/ELF/ppc64-toc-restore-recursive-call.s

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,11 @@
1414
# for recursive calls as well as keeps the logic for recursive calls consistent
1515
# with non-recursive calls.
1616

17-
# CHECK-LABEL: __plt_recursive_func:
18-
# CHECK-NEXT: 10000:
19-
# CHECK-LABEL: recursive_func
20-
# CHECK-NEXT: 10014:
21-
# CHECK: 1003c: bl .-60
22-
# CHECK-NEXT: 10040: ld 2, 24(1)
17+
# CHECK-LABEL: 0000000000010000 recursive_func:
18+
# CHECK: 10028: bl .+32
19+
# CHECK-NEXT: ld 2, 24(1)
20+
21+
# CHECK-LABEL: 0000000000010048 __plt_recursive_func:
2322

2423
.abiversion 2
2524
.section ".text"

lld/test/ELF/ppc64-toc-restore.s

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,11 @@ _start:
2828
bl foo
2929
nop
3030
bl bar_local
31-
32-
33-
// CHECK: Disassembly of section .text:
34-
// CHECK: _start:
35-
// CHECK: 1001001c: bl .-28
36-
// CHECK-NOT: 10010020: nop
37-
// CHECK: 10010020: ld 2, 24(1)
38-
// CHECK: 10010024: bl .-16
39-
// CHECK-NOT: 10010028: nop
40-
// CHECK-NOT: 10010028: ld 2, 24(1)
31+
// CHECK-LABEL: _start:
32+
// CHECK-NEXT: 10010008: bl .+64
33+
// CHECK-NEXT: 1001000c: ld 2, 24(1)
34+
// CHECK-NEXT: 10010010: bl .-16
35+
// CHECK-EMPTY:
4136

4237
# Calling a function in another object file which will have same
4338
# TOC base does not need a nop. If nop present, do not rewrite to
@@ -47,26 +42,24 @@ _diff_object:
4742
bl foo_not_shared
4843
bl foo_not_shared
4944
nop
50-
51-
// CHECK: _diff_object:
52-
// CHECK-NEXT: 10010028: bl .+24
53-
// CHECK-NEXT: 1001002c: bl .+20
54-
// CHECK-NEXT: 10010030: nop
45+
// CHECK-LABEL: _diff_object:
46+
// CHECK-NEXT: 10010014: bl .+28
47+
// CHECK-NEXT: 10010018: bl .+24
48+
// CHECK-NEXT: 1001001c: nop
5549

5650
# Branching to a local function does not need a nop
5751
.global noretbranch
5852
noretbranch:
5953
b bar_local
60-
// CHECK: noretbranch:
61-
// CHECK: 10010034: b .+67108832
62-
// CHECK-NOT: 10010038: nop
63-
// CHECK-NOT: 1001003c: ld 2, 24(1)
54+
// CHECK-LABEL: noretbranch:
55+
// CHECK: 10010020: b .+67108832
56+
// CHECK-EMPTY:
6457

6558
// This should come last to check the end-of-buffer condition.
6659
.global last
6760
last:
6861
bl foo
6962
nop
70-
// CHECK: last:
71-
// CHECK: 10010038: bl .-56
72-
// CHECK-NEXT: 1001003c: ld 2, 24(1)
63+
// CHECK-LABEL: last:
64+
// CHECK-NEXT: 10010024: bl .+36
65+
// CHECK-NEXT: 10010028: ld 2, 24(1)

0 commit comments

Comments
 (0)