22## non-optimized code.
33
44# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
5+ # RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
6+ # RUN: --defsym COMPACT=1 %s -o %t.compact.o
57# RUN: link_fdata %s %t.o %t.fdata
6- # RUN: llvm-strip --strip-unneeded %t.o
8+ # RUN: llvm-strip --strip-unneeded %t*.o
79# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -static
10+ # RUN: %clang %cflags %t.compact.o -o %t.compact.exe -Wl,-q -static
811# RUN: llvm-bolt %t.exe -o %t.bolt --data %t.fdata --lite
12+ # RUN: llvm-bolt %t.compact.exe -o %t.compact.bolt --data %t.fdata --lite \
13+ # RUN: --compact-code-model
914# RUN: llvm-objdump -d --disassemble-symbols=cold_function %t.exe \
1015# RUN: | FileCheck %s --check-prefix=CHECK-INPUT
1116# RUN: llvm-objdump -d --disassemble-symbols=cold_function %t.bolt \
1217# RUN: | FileCheck %s
18+ # RUN: llvm-objdump -d --disassemble-symbols=_start.org.0 %t.bolt \
19+ # RUN: | FileCheck %s --check-prefix=CHECK-PATCH
20+ # RUN: llvm-objdump -d %t.compact.bolt \
21+ # RUN: | FileCheck %s --check-prefix=CHECK-COMPACT
1322
23+ ## In compact mode, make sure we do not create an unnecessary patch thunk.
24+ # CHECK-COMPACT-NOT: <_start.org.0>
1425
1526## Verify that the number of FDEs matches the number of functions in the output
1627## binary. There are three original functions and two optimized.
28+ ## NOTE: at the moment we are emitting extra FDEs for patched functions, thus
29+ ## there is one more FDE for _start.
1730# RUN: llvm-readelf -u %t.bolt | grep -wc FDE \
1831# RUN: | FileCheck --check-prefix=CHECK-FDE %s
19- # CHECK-FDE: 5
32+ # CHECK-FDE: 6
2033
2134## In lite mode, optimized code will be separated from the original .text by
2235## over 128MB, making it impossible for call/bl instructions in cold functions
2841_start:
## The FDATA record below is picked up by the link_fdata step to build the
## profile that is passed to llvm-bolt via --data.
2942# FDATA: 0 [unknown] 0 1 _start 0 0 100
3043 .cfi_startproc
44+
45+ ## Check that the code at the original location is converted into a
46+ ## veneer/thunk.
47+ # CHECK-PATCH-LABEL: <_start.org.0>
48+ # CHECK-PATCH-NEXT: adrp x16
49+ # CHECK-PATCH-NEXT: add x16, x16,
50+ # CHECK-PATCH-NEXT: br x16
## Skip the call to cold_function when x0 == 1.
3151 cmp x0, 1
3252 b.eq .L0
3353 bl cold_function
3454.L0:
3555 ret x30
3656 .cfi_endproc
37- .size _start, .-_start
57+ .size _start, .-_start
3858
39- ## Cold non-optimized function with a reference to a hot function (_start).
59+ ## Cold non-optimized function with references to hot functions.
4060# CHECK: Disassembly of section .bolt.org.text:
4161# CHECK-LABEL: <cold_function>
4262 .globl cold_function
@@ -97,12 +117,26 @@ cold_function:
97117# CHECK-NEXT: nop
98118# CHECK-NEXT: ldr x5
99119
120+ ## Since _start is relocated further than 128MB from the call site, we check
121+ ## that the call is converted into a call to its original version. That original
122+ ## version should contain veneer/thunk code that we check separately.
123+ bl _start
124+ # CHECK-INPUT-NEXT: bl {{.*}} <_start>
125+ # CHECK-NEXT: bl {{.*}} <_start.org.0>
126+
127+ ## Same as above, but the instruction is a tail call.
128+ b _start
129+ # CHECK-INPUT-NEXT: b {{.*}} <_start>
130+ # CHECK-NEXT: b {{.*}} <_start.org.0>
131+
100132 .cfi_endproc
101- .size cold_function, .-cold_function
133+ .size cold_function, .-cold_function
102134
103- ## Reserve 1MB of space to make functions that follow unreachable by ADRs in
135+ .ifndef COMPACT
136+ ## Reserve 128MB of space to make functions that follow unreachable by ADRs in
104137## code that precedes this gap.
105- .space 0x100000
138+ .space 0x8000000
139+ .endif
106140
107141 .globl far_func
108142 .type far_func, %function
@@ -111,5 +145,4 @@ far_func:
111145 .cfi_startproc
112146 ret x30
113147 .cfi_endproc
114- .size far_func, .-far_func
115-
148+ .size far_func, .-far_func
0 commit comments