Skip to content

Commit 8e6fb0e

Browse files
authored
Reapply "[BOLT][BTI] Skip inlining BasicBlocks containing indirect tailcalls" (#169881) (#169929)
This reapplies commit 5d6d743. Fix: the test that checks the debug output now lists assertions among its requirements, because that debug output is only available in assertion-enabled builds. -------- Original commit message: In the Inliner pass, tailcalls are converted to calls in the inlined BasicBlock. If the tailcall is indirect, the `BR` is converted to `BLR`. These instructions require different BTI landing pads at their targets. As the targets of indirect tailcalls are unknown, inlining such blocks is unsound for BTI: they should be skipped instead.
1 parent 8079d03 commit 8e6fb0e

File tree

3 files changed

+104
-0
lines changed

3 files changed

+104
-0
lines changed

bolt/lib/Passes/Inliner.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,32 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
491491
}
492492
}
493493

494+
// AArch64 BTI:
495+
// If the callee has an indirect tailcall (BR), we would transform it to
496+
// an indirect call (BLR) in InlineCall. Because of this, we would have to
497+
// update the BTI at the target of the tailcall. However, these targets
498+
// are not known. Instead, we skip inlining callees that contain indirect
499+
// tailcalls.
500+
auto HasIndirectTailCall = [&](const BinaryFunction &BF) -> bool {
501+
for (const auto &BB : BF) {
502+
for (const auto &II : BB) {
503+
if (BC.MIB->isIndirectBranch(II) && BC.MIB->isTailCall(II)) {
504+
return true;
505+
}
506+
}
507+
}
508+
return false;
509+
};
510+
511+
if (BC.isAArch64() && BC.usesBTI() &&
512+
HasIndirectTailCall(*TargetFunction)) {
513+
++InstIt;
514+
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Skipping inlining block with tailcall"
515+
<< " in " << Function << " : " << BB->getName()
516+
<< " to keep BTIs consistent.\n");
517+
continue;
518+
}
519+
494520
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction
495521
<< " in " << Function << " : " << BB->getName()
496522
<< ". Count: " << BB->getKnownExecutionCount()

bolt/test/AArch64/inline-bti-dbg.s

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
## This test checks that for AArch64 binaries with BTI, we do not inline blocks with indirect tailcalls.
2+
## Same as inline-bti.s, but also checks the debug output, and therefore requires an assertions-enabled build.
3+
4+
# REQUIRES: system-linux, assertions
5+
6+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
7+
# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q -Wl,-z,force-bti
8+
# RUN: llvm-bolt --inline-all %t.exe -o %t.bolt --debug 2>&1 | FileCheck %s
9+
10+
## foo ends with an indirect branch (br x30), so with BTI it must not be inlined into bar.
11+
# CHECK: BOLT-DEBUG: Skipping inlining block with tailcall in _Z3barP1A : .LBB01 to keep BTIs consistent.
12+
# CHECK-NOT: BOLT-INFO: inlined {{[0-9]+}} calls at {{[0-9]+}} call sites in {{[0-9]+}} iteration(s). Change in binary size: {{[0-9]+}} bytes.
13+
14+
.text
15+
.globl _Z3fooP1A
16+
.type _Z3fooP1A,@function
17+
_Z3fooP1A:
18+
ldr x8, [x0]
19+
ldr w0, [x8]
20+
br x30
21+
.size _Z3fooP1A, .-_Z3fooP1A
22+
23+
.globl _Z3barP1A
24+
.type _Z3barP1A,@function
25+
_Z3barP1A:
26+
stp x29, x30, [sp, #-16]!
27+
mov x29, sp
28+
bl _Z3fooP1A
29+
mul w0, w0, w0
30+
ldp x29, x30, [sp], #16
31+
ret
32+
.size _Z3barP1A, .-_Z3barP1A
33+
34+
.globl main
35+
.p2align 2
36+
.type main,@function
37+
main:
38+
mov w0, wzr
39+
ret
40+
.size main, .-main

bolt/test/AArch64/inline-bti.s

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
## This test checks that for AArch64 binaries with BTI, we do not inline blocks with indirect tailcalls.
2+
3+
# REQUIRES: system-linux
4+
5+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
6+
# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q -Wl,-z,force-bti
7+
# RUN: llvm-bolt --inline-all %t.exe -o %t.bolt | FileCheck %s
8+
9+
## foo ends with an indirect branch (br x30), so with BTI it must not be inlined into bar.
10+
# CHECK-NOT: BOLT-INFO: inlined {{[0-9]+}} calls at {{[0-9]+}} call sites in {{[0-9]+}} iteration(s). Change in binary size: {{[0-9]+}} bytes.
11+
12+
.text
13+
.globl _Z3fooP1A
14+
.type _Z3fooP1A,@function
15+
_Z3fooP1A:
16+
ldr x8, [x0]
17+
ldr w0, [x8]
18+
br x30
19+
.size _Z3fooP1A, .-_Z3fooP1A
20+
21+
.globl _Z3barP1A
22+
.type _Z3barP1A,@function
23+
_Z3barP1A:
24+
stp x29, x30, [sp, #-16]!
25+
mov x29, sp
26+
bl _Z3fooP1A
27+
mul w0, w0, w0
28+
ldp x29, x30, [sp], #16
29+
ret
30+
.size _Z3barP1A, .-_Z3barP1A
31+
32+
.globl main
33+
.p2align 2
34+
.type main,@function
35+
main:
36+
mov w0, wzr
37+
ret
38+
.size main, .-main

0 commit comments

Comments
 (0)