Skip to content

Commit bab1c29

Browse files
authored
[BOLT] Extend Inliner to work on functions with Pointer Authentication (#162458)
The inliner uses DirectSP to record whether a function has instructions that directly define or use the SP. Exceptions are stack Push and Pop instructions. We can also allow pointer signing and authenticating instructions. The inliner removes the Return instructions from the inlined functions. If the removed return is a fused pointer-authentication-and-return (e.g. RETAA), we have to generate a new authentication instruction in its place.
1 parent 78d8298 commit bab1c29

File tree

6 files changed

+204
-0
lines changed

6 files changed

+204
-0
lines changed

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -632,6 +632,12 @@ class MCPlusBuilder {
632632
return false;
633633
}
634634

635+
/// Generate the matching pointer authentication instruction from a fused
636+
/// pauth-and-return instruction.
/// \p AuthAndRet is the fused instruction; \p Auth receives the generated
/// authentication instruction. This base implementation is only a
/// placeholder that is unreachable; targets that have such fused
/// instructions are expected to override it.
637+
virtual void createMatchingAuth(const MCInst &AuthAndRet, MCInst &Auth) {
638+
llvm_unreachable("not implemented");
639+
}
640+
635641
/// Returns the register used as a return address. Returns std::nullopt if
636642
/// not applicable, such as reading the return address from a system register
637643
/// or from the stack.

bolt/lib/Passes/Inliner.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,13 @@ InliningInfo getInliningInfo(const BinaryFunction &BF) {
195195
if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
196196
continue;
197197

198+
// Pointer signing and authentication instructions are used around
199+
// Push and Pop. These are also straightforward to handle.
200+
if (BC.isAArch64() &&
201+
(BC.MIB->isPSignOnLR(Inst) || BC.MIB->isPAuthOnLR(Inst) ||
202+
BC.MIB->isPAuthAndRet(Inst)))
203+
continue;
204+
198205
DirectSP |= BC.MIB->hasDefOfPhysReg(Inst, SPReg) ||
199206
BC.MIB->hasUseOfPhysReg(Inst, SPReg);
200207
}
@@ -338,6 +345,18 @@ Inliner::inlineCall(BinaryBasicBlock &CallerBB,
338345
BC.Ctx.get());
339346
}
340347

348+
// Handling fused authentication and return instructions (Armv8.3-A):
349+
// if the call site is not a tail call, the callee's return will be removed
350+
// from the inlined block. If that return is RETA(A|B), we have to keep
351+
// the authentication part.
352+
// RETAA -> AUTIASP
353+
// RETAB -> AUTIBSP
354+
if (!CSIsTailCall && BC.isAArch64() && BC.MIB->isPAuthAndRet(Inst)) {
355+
MCInst Auth;
356+
BC.MIB->createMatchingAuth(Inst, Auth);
357+
InsertII =
358+
std::next(InlinedBB->insertInstruction(InsertII, std::move(Auth)));
359+
}
341360
if (CSIsTailCall || (!MIB.isCall(Inst) && !MIB.isReturn(Inst))) {
342361
InsertII =
343362
std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,33 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
313313
Inst.getOpcode() == AArch64::RETABSPPCr;
314314
}
315315

316+
/// Build in \p Auth the standalone authentication instruction that matches
/// the fused pauth-and-return instruction \p AuthAndRet. The operands are
/// copied over as-is and only the opcode is replaced. Any opcode that is not
/// listed in the switch below is treated as unreachable.
void createMatchingAuth(const MCInst &AuthAndRet, MCInst &Auth) override {
317+
// Reset Auth before populating it.
Auth.clear();
318+
// Carry over the operands of the fused instruction unchanged.
Auth.setOperands(AuthAndRet.getOperands());
319+
// Map each fused return opcode to the AUT* opcode with the same key (A/B)
// and the same operand form.
switch (AuthAndRet.getOpcode()) {
320+
case AArch64::RETAA:
321+
Auth.setOpcode(AArch64::AUTIASP);
322+
break;
323+
case AArch64::RETAB:
324+
Auth.setOpcode(AArch64::AUTIBSP);
325+
break;
326+
case AArch64::RETAASPPCi:
327+
Auth.setOpcode(AArch64::AUTIASPPCi);
328+
break;
329+
case AArch64::RETABSPPCi:
330+
Auth.setOpcode(AArch64::AUTIBSPPCi);
331+
break;
332+
case AArch64::RETAASPPCr:
333+
Auth.setOpcode(AArch64::AUTIASPPCr);
334+
break;
335+
case AArch64::RETABSPPCr:
336+
Auth.setOpcode(AArch64::AUTIBSPPCr);
337+
break;
338+
default:
339+
// Only the fused pauth-and-return opcodes above have a mapping.
llvm_unreachable("Unhandled fused pauth-and-return instruction");
340+
}
341+
}
342+
316343
std::optional<MCPhysReg> getSignedReg(const MCInst &Inst) const override {
317344
switch (Inst.getOpcode()) {
318345
case AArch64::PACIA:
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# This test checks that inlining functions with fused pointer-auth-and-return
2+
# instructions is properly handled by BOLT.
3+
4+
# REQUIRES: system-linux
5+
6+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown -mattr=+v8.3a %s -o %t.o
7+
# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q
8+
# RUN: llvm-bolt --inline-all --print-inline --print-only=_Z3barP1A \
9+
# RUN: %t.exe -o %t.bolt | FileCheck %s
10+
11+
# CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 8 bytes.
12+
# CHECK: Binary Function "_Z3barP1A" after inlining {
13+
# CHECK-NOT: bl _Z3fooP1A
14+
# CHECK: ldr x8, [x0]
15+
# CHECK-NEXT: ldr w0, [x8]
16+
# CHECK-NEXT: autiasp
17+
18+
.text
19+
.globl _Z3fooP1A
20+
.type _Z3fooP1A,@function
21+
_Z3fooP1A:
22+
paciasp
23+
ldr x8, [x0]
24+
ldr w0, [x8]
25+
retaa
26+
.size _Z3fooP1A, .-_Z3fooP1A
27+
28+
.globl _Z3barP1A
29+
.type _Z3barP1A,@function
30+
_Z3barP1A:
31+
stp x29, x30, [sp, #-16]!
32+
mov x29, sp
33+
bl _Z3fooP1A
34+
mul w0, w0, w0
35+
ldp x29, x30, [sp], #16
36+
ret
37+
.size _Z3barP1A, .-_Z3barP1A
38+
39+
.globl main
40+
.p2align 2
41+
.type main,@function
42+
main:
43+
mov w0, wzr
44+
ret
45+
.size main, .-main
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# This test checks that inlining functions with fused pointer-auth-and-return
2+
# instructions into a location with a tailcall is properly handled by BOLT.
3+
# Because _Z3barP1A ends in a tailcall, we don't remove the return instruction
4+
# from the inlined block. Therefore, we should see a retaa, and not an autiasp.
5+
6+
# REQUIRES: system-linux
7+
8+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown -mattr=+v8.3a %s -o %t.o
9+
# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q
10+
# RUN: llvm-bolt --inline-all --print-inline --print-only=_Z3barP1A \
11+
# RUN: %t.exe -o %t.bolt | FileCheck %s
12+
13+
# CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 12 bytes.
14+
# CHECK: Binary Function "_Z3barP1A" after inlining {
15+
# CHECK-NOT: bl _Z3fooP1A
16+
# CHECK: mov x29, sp
17+
# CHECK-NEXT: paciasp
18+
# CHECK-NEXT: ldr x8, [x0]
19+
# CHECK-NEXT: ldr w0, [x8]
20+
# CHECK-NEXT: retaa
21+
22+
.text
23+
.globl _Z3fooP1A
24+
.type _Z3fooP1A,@function
25+
_Z3fooP1A:
26+
paciasp
27+
ldr x8, [x0]
28+
ldr w0, [x8]
29+
retaa
30+
.size _Z3fooP1A, .-_Z3fooP1A
31+
32+
.globl _Z3barP1A
33+
.type _Z3barP1A,@function
34+
_Z3barP1A:
35+
stp x29, x30, [sp, #-16]!
36+
mov x29, sp
37+
b _Z3fooP1A // tailcall
38+
.size _Z3barP1A, .-_Z3barP1A
39+
40+
.globl main
41+
.p2align 2
42+
.type main,@function
43+
main:
44+
mov w0, wzr
45+
ret
46+
.size main, .-main
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# This test checks that inlining functions with the pauth-lr variants of
2+
# fused pointer-auth-and-return instructions is properly handled by BOLT.
3+
4+
# REQUIRES: system-linux
5+
6+
# RUN: %clang %cflags -march=armv9.5-a+pauth-lr -O0 %s -o %t.exe -Wl,-q
7+
# RUN: llvm-bolt --inline-all --print-inline --print-only=_Z3barP1A \
8+
# RUN: %t.exe -o %t.bolt | FileCheck %s
9+
10+
# CHECK: BOLT-INFO: inlined 0 calls at 2 call sites in 2 iteration(s). Change in binary size: 16 bytes.
11+
# CHECK: Binary Function "_Z3barP1A" after inlining {
12+
# CHECK-NOT: bl _Z3fooP1A
13+
# CHECK: paciasppc
14+
# CHECK-NEXT: ldr x8, [x0]
15+
# CHECK-NEXT: ldr w0, [x8]
16+
# CHECK-NEXT: autiasppcr x28
17+
# CHECK-NEXT: paciasppc
18+
# CHECK-NEXT: ldr x7, [x0]
19+
# CHECK-NEXT: ldr w0, [x7]
20+
# CHECK-NEXT: autiasppc _Z3bazP1A
21+
22+
.text
23+
.globl _Z3fooP1A
24+
.type _Z3fooP1A,@function
25+
_Z3fooP1A:
26+
paciasppc
27+
ldr x8, [x0]
28+
ldr w0, [x8]
29+
retaasppcr x28
30+
.size _Z3fooP1A, .-_Z3fooP1A
31+
32+
.text
33+
.globl _Z3bazP1A
34+
.type _Z3bazP1A,@function
35+
_Z3bazP1A:
36+
0:
37+
paciasppc
38+
ldr x7, [x0]
39+
ldr w0, [x7]
40+
retaasppc 0b
41+
.size _Z3bazP1A, .-_Z3bazP1A
42+
43+
.globl _Z3barP1A
44+
.type _Z3barP1A,@function
45+
_Z3barP1A:
46+
stp x29, x30, [sp, #-16]!
47+
mov x29, sp
48+
bl _Z3fooP1A
49+
bl _Z3bazP1A
50+
mul w0, w0, w0
51+
ldp x29, x30, [sp], #16
52+
ret
53+
.size _Z3barP1A, .-_Z3barP1A
54+
55+
.globl main
56+
.p2align 2
57+
.type main,@function
58+
main:
59+
mov w0, wzr
60+
ret
61+
.size main, .-main

0 commit comments

Comments
 (0)