Skip to content

Commit ad605bd

Browse files
authored
[bolt][aarch64] Change indirect call instrumentation snippet
The indirect call instrumentation snippet uses the x16 register in its exit handler to branch to the destination target:

    __bolt_instr_ind_call_handler_func:
        msr  nzcv, x1
        ldp  x0, x1, [sp], #16
        ldr  x16, [sp], #16
        ldp  x0, x1, [sp], #16
        br   x16    <-----

This patch implements the instrumentation snippet by calling the instrumentation runtime library through an indirect call instruction and adding a wrapper to store/load the target value and the register used by the original indirect call instruction.

Example:

        mov  x16, foo
    indirectCall:
        adrp x8, Label
        add  x8, x8, #:lo12:Label
        blr  x8

Before:

    Instrumented indirect call:
        stp  x0, x1, [sp, #-16]!
        mov  x0, x8
        movk x1, #0x0, lsl #48
        movk x1, #0x0, lsl #32
        movk x1, #0x0, lsl #16
        movk x1, #0x0
        stp  x0, x1, [sp, #-16]!
        adrp x0, __bolt_instr_ind_call_handler_func
        add  x0, x0, #:lo12:__bolt_instr_ind_call_handler_func
        blr  x0

    __bolt_instr_ind_call_handler: (exit snippet)
        msr  nzcv, x1
        ldp  x0, x1, [sp], #16
        ldr  x16, [sp], #16
        ldp  x0, x1, [sp], #16
        br   x16    <- overwrites the original value in X16

    __bolt_instr_ind_call_handler_func: (entry snippet)
        stp  x0, x1, [sp, #-16]!
        mrs  x1, nzcv
        adrp x0, __bolt_instr_ind_call_handler
        add  x0, x0, #:lo12:__bolt_instr_ind_call_handler
        ldr  x0, [x0]
        cmp  x0, #0x0
        b.eq __bolt_instr_ind_call_handler
        str  x30, [sp, #-16]!
        blr  x0     <--- runtime lib store/load all regs
        ldr  x30, [sp], #16
        b    __bolt_instr_ind_call_handler

_________________________________________________________________________

After:

        mov  x16, foo
    indirectCall:
        adrp x8, Label
        add  x8, x8, #:lo12:Label
        blr  x8

    Instrumented indirect call:
        stp  x0, x1, [sp, #-16]!
        mov  x0, x8
        movk x1, #0x0, lsl #48
        movk x1, #0x0, lsl #32
        movk x1, #0x0, lsl #16
        movk x1, #0x0
        stp  x0, x30, [sp, #-16]!
        adrp x8, __bolt_instr_ind_call_handler_func
        add  x8, x8, #:lo12:__bolt_instr_ind_call_handler_func
        blr  x8     <--- call trampoline instr lib
        ldp  x0, x30, [sp], #16
        mov  x8, x0 <---- restore original target
        ldp  x0, x1, [sp], #16
        blr  x8     <--- original indirect call instruction

    // don't touch regs besides x0, x1
    __bolt_instr_ind_call_handler: (exit snippet)
        ret         <---- return to original function with indirect call

    __bolt_instr_ind_call_handler_func: (entry snippet)
        adrp x0, __bolt_instr_ind_call_handler
        add  x0, x0, #:lo12:__bolt_instr_ind_call_handler
        ldr  x0, [x0]
        cmp  x0, #0x0
        b.eq __bolt_instr_ind_call_handler
        str  x30, [sp, #-16]!
        blr  x0     <--- runtime lib store/load all regs
        ldr  x30, [sp], #16
        b    __bolt_instr_ind_call_handler
1 parent d39f524 commit ad605bd

File tree

6 files changed

+164
-63
lines changed

6 files changed

+164
-63
lines changed

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,11 @@ class MCPlusBuilder {
538538
llvm_unreachable("not implemented");
539539
}
540540

541+
virtual void createDirectBranch(MCInst &Inst, const MCSymbol *Target,
542+
MCContext *Ctx) {
543+
llvm_unreachable("not implemented");
544+
}
545+
541546
virtual MCPhysReg getX86R11() const { llvm_unreachable("not implemented"); }
542547

543548
virtual unsigned getShortBranchOpcode(unsigned Opcode) const {

bolt/lib/Passes/Instrumentation.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -305,9 +305,12 @@ void Instrumentation::instrumentIndirectTarget(BinaryBasicBlock &BB,
305305
: IndCallHandlerExitBBFunction->getSymbol(),
306306
IndCallSiteID, &*BC.Ctx);
307307

308-
Iter = BB.eraseInstruction(Iter);
309-
Iter = insertInstructions(CounterInstrs, BB, Iter);
310-
--Iter;
308+
if (!BC.isAArch64()) {
309+
Iter = BB.eraseInstruction(Iter);
310+
Iter = insertInstructions(CounterInstrs, BB, Iter);
311+
--Iter;
312+
} else
313+
Iter = insertInstructions(CounterInstrs, BB, Iter);
311314
}
312315

313316
bool Instrumentation::instrumentOneTarget(

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 86 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,14 @@ static cl::opt<bool> NoLSEAtomics(
4848

4949
namespace {
5050

51-
static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) {
51+
[[maybe_unused]] static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) {
5252
Inst.setOpcode(AArch64::MRS);
5353
Inst.clear();
5454
Inst.addOperand(MCOperand::createReg(RegName));
5555
Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV));
5656
}
5757

58-
static void setSystemFlag(MCInst &Inst, MCPhysReg RegName) {
58+
[[maybe_unused]] static void setSystemFlag(MCInst &Inst, MCPhysReg RegName) {
5959
Inst.setOpcode(AArch64::MSR);
6060
Inst.clear();
6161
Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV));
@@ -2114,6 +2114,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
21142114
convertJmpToTailCall(Inst);
21152115
}
21162116

2117+
void createDirectBranch(MCInst &Inst, const MCSymbol *Target,
2118+
MCContext *Ctx) override {
2119+
Inst.setOpcode(AArch64::B);
2120+
Inst.clear();
2121+
Inst.addOperand(MCOperand::createExpr(getTargetExprFor(
2122+
Inst, MCSymbolRefExpr::create(Target, *Ctx), *Ctx, 0)));
2123+
}
2124+
21172125
bool analyzeBranch(InstructionIterator Begin, InstructionIterator End,
21182126
const MCSymbol *&TBB, const MCSymbol *&FBB,
21192127
MCInst *&CondBranch,
@@ -2471,21 +2479,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
24712479
}
24722480

24732481
InstructionListType createInstrumentedIndCallHandlerExitBB() const override {
2474-
InstructionListType Insts(5);
24752482
// Code sequence for instrumented indirect call handler:
2476-
// msr nzcv, x1
2477-
// ldp x0, x1, [sp], #16
2478-
// ldr x16, [sp], #16
2479-
// ldp x0, x1, [sp], #16
2480-
// br x16
2481-
setSystemFlag(Insts[0], AArch64::X1);
2482-
createPopRegisters(Insts[1], AArch64::X0, AArch64::X1);
2483-
// Here we load address of the next function which should be called in the
2484-
// original binary to X16 register. Writing to X16 is permitted without
2485-
// needing to restore.
2486-
loadReg(Insts[2], AArch64::X16, AArch64::SP);
2487-
createPopRegisters(Insts[3], AArch64::X0, AArch64::X1);
2488-
createIndirectBranch(Insts[4], AArch64::X16, 0);
2483+
// ret
2484+
2485+
InstructionListType Insts;
2486+
2487+
Insts.emplace_back();
2488+
createReturn(Insts.back());
2489+
24892490
return Insts;
24902491
}
24912492

@@ -2561,39 +2562,59 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
25612562
MCSymbol *HandlerFuncAddr,
25622563
int CallSiteID,
25632564
MCContext *Ctx) override {
2564-
InstructionListType Insts;
25652565
// Code sequence used to enter indirect call instrumentation helper:
2566-
// stp x0, x1, [sp, #-16]! createPushRegisters
2567-
// mov target x0 convertIndirectCallToLoad -> orr x0 target xzr
2566+
// stp x0, x1, [sp, #-16]! createPushRegisters (1)
2567+
// mov x0, target convertIndirectCallToLoad -> orr x0 target xzr
25682568
// mov x1 CallSiteID createLoadImmediate ->
25692569
// movk x1, #0x0, lsl #48
25702570
// movk x1, #0x0, lsl #32
25712571
// movk x1, #0x0, lsl #16
25722572
// movk x1, #0x0
2573-
// stp x0, x1, [sp, #-16]!
2574-
// bl *HandlerFuncAddr createIndirectCall ->
2573+
// stp x0, x30, [sp, #-16]! (2)
25752574
// adr target *HandlerFuncAddr -> adrp + add (into the call's target register)
2576-
// blr x0
2575+
// blr target (__bolt_instr_ind_call_handler_func)
2576+
// ldp x0, x30, [sp], #16 (2)
2577+
// mov target, x0 ; move target address back to the indirect call register
2578+
// ldp x0, x1, [sp], #16 (1)
2579+
2580+
InstructionListType Insts;
25772581
Insts.emplace_back();
2578-
createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
2582+
createPushRegisters(Insts.back(), getIntArgRegister(0),
2583+
getIntArgRegister(1));
25792584
Insts.emplace_back(CallInst);
2580-
convertIndirectCallToLoad(Insts.back(), AArch64::X0);
2585+
convertIndirectCallToLoad(Insts.back(), getIntArgRegister(0));
25812586
InstructionListType LoadImm =
25822587
createLoadImmediate(getIntArgRegister(1), CallSiteID);
25832588
Insts.insert(Insts.end(), LoadImm.begin(), LoadImm.end());
25842589
Insts.emplace_back();
2585-
createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
2590+
createPushRegisters(Insts.back(), getIntArgRegister(0), AArch64::LR);
25862591
Insts.resize(Insts.size() + 2);
2587-
InstructionListType Addr =
2588-
materializeAddress(HandlerFuncAddr, Ctx, AArch64::X0);
2592+
InstructionListType Addr = materializeAddress(
2593+
HandlerFuncAddr, Ctx, CallInst.getOperand(0).getReg());
25892594
assert(Addr.size() == 2 && "Invalid Addr size");
25902595
std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
2596+
25912597
Insts.emplace_back();
2592-
createIndirectCallInst(Insts.back(), isTailCall(CallInst), AArch64::X0);
2598+
createIndirectCallInst(Insts.back(), false,
2599+
CallInst.getOperand(0).getReg());
25932600

2594-
// Carry over metadata including tail call marker if present.
2595-
stripAnnotations(Insts.back());
2596-
moveAnnotations(std::move(CallInst), Insts.back());
2601+
Insts.emplace_back();
2602+
createPopRegisters(Insts.back(), getIntArgRegister(0), AArch64::LR);
2603+
2604+
// move x0 to indirect call register
2605+
Insts.emplace_back();
2606+
Insts.back().setOpcode(AArch64::ORRXrs);
2607+
Insts.back().insert(Insts.back().begin(),
2608+
MCOperand::createReg(CallInst.getOperand(0).getReg()));
2609+
Insts.back().insert(Insts.back().begin() + 1,
2610+
MCOperand::createReg(AArch64::XZR));
2611+
Insts.back().insert(Insts.back().begin() + 2,
2612+
MCOperand::createReg(getIntArgRegister(0)));
2613+
Insts.back().insert(Insts.back().begin() + 3, MCOperand::createImm(0));
2614+
2615+
Insts.emplace_back();
2616+
createPopRegisters(Insts.back(), getIntArgRegister(0),
2617+
getIntArgRegister(1));
25972618

25982619
return Insts;
25992620
}
@@ -2602,43 +2623,53 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
26022623
createInstrumentedIndCallHandlerEntryBB(const MCSymbol *InstrTrampoline,
26032624
const MCSymbol *IndCallHandler,
26042625
MCContext *Ctx) override {
2605-
// Code sequence used to check whether InstrTampoline was initialized
2626+
// Code sequence used to check whether InstrTrampoline was initialized
26062627
// and call it if so, returns via IndCallHandler
2607-
// stp x0, x1, [sp, #-16]!
2608-
// mrs x1, nzcv
2609-
// adr x0, InstrTrampoline -> adrp + add
2610-
// ldr x0, [x0]
2628+
// adrp x0, InstrTrampoline
2629+
// ldr x0, [x0, #lo12:InstrTrampoline]
26112630
// subs x0, x0, #0x0
26122631
// b.eq IndCallHandler
26132632
// str x30, [sp, #-16]!
26142633
// blr x0
26152634
// ldr x30, [sp], #16
26162635
// b IndCallHandler
26172636
InstructionListType Insts;
2637+
2638+
// load handler address
2639+
MCInst InstAdrp;
2640+
InstAdrp.setOpcode(AArch64::ADRP);
2641+
InstAdrp.addOperand(MCOperand::createReg(getIntArgRegister(0)));
2642+
InstAdrp.addOperand(MCOperand::createImm(0));
2643+
setOperandToSymbolRef(InstAdrp, /* OpNum */ 1, InstrTrampoline,
2644+
/* Addend */ 0, Ctx, ELF::R_AARCH64_ADR_GOT_PAGE);
2645+
Insts.emplace_back(InstAdrp);
2646+
2647+
MCInst InstLoad;
2648+
InstLoad.setOpcode(AArch64::LDRXui);
2649+
InstLoad.addOperand(MCOperand::createReg(getIntArgRegister(0)));
2650+
InstLoad.addOperand(MCOperand::createReg(getIntArgRegister(0)));
2651+
InstLoad.addOperand(MCOperand::createImm(0));
2652+
setOperandToSymbolRef(InstLoad, /* OpNum */ 2, InstrTrampoline,
2653+
/* Addend */ 0, Ctx, ELF::R_AARCH64_LD64_GOT_LO12_NC);
2654+
Insts.emplace_back(InstLoad);
2655+
2656+
InstructionListType CmpJmp =
2657+
createCmpJE(getIntArgRegister(0), 0, IndCallHandler, Ctx);
2658+
Insts.insert(Insts.end(), CmpJmp.begin(), CmpJmp.end());
2659+
26182660
Insts.emplace_back();
2619-
createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
2620-
Insts.emplace_back();
2621-
getSystemFlag(Insts.back(), getIntArgRegister(1));
2622-
Insts.emplace_back();
2623-
Insts.emplace_back();
2624-
InstructionListType Addr =
2625-
materializeAddress(InstrTrampoline, Ctx, AArch64::X0);
2626-
std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
2627-
assert(Addr.size() == 2 && "Invalid Addr size");
2628-
Insts.emplace_back();
2629-
loadReg(Insts.back(), AArch64::X0, AArch64::X0);
2630-
InstructionListType cmpJmp =
2631-
createCmpJE(AArch64::X0, 0, IndCallHandler, Ctx);
2632-
Insts.insert(Insts.end(), cmpJmp.begin(), cmpJmp.end());
2633-
Insts.emplace_back();
2634-
storeReg(Insts.back(), AArch64::LR, AArch64::SP);
2661+
storeReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
2662+
26352663
Insts.emplace_back();
26362664
Insts.back().setOpcode(AArch64::BLR);
2637-
Insts.back().addOperand(MCOperand::createReg(AArch64::X0));
2665+
Insts.back().addOperand(MCOperand::createReg(getIntArgRegister(0)));
2666+
26382667
Insts.emplace_back();
2639-
loadReg(Insts.back(), AArch64::LR, AArch64::SP);
2668+
loadReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
2669+
26402670
Insts.emplace_back();
2641-
createDirectCall(Insts.back(), IndCallHandler, Ctx, /*IsTailCall*/ true);
2671+
createDirectBranch(Insts.back(), IndCallHandler, Ctx);
2672+
26422673
return Insts;
26432674
}
26442675

bolt/runtime/instr.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1691,9 +1691,12 @@ instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) {
16911691
extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
16921692
{
16931693
#if defined(__aarch64__)
1694+
// the target address is placed on stack
1695+
// the identifier of the indirect call site is placed in X1 register
1696+
16941697
// clang-format off
16951698
__asm__ __volatile__(SAVE_ALL
1696-
"ldp x0, x1, [sp, #288]\n"
1699+
"ldr x0, [sp, #272]\n"
16971700
"bl instrumentIndirectCall\n"
16981701
RESTORE_ALL
16991702
"ret\n"
@@ -1728,9 +1731,12 @@ extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
17281731
extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall()
17291732
{
17301733
#if defined(__aarch64__)
1734+
// the target address is placed on stack
1735+
// the identifier of the indirect call site is placed in X1 register
1736+
17311737
// clang-format off
17321738
__asm__ __volatile__(SAVE_ALL
1733-
"ldp x0, x1, [sp, #288]\n"
1739+
"ldr x0, [sp, #272]\n"
17341740
"bl instrumentIndirectCall\n"
17351741
RESTORE_ALL
17361742
"ret\n"

bolt/runtime/sys_aarch64.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@
1818
"stp x24, x25, [sp, #-16]!\n" \
1919
"stp x26, x27, [sp, #-16]!\n" \
2020
"stp x28, x29, [sp, #-16]!\n" \
21-
"str x30, [sp,#-16]!\n"
21+
"mrs x29, nzcv\n" \
22+
"stp x29, x30, [sp, #-16]!\n"
2223
// Mirrors SAVE_ALL
2324
#define RESTORE_ALL \
24-
"ldr x30, [sp], #16\n" \
25+
"ldp x29, x30, [sp], #16\n" \
26+
"msr nzcv, x29\n" \
2527
"ldp x28, x29, [sp], #16\n" \
2628
"ldp x26, x27, [sp], #16\n" \
2729
"ldp x24, x25, [sp], #16\n" \

bolt/test/runtime/AArch64/instrumentation-ind-call.c

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,63 @@ int main() {
1515
REQUIRES: system-linux,bolt-runtime
1616
1717
RUN: %clang %cflags %s -o %t.exe -Wl,-q -no-pie -fpie
18+
RUN: llvm-objdump --disassemble-symbols=main %t.exe \
19+
RUN: | FileCheck %s --check-prefix=CHECKINDIRECTREG
20+
21+
CHECKINDIRECTREG: mov w0, #0xa
22+
CHECKINDIRECTREG-NEXT: mov w1, #0x14
23+
CHECKINDIRECTREG-NEXT: blr x8
1824
1925
RUN: llvm-bolt %t.exe --instrument --instrumentation-file=%t.fdata \
20-
RUN: -o %t.instrumented
26+
RUN: -o %t.instrumented \
27+
RUN: | FileCheck %s --check-prefix=CHECK-INSTR-LOG
28+
29+
CHECK-INSTR-LOG: BOLT-INSTRUMENTER: Number of indirect call site descriptors: 1
30+
31+
RUN: llvm-objdump --disassemble-symbols=main %t.instrumented \
32+
RUN: | FileCheck %s --check-prefix=CHECK-INSTR-INDIRECTREG
33+
34+
RUN: llvm-objdump --disassemble-symbols=__bolt_instr_ind_call_handler \
35+
RUN: %t.instrumented | FileCheck %s --check-prefix=CHECK-INSTR-INDIR-CALL
36+
RUN: llvm-objdump --disassemble-symbols=__bolt_instr_ind_call_handler_func \
37+
RUN: %t.instrumented | FileCheck %s --check-prefix=CHECK-INSTR-INDIR-CALL-FUNC
38+
39+
CHECK-INSTR-INDIRECTREG: mov w0, #0xa
40+
CHECK-INSTR-INDIRECTREG-NEXT: mov w1, #0x14
41+
// store current values
42+
CHECK-INSTR-INDIRECTREG-NEXT: stp x0, x1, {{.*}}
43+
// store the indirect target address in x0
44+
CHECK-INSTR-INDIRECTREG-NEXT: mov x0, x8
45+
// load callsite id into x1
46+
CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
47+
CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
48+
CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
49+
CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
50+
CHECK-INSTR-INDIRECTREG-NEXT: stp x0, x30, {{.*}}
51+
CHECK-INSTR-INDIRECTREG-NEXT: adrp x8, {{.*}}
52+
CHECK-INSTR-INDIRECTREG-NEXT: add x8, {{.*}}
53+
// call instrumentation library handler function
54+
CHECK-INSTR-INDIRECTREG-NEXT: blr x8
55+
// restore registers saved before
56+
CHECK-INSTR-INDIRECTREG-NEXT: ldp x0, x30, {{.*}}
57+
CHECK-INSTR-INDIRECTREG-NEXT: mov x8, x0
58+
CHECK-INSTR-INDIRECTREG-NEXT: ldp x0, x1, {{.*}}
59+
// original indirect call instruction
60+
CHECK-INSTR-INDIRECTREG-NEXT: blr x8
61+
62+
63+
CHECK-INSTR-INDIR-CALL: __bolt_instr_ind_call_handler>:
64+
CHECK-INSTR-INDIR-CALL-NEXT: ret
65+
66+
CHECK-INSTR-INDIR-CALL-FUNC: __bolt_instr_ind_call_handler_func>:
67+
CHECK-INSTR-INDIR-CALL-FUNC-NEXT: adrp x0
68+
CHECK-INSTR-INDIR-CALL-FUNC-NEXT: ldr x0
69+
CHECK-INSTR-INDIR-CALL-FUNC-NEXT: cmp x0, #0x0
70+
CHECK-INSTR-INDIR-CALL-FUNC-NEXT: b.eq{{.*}}__bolt_instr_ind_call_handler
71+
CHECK-INSTR-INDIR-CALL-FUNC-NEXT: str x30
72+
CHECK-INSTR-INDIR-CALL-FUNC-NEXT: blr x0
73+
CHECK-INSTR-INDIR-CALL-FUNC-NEXT: ldr x30
74+
CHECK-INSTR-INDIR-CALL-FUNC-NEXT: b{{.*}}__bolt_instr_ind_call_handler
2175
2276
# Instrumented program needs to finish returning zero
2377
RUN: %t.instrumented | FileCheck %s -check-prefix=CHECK-OUTPUT

0 commit comments

Comments
 (0)