diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index f7cf538bd0e86..50d652a8bc677 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1412,13 +1412,15 @@ class MCPlusBuilder {
     return false;
   }
 
-  /// Modify a direct call instruction \p Inst with an indirect call taking
-  /// a destination from a memory location pointed by \p TargetLocation symbol.
-  virtual bool convertCallToIndirectCall(MCInst &Inst,
-                                         const MCSymbol *TargetLocation,
-                                         MCContext *Ctx) {
+  /// Creates an indirect call to the function within the \p DirectCall PLT
+  /// stub. The function's memory location is pointed to by the
+  /// \p TargetLocation symbol. Returns the replacement instruction sequence,
+  /// which must be non-empty and end with the call itself.
+  virtual InstructionListType
+  createIndirectPltCall(const MCInst &DirectCall,
+                        const MCSymbol *TargetLocation, MCContext *Ctx) {
     llvm_unreachable("not implemented");
-    return false;
+    return {};
   }
 
   /// Morph an indirect call into a load where \p Reg holds the call target.
diff --git a/bolt/lib/Passes/PLTCall.cpp b/bolt/lib/Passes/PLTCall.cpp
index d0276f22e14ef..2ed996fadbb99 100644
--- a/bolt/lib/Passes/PLTCall.cpp
+++ b/bolt/lib/Passes/PLTCall.cpp
@@ -48,8 +48,8 @@ Error PLTCall::runOnFunctions(BinaryContext &BC) {
     return Error::success();
 
   uint64_t NumCallsOptimized = 0;
-  for (auto &It : BC.getBinaryFunctions()) {
-    BinaryFunction &Function = It.second;
+  for (auto &BFI : BC.getBinaryFunctions()) {
+    BinaryFunction &Function = BFI.second;
     if (!shouldOptimize(Function))
       continue;
 
@@ -61,18 +61,24 @@ Error PLTCall::runOnFunctions(BinaryContext &BC) {
       if (opts::PLT == OT_HOT && !BB.getKnownExecutionCount())
         continue;
 
-      for (MCInst &Instr : BB) {
-        if (!BC.MIB->isCall(Instr))
+      for (auto II = BB.begin(); II != BB.end(); ++II) {
+        if (!BC.MIB->isCall(*II))
           continue;
-        const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr);
+        const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(*II);
         if (!CallSymbol)
           continue;
         const BinaryFunction *CalleeBF = BC.getFunctionForSymbol(CallSymbol);
         if (!CalleeBF || !CalleeBF->isPLTFunction())
           continue;
-        BC.MIB->convertCallToIndirectCall(Instr, CalleeBF->getPLTSymbol(),
-                                          BC.Ctx.get());
-        BC.MIB->addAnnotation(Instr, "PLTCall", true);
+        const InstructionListType NewCode = BC.MIB->createIndirectPltCall(
+            *II, CalleeBF->getPLTSymbol(), BC.Ctx.get());
+        // Validate before touching the block: with an empty sequence the
+        // iterator arithmetic below would underflow.
+        assert(!NewCode.empty() && "PLT Call replacement must be non-empty");
+        II = BB.replaceInstruction(II, NewCode);
+        // Step to the last inserted instruction, i.e. the call itself, so the
+        // annotation lands on it and the loop resumes after the sequence.
+        std::advance(II, NewCode.size() - 1);
+        BC.MIB->addAnnotation(*II, "PLTCall", true);
         ++NumCallsOptimized;
       }
     }
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 0ae9d3668b93b..9590629f9b344 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -1055,6 +1055,51 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     return true;
   }
 
+  InstructionListType createIndirectPltCall(const MCInst &DirectCall,
+                                            const MCSymbol *TargetLocation,
+                                            MCContext *Ctx) override {
+    const bool IsTailCall = isTailCall(DirectCall);
+    assert((DirectCall.getOpcode() == AArch64::BL ||
+            (DirectCall.getOpcode() == AArch64::B && IsTailCall)) &&
+           "64-bit direct (tail) call instruction expected");
+
+    InstructionListType Code;
+    // Code sequence for indirect plt call:
+    //   adrp x16, Target@GOT
+    //   ldr  x17, [x16, :lo12:Target@GOT]
+    //   blr  x17  ; or 'br' for tail calls
+    //
+    // NOTE(review): unlike the X86 implementation, which copies DirectCall,
+    // only the tail-call marker is carried over to the new call here; confirm
+    // that dropping any other annotations of DirectCall is intended.
+
+    MCInst InstAdrp;
+    InstAdrp.setOpcode(AArch64::ADRP);
+    InstAdrp.addOperand(MCOperand::createReg(AArch64::X16));
+    InstAdrp.addOperand(MCOperand::createImm(0));
+    setOperandToSymbolRef(InstAdrp, /* OpNum */ 1, TargetLocation,
+                          /* Addend */ 0, Ctx, ELF::R_AARCH64_ADR_GOT_PAGE);
+    Code.emplace_back(InstAdrp);
+
+    MCInst InstLoad;
+    InstLoad.setOpcode(AArch64::LDRXui);
+    InstLoad.addOperand(MCOperand::createReg(AArch64::X17));
+    InstLoad.addOperand(MCOperand::createReg(AArch64::X16));
+    InstLoad.addOperand(MCOperand::createImm(0));
+    setOperandToSymbolRef(InstLoad, /* OpNum */ 2, TargetLocation,
+                          /* Addend */ 0, Ctx, ELF::R_AARCH64_LD64_GOT_LO12_NC);
+    Code.emplace_back(InstLoad);
+
+    MCInst InstCall;
+    InstCall.setOpcode(IsTailCall ? AArch64::BR : AArch64::BLR);
+    InstCall.addOperand(MCOperand::createReg(AArch64::X17));
+    if (IsTailCall)
+      setTailCall(InstCall);
+    Code.emplace_back(InstCall);
+
+    return Code;
+  }
+
   bool lowerTailCall(MCInst &Inst) override {
     removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall);
     if (getConditionalTailCall(Inst))
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
index a33a9dc8c013c..792129544b7bb 100644
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -1639,11 +1639,16 @@ class X86MCPlusBuilder : public MCPlusBuilder {
     return true;
   }
 
-  bool convertCallToIndirectCall(MCInst &Inst, const MCSymbol *TargetLocation,
-                                 MCContext *Ctx) override {
-    assert((Inst.getOpcode() == X86::CALL64pcrel32 ||
-            (Inst.getOpcode() == X86::JMP_4 && isTailCall(Inst))) &&
+  InstructionListType createIndirectPltCall(const MCInst &DirectCall,
+                                            const MCSymbol *TargetLocation,
+                                            MCContext *Ctx) override {
+    assert((DirectCall.getOpcode() == X86::CALL64pcrel32 ||
+            (DirectCall.getOpcode() == X86::JMP_4 && isTailCall(DirectCall))) &&
            "64-bit direct (tail) call instruction expected");
+
+    InstructionListType Code;
+    // Create a new indirect call by converting the previous direct call.
+    MCInst Inst = DirectCall;
     const auto NewOpcode =
         (Inst.getOpcode() == X86::CALL64pcrel32) ? X86::CALL64m : X86::JMP32m;
     Inst.setOpcode(NewOpcode);
@@ -1664,7 +1669,8 @@ class X86MCPlusBuilder : public MCPlusBuilder {
     Inst.insert(Inst.begin(),
                 MCOperand::createReg(X86::RIP)); // BaseReg
 
-    return true;
+    Code.emplace_back(Inst);
+    return Code;
   }
 
   void convertIndirectCallToLoad(MCInst &Inst, MCPhysReg Reg) override {
diff --git a/bolt/test/AArch64/plt-call.test b/bolt/test/AArch64/plt-call.test
new file mode 100644
index 0000000000000..da307d4a6c01e
--- /dev/null
+++ b/bolt/test/AArch64/plt-call.test
@@ -0,0 +1,15 @@
+// Verify that PLTCall optimization works.
+
+RUN: %clang %cflags %p/../Inputs/plt-tailcall.c \
+RUN:    -o %t -Wl,-q
+RUN: llvm-bolt %t -o %t.bolt --plt=all --print-plt --print-only=foo | FileCheck %s
+
+// Call to printf
+CHECK: adrp x16, printf@GOT
+CHECK: ldr x17, [x16, :lo12:printf@GOT]
+CHECK: blr x17 # PLTCall: 1
+
+// Call to puts, which was tail-call optimized
+CHECK: adrp x16, puts@GOT
+CHECK: ldr x17, [x16, :lo12:puts@GOT]
+CHECK: br x17 # TAILCALL # PLTCall: 1
diff --git a/bolt/test/Inputs/plt-tailcall.c b/bolt/test/Inputs/plt-tailcall.c
new file mode 100644
index 0000000000000..13f6e29c60774
--- /dev/null
+++ b/bolt/test/Inputs/plt-tailcall.c
@@ -0,0 +1,8 @@
+#include "stub.h"
+
+int foo(char *c) {
+  printf("");
+  __attribute__((musttail)) return puts(c);
+}
+
+int main() { return foo("a"); }
diff --git a/bolt/test/X86/plt-call.test b/bolt/test/X86/plt-call.test
new file mode 100644
index 0000000000000..e6ae86c179d27
--- /dev/null
+++ b/bolt/test/X86/plt-call.test
@@ -0,0 +1,11 @@
+// Verify that PLTCall optimization works.
+
+RUN: %clang %cflags %p/../Inputs/plt-tailcall.c \
+RUN:    -o %t -Wl,-q
+RUN: llvm-bolt %t -o %t.bolt --plt=all --print-plt --print-only=foo | FileCheck %s
+
+// Call to printf
+CHECK: callq *printf@GOT(%rip) # PLTCall: 1
+
+// Call to puts, which was tail-call optimized
+CHECK: jmpl *puts@GOT(%rip) # TAILCALL # PLTCall: 1