Skip to content

Commit 9aa1adb

Browse files
committed
[PPC][BOLT]
In this initial contribution to BOLT PPC I am optimising a simple “puts” Hello World example. In the last steps of BOLT rewriting, after handling the call to `puts`, BOLT branched into a stub helper. The stub tried to load the callee's address from the GOT (Global Offset Table), but it assumed the GOT was part of the same TOC window as the caller (r2 - 0x8000), which is not true. BOLT's $__GOT is located far outside that window, so the `ld` (load) instruction retrieved the wrong word (e.g. the TOC base itself), and the following `bctr` jumped into data, causing a segfault. To correctly load the real function address of `puts` (the callee’s function address) from $__GOT, I changed the stub helper to use absolute relocation instead.
1 parent 143d450 commit 9aa1adb

File tree

3 files changed

+183
-3
lines changed

3 files changed

+183
-3
lines changed

bolt/include/bolt/Target/PowerPC/PPCMCPlusBuilder.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
#pragma once
2-
32
#include "bolt/Core/MCPlusBuilder.h"
3+
#include <vector>
44

55
namespace llvm {
6+
class MCInst;
7+
class MCSymbol;
68
namespace bolt {
79

810
class PPCMCPlusBuilder : public MCPlusBuilder {
@@ -75,6 +77,14 @@ class PPCMCPlusBuilder : public MCPlusBuilder {
7577
const MCAsmBackend &MAB) const override;
7678

7779
bool isTOCRestoreAfterCall(const MCInst &I) const override;
80+
81+
// Build a PPC64 call-stub as MCInsts; the stub tail-calls Target via CTR.
82+
// Out will receive: [std r2,24(r1)] (optional), address materialization into
83+
// r12, mtctr r12, bctr. No @toc* fixups are used.
84+
void buildCallStubAbsolute(std::vector<llvm::MCInst> &Out,
85+
const llvm::MCSymbol *TargetSym, uint16_t highest,
86+
uint16_t higher, uint16_t half,
87+
uint16_t lower) const;
7888
};
7989

8090
} // namespace bolt

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "bolt/Rewrite/MetadataRewriters.h"
3535
#include "bolt/RuntimeLibs/HugifyRuntimeLibrary.h"
3636
#include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
37+
#include "bolt/Target/PowerPC/PPCMCPlusBuilder.h"
3738
#include "bolt/Utils/CommandLineOpts.h"
3839
#include "bolt/Utils/Utils.h"
3940
#include "llvm/ADT/AddressRanges.h"
@@ -60,10 +61,12 @@
6061
#include "llvm/Support/ToolOutputFile.h"
6162
#include "llvm/Support/raw_ostream.h"
6263
#include <algorithm>
64+
#include <cstdint>
6365
#include <fstream>
6466
#include <memory>
6567
#include <optional>
6668
#include <system_error>
69+
#include <unordered_map>
6770

6871
#undef DEBUG_TYPE
6972
#define DEBUG_TYPE "bolt"
@@ -2863,6 +2866,58 @@ void RewriteInstance::readRelocations(const SectionRef &Section) {
28632866
handleRelocation(RelocatedSection, Rel);
28642867
}
28652868

2869+
static bool shouldUsePPCAbsoluteCallStub(const RelocationRef &Rel,
2870+
MCSymbol *TargetSym) {
2871+
(void)Rel;
2872+
(void)TargetSym;
2873+
return true;
2874+
}
2875+
2876+
/// Return the injected stub function that tail-calls \p TargetSym through an
/// absolute address, creating it on the first request.
///
/// Stubs are memoized by name in a function-local static map, so repeated
/// requests for the same target symbol name return the same BinaryFunction.
/// The \p MIB argument is not consulted; the builder is taken from BC.MIB and
/// cast to PPCMCPlusBuilder.
static BinaryFunction *getOrCreatePPCAbsoluteCallStub(BinaryContext &BC,
                                                      MCSymbol &TargetSym,
                                                      MCPlusBuilder &MIB) {
  static std::unordered_map<std::string, BinaryFunction *> PPCStubCache;

  const std::string StubName =
      ("__bolt_ppc_abs_call_stub." + TargetSym.getName()).str();

  // Fast path: a stub for this target was already built.
  if (const auto Cached = PPCStubCache.find(StubName);
      Cached != PPCStubCache.end())
    return Cached->second;

  // Cache miss: create an injected function to hold the stub.
  auto *const Stub = BC.createInjectedBinaryFunction(StubName);
  Stub->setSimple(true);
  Stub->setCodeSectionName(".text"); // or a dedicated stubs section

  // The stub body lives in a single basic block.
  BinaryBasicBlock *const Block = Stub->addBasicBlock(/*Label=*/nullptr);

  // Look up the target's address by name; it stays 0 when no entry is found.
  auto *const TargetData = BC.getBinaryDataByName(TargetSym.getName());
  const uint64_t TargetAddr = TargetData ? TargetData->getAddress() : 0;
  assert(TargetAddr && "target symbol address expected");

  // Split the 64-bit address into four 16-bit pieces. The third piece is
  // taken from (TargetAddr + 0x8000), i.e. it is the "high adjusted" half.
  const auto Slice16 = [](uint64_t Value, unsigned Shift) {
    return static_cast<uint16_t>((Value >> Shift) & 0xFFFF);
  };
  const uint16_t highest = Slice16(TargetAddr, 48);
  const uint16_t higher = Slice16(TargetAddr, 32);
  const uint16_t half = Slice16(TargetAddr + 0x8000, 16);
  const uint16_t lower = Slice16(TargetAddr, 0);

  // Ask the PowerPC builder for the stub instruction sequence.
  std::vector<MCInst> StubInsts;
  static_cast<PPCMCPlusBuilder &>(*BC.MIB).buildCallStubAbsolute(
      StubInsts, &TargetSym, highest, higher, half, lower);

  // Copy the sequence into the stub's basic block, preserving order.
  for (MCInst &Inst : StubInsts)
    Block->addInstruction(Inst);

  PPCStubCache.emplace(StubName, Stub);
  return Stub;
}
2920+
28662921
void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
28672922
const RelocationRef &Rel) {
28682923
const bool IsAArch64 = BC->isAArch64();
@@ -2986,6 +3041,24 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
29863041
}
29873042
}
29883043

3044+
if (IsPPC64 && IsFromCode &&
3045+
(RType == ELF::R_PPC64_REL24 || RType == ELF::R_PPC64_REL24_NOTOC) &&
3046+
ReferencedSymbol) {
3047+
3048+
const StringRef SymName = ReferencedSymbol->getName();
3049+
const bool AlreadyStub = SymName.starts_with("__bolt_ppc_abs_call_stub.");
3050+
3051+
if (!AlreadyStub && shouldUsePPCAbsoluteCallStub(Rel, ReferencedSymbol)) {
3052+
auto *StubBF =
3053+
getOrCreatePPCAbsoluteCallStub(*BC, *ReferencedSymbol, *BC->MIB);
3054+
ReferencedSymbol = StubBF->getSymbol(); // redirect to stub
3055+
Addend = 0;
3056+
ExtractedValue = 0;
3057+
}
3058+
}
3059+
BC->addRelocation(Rel.getOffset(), ReferencedSymbol, RType, Addend,
3060+
ExtractedValue);
3061+
29893062
ErrorOr<BinarySection &> ReferencedSection{std::errc::bad_address};
29903063

29913064
// --- SAFE symbol->section lookup (PPC64 only) ---

bolt/lib/Target/PowerPC/PPCMCPlusBuilder.cpp

Lines changed: 99 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,18 @@
1616
#include "llvm/BinaryFormat/ELF.h"
1717
#include "llvm/MC/MCInst.h"
1818
#include "llvm/MC/MCRegisterInfo.h"
19-
#include <optional>
20-
#include <string>
19+
#include <cstdint>
2120
#define DEBUG_TYPE "bolt-ppc"
21+
#include "bolt/Core/BinaryFunction.h"
22+
#include "llvm/MC/MCAsmBackend.h"
23+
#include "llvm/MC/MCContext.h"
24+
#include "llvm/MC/MCExpr.h"
25+
#include "llvm/MC/MCFixup.h"
26+
#include "llvm/MC/MCSymbol.h"
2227
#include "llvm/Support/Debug.h"
2328
#include "llvm/Support/raw_ostream.h"
29+
#include <optional>
30+
#include <string>
2431

2532
using namespace llvm;
2633
using namespace bolt;
@@ -492,6 +499,96 @@ bool PPCMCPlusBuilder::isTOCRestoreAfterCall(const MCInst &I) const {
492499
return true;
493500
}
494501

502+
static inline MCOperand R(unsigned Reg) { return MCOperand::createReg(Reg); }
503+
static inline MCOperand Imm(int64_t I) { return MCOperand::createImm(I); }
504+
// Build a 64-bit absolute address of the callee's function address (e.g.
505+
// "puts") into r12, then tail-call it via BCTR.
506+
void PPCMCPlusBuilder::buildCallStubAbsolute(std::vector<MCInst> &Out,
507+
const MCSymbol *TargetSym,
508+
uint16_t highest, uint16_t higher,
509+
uint16_t ha, uint16_t l) const {
510+
// Registers
511+
const unsigned R1 = PPC::X1; // sp
512+
const unsigned R2 = PPC::X2; // caller TOC
513+
const unsigned R12 = PPC::X12; // scratch / entry per ELFv2
514+
515+
// 1) Optional: save caller TOC in the standard post-call slot 24(r1)
516+
// ABI compliant PPC64 code should do this before calls.
517+
{
518+
MCInst I;
519+
I.setOpcode(PPC::STD); // STD rS, D(rA)
520+
I.addOperand(R(R2));
521+
I.addOperand(R(R1));
522+
I.addOperand(Imm(24));
523+
Out.push_back(std::move(I));
524+
}
525+
// Top 32 bits
526+
// 2) Materialize absolute 64-bit address into r12 (no @toc*)
527+
// addis r12, r0, target@highest. bits (63-48)
528+
{
529+
MCInst I;
530+
I.setOpcode(PPC::ADDIS);
531+
I.addOperand(R(R12));
532+
I.addOperand(R(PPC::X0));
533+
I.addOperand(Imm(highest));
534+
Out.push_back(std::move(I));
535+
}
536+
// ori r12, r12, target@higher. bits (47-32)
537+
{
538+
MCInst I;
539+
I.setOpcode(PPC::ORI);
540+
I.addOperand(R(R12));
541+
I.addOperand(R(R12));
542+
I.addOperand(Imm(higher));
543+
Out.push_back(std::move(I));
544+
}
545+
// rldicr r12, r12, 32, 31 ; aka sldi r12, r12, 32
546+
// shift left to make room for lower 32 bits
547+
{
548+
MCInst I;
549+
I.setOpcode(PPC::RLDICR);
550+
I.addOperand(R(R12));
551+
I.addOperand(R(R12));
552+
I.addOperand(Imm(32));
553+
I.addOperand(Imm(31));
554+
Out.push_back(std::move(I));
555+
}
556+
// Low 32 bits
557+
// addis r12, r12, target@ha
558+
{
559+
MCInst I;
560+
I.setOpcode(PPC::ADDIS);
561+
I.addOperand(R(R12));
562+
I.addOperand(R(R12));
563+
I.addOperand(Imm(ha));
564+
Out.push_back(std::move(I));
565+
}
566+
// ori r12, r12, target@l
567+
{
568+
MCInst I;
569+
I.setOpcode(PPC::ORI);
570+
I.addOperand(R(R12));
571+
I.addOperand(R(R12));
572+
I.addOperand(Imm(l));
573+
Out.push_back(std::move(I));
574+
}
575+
// Now r12 has the full 64-bit address of TargetSym.
576+
// 3) mtctr r12 ; bctr
577+
// Move address to Counter Register
578+
{
579+
MCInst I;
580+
I.setOpcode(PPC::MTCTR);
581+
I.addOperand(R(R12));
582+
Out.push_back(std::move(I));
583+
}
584+
// Branch to the address in CTR (tail call to TargetSym)
585+
{
586+
MCInst I;
587+
I.setOpcode(PPC::BCTR);
588+
Out.push_back(std::move(I));
589+
}
590+
}
591+
495592
namespace llvm {
496593
namespace bolt {
497594

0 commit comments

Comments
 (0)