Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions bolt/include/bolt/Core/MCPlusBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,16 @@ class MCPlusBuilder {
return false;
}

virtual bool isAddXri(const MCInst &Inst) const {
llvm_unreachable("not implemented");
return false;
}

virtual bool isMOVW(const MCInst &Inst) const {
llvm_unreachable("not implemented");
return false;
}

virtual bool isMoveMem2Reg(const MCInst &Inst) const { return false; }

virtual bool mayLoad(const MCInst &Inst) const {
Expand Down
5 changes: 0 additions & 5 deletions bolt/include/bolt/Core/Relocation.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,6 @@ struct Relocation {
/// Skip relocations that we don't want to handle in BOLT
static bool skipRelocationType(uint32_t Type);

/// Handle special cases when relocation should not be processed by BOLT or
/// change relocation \p Type to proper one before continuing if \p Contents
/// and \P Type mismatch occurred.
static bool skipRelocationProcess(uint32_t &Type, uint64_t Contents);

/// Adjust value depending on relocation type (make it PC relative or not).
static uint64_t encodeValue(uint32_t Type, uint64_t Value, uint64_t PC);

Expand Down
13 changes: 11 additions & 2 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1473,10 +1473,19 @@ Error BinaryFunction::disassemble() {
}
}

uint64_t Addend = Relocation.Addend;

// For GOT relocations, create a reference against GOT entry ignoring
// the relocation symbol.
if (Relocation::isGOT(Relocation.Type)) {
assert(Relocation::isPCRelative(Relocation.Type) &&
"GOT relocation must be PC-relative on RISC-V");
Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
Addend = Relocation.Value + Relocation.Offset + getAddress();
}
int64_t Value = Relocation.Value;
const bool Result = BC.MIB->replaceImmWithSymbolRef(
Instruction, Symbol, Relocation.Addend, Ctx.get(), Value,
Relocation.Type);
Instruction, Symbol, Addend, Ctx.get(), Value, Relocation.Type);
(void)Result;
assert(Result && "cannot replace immediate with relocation");
}
Expand Down
85 changes: 0 additions & 85 deletions bolt/lib/Core/Relocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,78 +257,6 @@ static bool skipRelocationTypeRISCV(uint32_t Type) {
}
}

static bool skipRelocationProcessX86(uint32_t &Type, uint64_t Contents) {
return false;
}

static bool skipRelocationProcessAArch64(uint32_t &Type, uint64_t Contents) {
auto IsMov = [](uint64_t Contents) -> bool {
// The bits 28-23 are 0b100101
return (Contents & 0x1f800000) == 0x12800000;
};

auto IsB = [](uint64_t Contents) -> bool {
// The bits 31-26 are 0b000101
return (Contents & 0xfc000000) == 0x14000000;
};

auto IsAddImm = [](uint64_t Contents) -> bool {
// The bits 30-23 are 0b00100010
return (Contents & 0x7F800000) == 0x11000000;
};

// The linker might relax ADRP+LDR instruction sequence for loading symbol
// address from GOT table to ADRP+ADD sequence that would point to the
// binary-local symbol. Change relocation type in order to process it right.
if (Type == ELF::R_AARCH64_LD64_GOT_LO12_NC && IsAddImm(Contents)) {
Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
return false;
}

// The linker might perform TLS relocations relaxations, such as
// changed TLS access model (e.g. changed global dynamic model
// to initial exec), thus changing the instructions. The static
// relocations might be invalid at this point and we might no
// need to process these relocations anymore.
// More information could be found by searching
// elfNN_aarch64_tls_relax in bfd
switch (Type) {
default:
break;
case ELF::R_AARCH64_TLSDESC_LD64_LO12:
case ELF::R_AARCH64_TLSDESC_ADR_PAGE21:
case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
case ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: {
if (IsMov(Contents))
return true;
}
}

// The linker might replace load/store instruction with jump and
// veneer due to errata 843419
// https://documentation-service.arm.com/static/5fa29fddb209f547eebd361d
// Thus load/store relocations for these instructions must be ignored
// NOTE: We only process GOT and TLS relocations this way since the
// addend used in load/store instructions won't change after bolt
// (it is important since the instruction in veneer won't have relocation)
switch (Type) {
default:
break;
case ELF::R_AARCH64_LD64_GOT_LO12_NC:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can see how the code before and after the change handles:

  • the LD64_GOT_LO12_NC+ADD case by changing the type to ADD_ABS_LO12_NC (for ADRP+LDR -> ADRP+ADD)
  • the NOP+ADR optimization case by ignoring it

However, I’m not sure I fully understand how R_AARCH64_LD64_GOT_LO12_NC for errata 843419 is handled.

Does it fall into the else case at line 125 in AArch64MCSymbolizer.cpp?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We currently don't symbolize branch instructions, so I was under impression that this code was effectively a no-op . cc: @yota9 . Also, I realized that we lack a test case for the errata.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I say "we don't symbolize branches", I mean we ignore them in AArch64MCSymbolizer. We still symbolize them in BinaryFunction::disassemble(), but we do this without looking at relocations.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we handle this errata when assembling the program back currently, only on disassemble stage to correctly handle "wrong" relocations

case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
case ELF::R_AARCH64_TLSDESC_LD64_LO12: {
if (IsB(Contents))
return true;
}
}

return false;
}

static bool skipRelocationProcessRISCV(uint32_t &Type, uint64_t Contents) {
return false;
}

static uint64_t encodeValueX86(uint32_t Type, uint64_t Value, uint64_t PC) {
switch (Type) {
default:
Expand Down Expand Up @@ -798,19 +726,6 @@ bool Relocation::skipRelocationType(uint32_t Type) {
}
}

bool Relocation::skipRelocationProcess(uint32_t &Type, uint64_t Contents) {
switch (Arch) {
default:
llvm_unreachable("Unsupported architecture");
case Triple::aarch64:
return skipRelocationProcessAArch64(Type, Contents);
case Triple::riscv64:
return skipRelocationProcessRISCV(Type, Contents);
case Triple::x86_64:
return skipRelocationProcessX86(Type, Contents);
}
}

uint64_t Relocation::encodeValue(uint32_t Type, uint64_t Value, uint64_t PC) {
switch (Arch) {
default:
Expand Down
43 changes: 20 additions & 23 deletions bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2229,8 +2229,6 @@ bool RewriteInstance::analyzeRelocation(
ErrorOr<uint64_t> Value =
BC->getUnsignedValueAtAddress(Rel.getOffset(), RelSize);
assert(Value && "failed to extract relocated value");
if ((Skip = Relocation::skipRelocationProcess(RType, *Value)))
return true;

ExtractedValue = Relocation::extractValue(RType, *Value, Rel.getOffset());
Addend = getRelocationAddend(InputFile, Rel);
Expand Down Expand Up @@ -2283,17 +2281,14 @@ bool RewriteInstance::analyzeRelocation(
}
}

// If no symbol has been found or if it is a relocation requiring the
// creation of a GOT entry, do not link against the symbol but against
// whatever address was extracted from the instruction itself. We are
// not creating a GOT entry as this was already processed by the linker.
// For GOT relocs, do not subtract addend as the addend does not refer
// to this instruction's target, but it refers to the target in the GOT
// entry.
if (Relocation::isGOT(RType)) {
Addend = 0;
SymbolAddress = ExtractedValue + PCRelOffset;
} else if (Relocation::isTLS(RType)) {
// GOT relocation can cause the underlying instruction to be modified by the
// linker, resulting in the extracted value being different from the actual
// symbol. It's also possible to have a GOT entry for a symbol defined in the
// binary. In the latter case, the instruction can be using the GOT version
// causing the extracted value mismatch. Similar cases can happen for TLS.
// Pass the relocation information as is to the disassembler and let it decide
// how to use it for the operand symbolization.
if (Relocation::isGOT(RType) || Relocation::isTLS(RType)) {
SkipVerification = true;
} else if (!SymbolAddress) {
assert(!IsSectionRelocation);
Expand Down Expand Up @@ -2666,11 +2661,14 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,

MCSymbol *ReferencedSymbol = nullptr;
if (!IsSectionRelocation) {
if (BinaryData *BD = BC->getBinaryDataByName(SymbolName))
if (BinaryData *BD = BC->getBinaryDataByName(SymbolName)) {
ReferencedSymbol = BD->getSymbol();
else if (BC->isGOTSymbol(SymbolName))
} else if (BC->isGOTSymbol(SymbolName)) {
if (BinaryData *BD = BC->getGOTSymbol())
ReferencedSymbol = BD->getSymbol();
} else if (BinaryData *BD = BC->getBinaryDataAtAddress(SymbolAddress)) {
ReferencedSymbol = BD->getSymbol();
}
}

ErrorOr<BinarySection &> ReferencedSection{std::errc::bad_address};
Expand Down Expand Up @@ -2798,15 +2796,14 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
}
}

if (ForceRelocation) {
std::string Name =
Relocation::isGOT(RType) ? "__BOLT_got_zero" : SymbolName;
ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0);
SymbolAddress = 0;
if (Relocation::isGOT(RType))
Addend = Address;
if (ForceRelocation && !ReferencedBF) {
// Create the relocation symbol if it's not defined in the binary.
if (SymbolAddress == 0)
ReferencedSymbol = BC->registerNameAtAddress(SymbolName, 0, 0, 0);

LLVM_DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol "
<< SymbolName << " with addend " << Addend << '\n');
<< ReferencedSymbol->getName() << " with addend "
<< Addend << '\n');
} else if (ReferencedBF) {
ReferencedSymbol = ReferencedBF->getSymbol();
uint64_t RefFunctionOffset = 0;
Expand Down
4 changes: 2 additions & 2 deletions bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return Inst.getOpcode() == AArch64::ADR;
}

bool isAddXri(const MCInst &Inst) const {
bool isAddXri(const MCInst &Inst) const override {
return Inst.getOpcode() == AArch64::ADDXri;
}

Expand Down Expand Up @@ -318,7 +318,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
Inst.getOpcode() == AArch64::CBZX);
}

bool isMOVW(const MCInst &Inst) const {
bool isMOVW(const MCInst &Inst) const override {
return (Inst.getOpcode() == AArch64::MOVKWi ||
Inst.getOpcode() == AArch64::MOVKXi ||
Inst.getOpcode() == AArch64::MOVNWi ||
Expand Down
78 changes: 62 additions & 16 deletions bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,24 +45,15 @@ bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
BC.MIB->getTargetExprFor(Inst, Expr, *Ctx, RelType)));
};

// The linker can convert ADRP+ADD and ADRP+LDR instruction sequences into
// NOP+ADR. After the conversion, the linker might keep the relocations and
// if we try to symbolize ADR's operand using outdated relocations, we might
// get unexpected results. Hence, we check for the conversion/relaxation, and
// ignore the relocation. The symbolization is done based on the PC-relative
// value of the operand instead.
if (Relocation && BC.MIB->isADR(Inst)) {
if (Relocation->Type == ELF::R_AARCH64_ADD_ABS_LO12_NC ||
Relocation->Type == ELF::R_AARCH64_LD64_GOT_LO12_NC) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation at 0x"
<< Twine::utohexstr(InstAddress) << '\n');
Relocation = nullptr;
if (Relocation) {
auto AdjustedRel = adjustRelocation(*Relocation, Inst);
if (AdjustedRel) {
addOperand(AdjustedRel->Symbol, AdjustedRel->Addend, AdjustedRel->Type);
return true;
}
}

if (Relocation) {
addOperand(Relocation->Symbol, Relocation->Addend, Relocation->Type);
return true;
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation at 0x"
<< Twine::utohexstr(InstAddress) << '\n');
}

if (!BC.MIB->hasPCRelOperand(Inst))
Expand All @@ -88,6 +79,61 @@ bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
return true;
}

std::optional<Relocation>
AArch64MCSymbolizer::adjustRelocation(const Relocation &Rel,
const MCInst &Inst) const {
BinaryContext &BC = Function.getBinaryContext();

// The linker can convert ADRP+ADD and ADRP+LDR instruction sequences into
// NOP+ADR. After the conversion, the linker might keep the relocations and
// if we try to symbolize ADR's operand using outdated relocations, we might
// get unexpected results. Hence, we check for the conversion/relaxation, and
// ignore the relocation. The symbolization is done based on the PC-relative
// value of the operand instead.
if (BC.MIB->isADR(Inst) && (Rel.Type == ELF::R_AARCH64_ADD_ABS_LO12_NC ||
Rel.Type == ELF::R_AARCH64_LD64_GOT_LO12_NC))
return std::nullopt;

// The linker might perform TLS relocations relaxations, such as changed TLS
// access model (e.g. changed global dynamic model to initial exec), thus
// changing the instructions. The static relocations might be invalid at this
// point and we don't have to process these relocations anymore. More
// information could be found by searching elfNN_aarch64_tls_relax in bfd.
if (BC.MIB->isMOVW(Inst)) {
switch (Rel.Type) {
default:
break;
case ELF::R_AARCH64_TLSDESC_LD64_LO12:
case ELF::R_AARCH64_TLSDESC_ADR_PAGE21:
case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
case ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
return std::nullopt;
}
}

if (!Relocation::isGOT(Rel.Type))
return Rel;

Relocation AdjustedRel = Rel;
if (Rel.Type == ELF::R_AARCH64_LD64_GOT_LO12_NC && BC.MIB->isAddXri(Inst)) {
// The ADRP+LDR sequence was converted into ADRP+ADD. We are looking at the
// second instruction and have to use the relocation type for ADD.
AdjustedRel.Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
} else {
// For instructions that reference GOT, ignore the referenced symbol and
// use value at the relocation site. FixRelaxationPass will look at
// instruction pairs and will perform necessary adjustments.
ErrorOr<uint64_t> SymbolValue = BC.getSymbolValue(*Rel.Symbol);
assert(SymbolValue && "Symbol value should be set");
const uint64_t SymbolPageAddr = *SymbolValue & ~0xfffULL;

AdjustedRel.Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
AdjustedRel.Addend = Rel.Value;
}

return AdjustedRel;
}

void AArch64MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
int64_t Value,
uint64_t Address) {}
Expand Down
8 changes: 8 additions & 0 deletions bolt/lib/Target/AArch64/AArch64MCSymbolizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include "bolt/Core/BinaryFunction.h"
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
#include <optional>

namespace llvm {
namespace bolt {
Expand All @@ -20,6 +21,13 @@ class AArch64MCSymbolizer : public MCSymbolizer {
BinaryFunction &Function;
bool CreateNewSymbols{true};

/// Modify relocation \p Rel based on type of the relocation and the
/// instruction it was applied to. Return the new relocation info, or
/// std::nullopt if the relocation should be ignored, e.g. in the case the
/// instruction was modified by the linker.
std::optional<Relocation> adjustRelocation(const Relocation &Rel,
const MCInst &Inst) const;

public:
AArch64MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true)
: MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr),
Expand Down