Skip to content

Commit 1d701f5

Browse files
committed
[BOLT] Pass unfiltered relocations to disassembler. NFCI
Instead of filtering and modifying relocations in readRelocations(), preserve the relocation info and use it in the symbolizing disassembler. This change mostly affects AArch64, where we need to look at original linker relocations in order to properly symbolize instruction operands.
1 parent e6382f2 commit 1d701f5

File tree

8 files changed

+115
-133
lines changed

8 files changed

+115
-133
lines changed

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,16 @@ class MCPlusBuilder {
637637
return false;
638638
}
639639

640+
virtual bool isAddXri(const MCInst &Inst) const {
641+
llvm_unreachable("not implemented");
642+
return false;
643+
}
644+
645+
virtual bool isMOVW(const MCInst &Inst) const {
646+
llvm_unreachable("not implemented");
647+
return false;
648+
}
649+
640650
virtual bool isMoveMem2Reg(const MCInst &Inst) const { return false; }
641651

642652
virtual bool mayLoad(const MCInst &Inst) const {

bolt/include/bolt/Core/Relocation.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,6 @@ struct Relocation {
5959
/// Skip relocations that we don't want to handle in BOLT
6060
static bool skipRelocationType(uint64_t Type);
6161

62-
/// Handle special cases when relocation should not be processed by BOLT or
63-
/// change relocation \p Type to proper one before continuing if \p Contents
64-
/// and \P Type mismatch occurred.
65-
static bool skipRelocationProcess(uint64_t &Type, uint64_t Contents);
66-
6762
// Adjust value depending on relocation type (make it PC relative or not)
6863
static uint64_t encodeValue(uint64_t Type, uint64_t Value, uint64_t PC);
6964

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1473,10 +1473,19 @@ Error BinaryFunction::disassemble() {
14731473
}
14741474
}
14751475

1476+
uint64_t Addend = Relocation.Addend;
1477+
1478+
// For GOT relocations, create a reference against GOT entry ignoring
1479+
// the relocation symbol.
1480+
if (Relocation::isGOT(Relocation.Type)) {
1481+
assert(Relocation::isPCRelative(Relocation.Type) &&
1482+
"GOT relocation must be PC-relative on RISC-V");
1483+
Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
1484+
Addend = Relocation.Value + Relocation.Offset + getAddress();
1485+
}
14761486
int64_t Value = Relocation.Value;
14771487
const bool Result = BC.MIB->replaceImmWithSymbolRef(
1478-
Instruction, Symbol, Relocation.Addend, Ctx.get(), Value,
1479-
Relocation.Type);
1488+
Instruction, Symbol, Addend, Ctx.get(), Value, Relocation.Type);
14801489
(void)Result;
14811490
assert(Result && "cannot replace immediate with relocation");
14821491
}

bolt/lib/Core/Relocation.cpp

Lines changed: 0 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -256,78 +256,6 @@ static bool skipRelocationTypeRISCV(uint64_t Type) {
256256
}
257257
}
258258

259-
static bool skipRelocationProcessX86(uint64_t &Type, uint64_t Contents) {
260-
return false;
261-
}
262-
263-
static bool skipRelocationProcessAArch64(uint64_t &Type, uint64_t Contents) {
264-
auto IsMov = [](uint64_t Contents) -> bool {
265-
// The bits 28-23 are 0b100101
266-
return (Contents & 0x1f800000) == 0x12800000;
267-
};
268-
269-
auto IsB = [](uint64_t Contents) -> bool {
270-
// The bits 31-26 are 0b000101
271-
return (Contents & 0xfc000000) == 0x14000000;
272-
};
273-
274-
auto IsAddImm = [](uint64_t Contents) -> bool {
275-
// The bits 30-23 are 0b00100010
276-
return (Contents & 0x7F800000) == 0x11000000;
277-
};
278-
279-
// The linker might relax ADRP+LDR instruction sequence for loading symbol
280-
// address from GOT table to ADRP+ADD sequence that would point to the
281-
// binary-local symbol. Change relocation type in order to process it right.
282-
if (Type == ELF::R_AARCH64_LD64_GOT_LO12_NC && IsAddImm(Contents)) {
283-
Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
284-
return false;
285-
}
286-
287-
// The linker might perform TLS relocations relaxations, such as
288-
// changed TLS access model (e.g. changed global dynamic model
289-
// to initial exec), thus changing the instructions. The static
290-
// relocations might be invalid at this point and we might no
291-
// need to process these relocations anymore.
292-
// More information could be found by searching
293-
// elfNN_aarch64_tls_relax in bfd
294-
switch (Type) {
295-
default:
296-
break;
297-
case ELF::R_AARCH64_TLSDESC_LD64_LO12:
298-
case ELF::R_AARCH64_TLSDESC_ADR_PAGE21:
299-
case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
300-
case ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: {
301-
if (IsMov(Contents))
302-
return true;
303-
}
304-
}
305-
306-
// The linker might replace load/store instruction with jump and
307-
// veneer due to errata 843419
308-
// https://documentation-service.arm.com/static/5fa29fddb209f547eebd361d
309-
// Thus load/store relocations for these instructions must be ignored
310-
// NOTE: We only process GOT and TLS relocations this way since the
311-
// addend used in load/store instructions won't change after bolt
312-
// (it is important since the instruction in veneer won't have relocation)
313-
switch (Type) {
314-
default:
315-
break;
316-
case ELF::R_AARCH64_LD64_GOT_LO12_NC:
317-
case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
318-
case ELF::R_AARCH64_TLSDESC_LD64_LO12: {
319-
if (IsB(Contents))
320-
return true;
321-
}
322-
}
323-
324-
return false;
325-
}
326-
327-
static bool skipRelocationProcessRISCV(uint64_t &Type, uint64_t Contents) {
328-
return false;
329-
}
330-
331259
static uint64_t encodeValueX86(uint64_t Type, uint64_t Value, uint64_t PC) {
332260
switch (Type) {
333261
default:
@@ -797,19 +725,6 @@ bool Relocation::skipRelocationType(uint64_t Type) {
797725
}
798726
}
799727

800-
bool Relocation::skipRelocationProcess(uint64_t &Type, uint64_t Contents) {
801-
switch (Arch) {
802-
default:
803-
llvm_unreachable("Unsupported architecture");
804-
case Triple::aarch64:
805-
return skipRelocationProcessAArch64(Type, Contents);
806-
case Triple::riscv64:
807-
return skipRelocationProcessRISCV(Type, Contents);
808-
case Triple::x86_64:
809-
return skipRelocationProcessX86(Type, Contents);
810-
}
811-
}
812-
813728
uint64_t Relocation::encodeValue(uint64_t Type, uint64_t Value, uint64_t PC) {
814729
switch (Arch) {
815730
default:

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2229,8 +2229,6 @@ bool RewriteInstance::analyzeRelocation(
22292229
ErrorOr<uint64_t> Value =
22302230
BC->getUnsignedValueAtAddress(Rel.getOffset(), RelSize);
22312231
assert(Value && "failed to extract relocated value");
2232-
if ((Skip = Relocation::skipRelocationProcess(RType, *Value)))
2233-
return true;
22342232

22352233
ExtractedValue = Relocation::extractValue(RType, *Value, Rel.getOffset());
22362234
Addend = getRelocationAddend(InputFile, Rel);
@@ -2283,17 +2281,14 @@ bool RewriteInstance::analyzeRelocation(
22832281
}
22842282
}
22852283

2286-
// If no symbol has been found or if it is a relocation requiring the
2287-
// creation of a GOT entry, do not link against the symbol but against
2288-
// whatever address was extracted from the instruction itself. We are
2289-
// not creating a GOT entry as this was already processed by the linker.
2290-
// For GOT relocs, do not subtract addend as the addend does not refer
2291-
// to this instruction's target, but it refers to the target in the GOT
2292-
// entry.
2293-
if (Relocation::isGOT(RType)) {
2294-
Addend = 0;
2295-
SymbolAddress = ExtractedValue + PCRelOffset;
2296-
} else if (Relocation::isTLS(RType)) {
2284+
// GOT relocation can cause the underlying instruction to be modified by the
2285+
// linker, resulting in the extracted value being different from the actual
2286+
// symbol. It's also possible to have a GOT entry for a symbol defined in the
2287+
// binary. In the latter case, the instruction can be using the GOT version
2288+
// causing the extracted value mismatch. Similar cases can happen for TLS.
2289+
// Pass the relocation information as is to the disassembler and let it decide
2290+
// how to use it for the operand symbolization.
2291+
if (Relocation::isGOT(RType) || Relocation::isTLS(RType)) {
22972292
SkipVerification = true;
22982293
} else if (!SymbolAddress) {
22992294
assert(!IsSectionRelocation);
@@ -2666,11 +2661,14 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
26662661

26672662
MCSymbol *ReferencedSymbol = nullptr;
26682663
if (!IsSectionRelocation) {
2669-
if (BinaryData *BD = BC->getBinaryDataByName(SymbolName))
2664+
if (BinaryData *BD = BC->getBinaryDataByName(SymbolName)) {
26702665
ReferencedSymbol = BD->getSymbol();
2671-
else if (BC->isGOTSymbol(SymbolName))
2666+
} else if (BC->isGOTSymbol(SymbolName)) {
26722667
if (BinaryData *BD = BC->getGOTSymbol())
26732668
ReferencedSymbol = BD->getSymbol();
2669+
} else if (BinaryData *BD = BC->getBinaryDataAtAddress(SymbolAddress)) {
2670+
ReferencedSymbol = BD->getSymbol();
2671+
}
26742672
}
26752673

26762674
ErrorOr<BinarySection &> ReferencedSection{std::errc::bad_address};
@@ -2798,15 +2796,14 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
27982796
}
27992797
}
28002798

2801-
if (ForceRelocation) {
2802-
std::string Name =
2803-
Relocation::isGOT(RType) ? "__BOLT_got_zero" : SymbolName;
2804-
ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0);
2805-
SymbolAddress = 0;
2806-
if (Relocation::isGOT(RType))
2807-
Addend = Address;
2799+
if (ForceRelocation && !ReferencedBF) {
2800+
// Create the relocation symbol if it's not defined in the binary.
2801+
if (SymbolAddress == 0)
2802+
ReferencedSymbol = BC->registerNameAtAddress(SymbolName, 0, 0, 0);
2803+
28082804
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol "
2809-
<< SymbolName << " with addend " << Addend << '\n');
2805+
<< ReferencedSymbol->getName() << " with addend "
2806+
<< Addend << '\n');
28102807
} else if (ReferencedBF) {
28112808
ReferencedSymbol = ReferencedBF->getSymbol();
28122809
uint64_t RefFunctionOffset = 0;

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
281281
return Inst.getOpcode() == AArch64::ADR;
282282
}
283283

284-
bool isAddXri(const MCInst &Inst) const {
284+
bool isAddXri(const MCInst &Inst) const override {
285285
return Inst.getOpcode() == AArch64::ADDXri;
286286
}
287287

@@ -318,7 +318,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
318318
Inst.getOpcode() == AArch64::CBZX);
319319
}
320320

321-
bool isMOVW(const MCInst &Inst) const {
321+
bool isMOVW(const MCInst &Inst) const override {
322322
return (Inst.getOpcode() == AArch64::MOVKWi ||
323323
Inst.getOpcode() == AArch64::MOVKXi ||
324324
Inst.getOpcode() == AArch64::MOVNWi ||

bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp

Lines changed: 64 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -45,24 +45,15 @@ bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
4545
BC.MIB->getTargetExprFor(Inst, Expr, *Ctx, RelType)));
4646
};
4747

48-
// The linker can convert ADRP+ADD and ADRP+LDR instruction sequences into
49-
// NOP+ADR. After the conversion, the linker might keep the relocations and
50-
// if we try to symbolize ADR's operand using outdated relocations, we might
51-
// get unexpected results. Hence, we check for the conversion/relaxation, and
52-
// ignore the relocation. The symbolization is done based on the PC-relative
53-
// value of the operand instead.
54-
if (Relocation && BC.MIB->isADR(Inst)) {
55-
if (Relocation->Type == ELF::R_AARCH64_ADD_ABS_LO12_NC ||
56-
Relocation->Type == ELF::R_AARCH64_LD64_GOT_LO12_NC) {
57-
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation at 0x"
58-
<< Twine::utohexstr(InstAddress) << '\n');
59-
Relocation = nullptr;
48+
if (Relocation) {
49+
auto AdjustedRel = adjustRelocation(*Relocation, Inst);
50+
if (AdjustedRel) {
51+
addOperand(AdjustedRel->Symbol, AdjustedRel->Addend, AdjustedRel->Type);
52+
return true;
6053
}
61-
}
6254

63-
if (Relocation) {
64-
addOperand(Relocation->Symbol, Relocation->Addend, Relocation->Type);
65-
return true;
55+
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation at 0x"
56+
<< Twine::utohexstr(InstAddress) << '\n');
6657
}
6758

6859
if (!BC.MIB->hasPCRelOperand(Inst))
@@ -88,6 +79,63 @@ bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
8879
return true;
8980
}
9081

82+
std::optional<Relocation>
83+
AArch64MCSymbolizer::adjustRelocation(const Relocation &Rel,
84+
const MCInst &Inst) const {
85+
BinaryContext &BC = Function.getBinaryContext();
86+
87+
// The linker can convert ADRP+ADD and ADRP+LDR instruction sequences into
88+
// NOP+ADR. After the conversion, the linker might keep the relocations and
89+
// if we try to symbolize ADR's operand using outdated relocations, we might
90+
// get unexpected results. Hence, we check for the conversion/relaxation, and
91+
// ignore the relocation. The symbolization is done based on the PC-relative
92+
// value of the operand instead.
93+
if (BC.MIB->isADR(Inst) && (Rel.Type == ELF::R_AARCH64_ADD_ABS_LO12_NC ||
94+
Rel.Type == ELF::R_AARCH64_LD64_GOT_LO12_NC))
95+
return std::nullopt;
96+
97+
// The linker might perform TLS relocations relaxations, such as
98+
// changed TLS access model (e.g. changed global dynamic model
99+
// to initial exec), thus changing the instructions. The static
100+
// relocations might be invalid at this point and we might no
101+
// need to process these relocations anymore.
102+
// More information could be found by searching
103+
// elfNN_aarch64_tls_relax in bfd
104+
if (BC.MIB->isMOVW(Inst)) {
105+
switch (Rel.Type) {
106+
default:
107+
break;
108+
case ELF::R_AARCH64_TLSDESC_LD64_LO12:
109+
case ELF::R_AARCH64_TLSDESC_ADR_PAGE21:
110+
case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
111+
case ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
112+
return std::nullopt;
113+
}
114+
}
115+
116+
if (!Relocation::isGOT(Rel.Type))
117+
return Rel;
118+
119+
Relocation AdjustedRel = Rel;
120+
if (Rel.Type == ELF::R_AARCH64_LD64_GOT_LO12_NC && BC.MIB->isAddXri(Inst)) {
121+
// The ADRP+LDR sequence was converted into ADRP+ADD. We are looking at the
122+
// second instruction and have to use the relocation type for ADD.
123+
AdjustedRel.Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
124+
} else {
125+
// For instructions that reference GOT, ignore the referenced symbol and
126+
// use value at the relocation site. FixRelaxationPass will look at
127+
// instruction pairs and will perform necessary adjustments.
128+
ErrorOr<uint64_t> SymbolValue = BC.getSymbolValue(*Rel.Symbol);
129+
assert(SymbolValue && "Symbol value should be set");
130+
const uint64_t SymbolPageAddr = *SymbolValue & ~0xfffULL;
131+
132+
AdjustedRel.Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
133+
AdjustedRel.Addend = Rel.Value;
134+
}
135+
136+
return AdjustedRel;
137+
}
138+
91139
void AArch64MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
92140
int64_t Value,
93141
uint64_t Address) {}

bolt/lib/Target/AArch64/AArch64MCSymbolizer.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include "bolt/Core/BinaryFunction.h"
1313
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
14+
#include <optional>
1415

1516
namespace llvm {
1617
namespace bolt {
@@ -20,6 +21,13 @@ class AArch64MCSymbolizer : public MCSymbolizer {
2021
BinaryFunction &Function;
2122
bool CreateNewSymbols{true};
2223

24+
/// Modify relocation \p Rel based on type of the relocation and the
25+
/// instruction it was applied to. Return the new relocation info, or
26+
/// std::nullopt if the relocation should be ignored, e.g. in the case the
27+
/// instruction was modified by the linker.
28+
std::optional<Relocation> adjustRelocation(const Relocation &Rel,
29+
const MCInst &Inst) const;
30+
2331
public:
2432
AArch64MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true)
2533
: MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr),

0 commit comments

Comments
 (0)