Skip to content

Commit ba3f372

Browse files
rafaelauler authored and memfrob committed
[BOLT-AArch64] Support reordering spec06 gcc relocs
Summary: Enhance the basic infrastructure for relocation mode for AArch64 to make a reasonably large program work after reordering (gcc). Detect jump table patterns and skip optimizing functions with jump tables in AArch64, as those will require extra future effort to fully decode. To make these work in relocation mode, we skip changing the function body and introduce a mode to preserve even the original nops. By not changing any local offsets in the function, the input original jump tables should just work. Functions with no jump tables are optimized with BB reordering. No other optimizations have been tested. (cherry picked from FBD6130117)
1 parent cc53b8c commit ba3f372

File tree

7 files changed

+206
-43
lines changed

7 files changed

+206
-43
lines changed

bolt/BinaryBasicBlock.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include <set>
2727

2828
namespace llvm {
29+
2930
namespace bolt {
3031

3132
class BinaryFunction;

bolt/BinaryContext.cpp

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -493,11 +493,14 @@ size_t Relocation::getSizeForType(uint64_t Type) {
493493
case ELF::R_AARCH64_ADR_PREL_PG_HI21:
494494
case ELF::R_AARCH64_LDST64_ABS_LO12_NC:
495495
case ELF::R_AARCH64_ADD_ABS_LO12_NC:
496+
case ELF::R_AARCH64_LDST128_ABS_LO12_NC:
496497
case ELF::R_AARCH64_LDST32_ABS_LO12_NC:
498+
case ELF::R_AARCH64_LDST16_ABS_LO12_NC:
497499
case ELF::R_AARCH64_LDST8_ABS_LO12_NC:
498500
case ELF::R_AARCH64_ADR_GOT_PAGE:
499501
case ELF::R_AARCH64_LD64_GOT_LO12_NC:
500502
case ELF::R_AARCH64_JUMP26:
503+
case ELF::R_AARCH64_PREL32:
501504
return 4;
502505
case ELF::R_X86_64_PC64:
503506
case ELF::R_X86_64_64:
@@ -506,26 +509,31 @@ size_t Relocation::getSizeForType(uint64_t Type) {
506509
}
507510
}
508511

509-
uint64_t Relocation::extractValue(uint64_t Type, uint64_t Contents) {
512+
uint64_t Relocation::extractValue(uint64_t Type, uint64_t Contents,
513+
uint64_t PC) {
510514
switch (Type) {
511515
default:
512516
llvm_unreachable("unsupported relocation type");
513517
case ELF::R_AARCH64_ABS64:
514518
return Contents;
519+
case ELF::R_AARCH64_PREL32:
520+
return static_cast<int64_t>(PC) + SignExtend64<32>(Contents & 0xffffffff);
515521
case ELF::R_AARCH64_JUMP26:
516522
case ELF::R_AARCH64_CALL26:
517523
// Immediate goes in bits 25:0 of B and BL.
518524
Contents &= ~0xfffffffffc000000ULL;
519-
return SignExtend64<28>(Contents << 2);
525+
return static_cast<int64_t>(PC) + SignExtend64<28>(Contents << 2);
520526
case ELF::R_AARCH64_ADR_GOT_PAGE:
521527
case ELF::R_AARCH64_ADR_PREL_PG_HI21: {
522528
// Bits 32:12 of Symbol address goes in bits 30:29 + 23:5 of ADRP
523529
// instruction
524-
Contents &= ~0xffffffff9f00001fU;
530+
Contents &= ~0xffffffff9f00001fUll;
525531
auto LowBits = (Contents >> 29) & 0x3;
526532
auto HighBits = (Contents >> 5) & 0x7ffff;
527533
Contents = LowBits | (HighBits << 2);
528-
return SignExtend64<32>(Contents << 12);
534+
Contents = static_cast<int64_t>(PC) + SignExtend64<32>(Contents << 12);
535+
Contents &= ~0xfffUll;
536+
return Contents;
529537
}
530538
case ELF::R_AARCH64_LD64_GOT_LO12_NC:
531539
case ELF::R_AARCH64_LDST64_ABS_LO12_NC: {
@@ -539,12 +547,24 @@ uint64_t Relocation::extractValue(uint64_t Type, uint64_t Contents) {
539547
Contents &= ~0xffffffffffc003ffU;
540548
return Contents >> (10 - 0);
541549
}
550+
case ELF::R_AARCH64_LDST128_ABS_LO12_NC: {
551+
// Immediate goes in bits 21:10 of LD/ST instruction, taken
552+
// from bits 11:4 of Symbol address
553+
Contents &= ~0xffffffffffc003ffU;
554+
return Contents >> (10 - 4);
555+
}
542556
case ELF::R_AARCH64_LDST32_ABS_LO12_NC: {
543557
// Immediate goes in bits 21:10 of LD/ST instruction, taken
544558
// from bits 11:2 of Symbol address
545559
Contents &= ~0xffffffffffc003ffU;
546560
return Contents >> (10 - 2);
547561
}
562+
case ELF::R_AARCH64_LDST16_ABS_LO12_NC: {
563+
// Immediate goes in bits 21:10 of LD/ST instruction, taken
564+
// from bits 11:1 of Symbol address
565+
Contents &= ~0xffffffffffc003ffU;
566+
return Contents >> (10 - 1);
567+
}
548568
case ELF::R_AARCH64_LDST8_ABS_LO12_NC: {
549569
// Immediate goes in bits 21:10 of LD/ST instruction, taken
550570
// from bits 11:0 of Symbol address
@@ -554,6 +574,16 @@ uint64_t Relocation::extractValue(uint64_t Type, uint64_t Contents) {
554574
}
555575
}
556576

577+
bool Relocation::isGOT(uint64_t Type) {
578+
switch (Type) {
579+
default:
580+
return false;
581+
case ELF::R_AARCH64_ADR_GOT_PAGE:
582+
case ELF::R_AARCH64_LD64_GOT_LO12_NC:
583+
return true;
584+
}
585+
}
586+
557587
bool Relocation::isPCRelative(uint64_t Type) {
558588
switch (Type) {
559589
default:
@@ -566,7 +596,9 @@ bool Relocation::isPCRelative(uint64_t Type) {
566596
case ELF::R_AARCH64_ABS64:
567597
case ELF::R_AARCH64_LDST64_ABS_LO12_NC:
568598
case ELF::R_AARCH64_ADD_ABS_LO12_NC:
599+
case ELF::R_AARCH64_LDST128_ABS_LO12_NC:
569600
case ELF::R_AARCH64_LDST32_ABS_LO12_NC:
601+
case ELF::R_AARCH64_LDST16_ABS_LO12_NC:
570602
case ELF::R_AARCH64_LDST8_ABS_LO12_NC:
571603
case ELF::R_AARCH64_LD64_GOT_LO12_NC:
572604
return false;
@@ -582,6 +614,7 @@ bool Relocation::isPCRelative(uint64_t Type) {
582614
case ELF::R_AARCH64_ADR_PREL_PG_HI21:
583615
case ELF::R_AARCH64_ADR_GOT_PAGE:
584616
case ELF::R_AARCH64_JUMP26:
617+
case ELF::R_AARCH64_PREL32:
585618
return true;
586619
}
587620
}

bolt/BinaryContext.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,14 @@ struct Relocation {
6767
/// Extract current relocated value from binary contents. This is used for
6868
/// RISC architectures where values are encoded in specific bits depending
6969
/// on the relocation value.
70-
static uint64_t extractValue(uint64_t Type, uint64_t Contents);
70+
static uint64_t extractValue(uint64_t Type, uint64_t Contents, uint64_t PC);
7171

7272
/// Return true if relocation type is PC-relative. Return false otherwise.
7373
static bool isPCRelative(uint64_t Type);
7474

75+
/// Return true if relocation type implies the creation of a GOT entry
76+
static bool isGOT(uint64_t Type);
77+
7578
/// Emit relocation at a current \p Streamer' position. The caller is
7679
/// responsible for setting the position correctly.
7780
size_t emit(MCStreamer *Streamer) const;
@@ -303,6 +306,10 @@ class BinaryContext {
303306
SmallString<256> Code;
304307
SmallVector<MCFixup, 4> Fixups;
305308
raw_svector_ostream VecOS(Code);
309+
if (MIA->isCFI(*Beg)) {
310+
++Beg;
311+
continue;
312+
}
306313
MCE->encodeInstruction(*Beg++, VecOS, Fixups, *STI);
307314
Size += Code.size();
308315
}

bolt/BinaryFunction.cpp

Lines changed: 108 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -575,28 +575,89 @@ IndirectBranchType BinaryFunction::processIndirectBranch(MCInst &Instruction,
575575
int64_t DispValue;
576576
const MCExpr *DispExpr;
577577

578+
// On AArch64, identify the instruction adding the PC-relative offset to
579+
// jump table entries to correctly decode it.
580+
MCInst *PCRelBaseInstr;
581+
uint64_t PCRelAddr = 0;
582+
583+
MutableArrayRef<MCInst> BB = Instructions;
584+
585+
if (BC.TheTriple->getArch() == llvm::Triple::aarch64) {
586+
PreserveNops = opts::Relocs;
587+
// Start at the last label as an approximation of the current basic block.
588+
// This is a heuristic, since the full set of labels have yet to be
589+
// determined
590+
for (auto LI = Labels.rbegin(); LI != Labels.rend(); ++LI) {
591+
auto II = InstructionOffsets.find(LI->first);
592+
if (II != InstructionOffsets.end()) {
593+
BB = BB.slice(II->second);
594+
break;
595+
}
596+
}
597+
}
598+
578599
auto Type = BC.MIA->analyzeIndirectBranch(Instruction,
579-
Instructions,
600+
BB,
580601
PtrSize,
581602
MemLocInstr,
582603
BaseRegNum,
583604
IndexRegNum,
584605
DispValue,
585-
DispExpr);
606+
DispExpr,
607+
PCRelBaseInstr);
586608

587609
if (Type == IndirectBranchType::UNKNOWN && !MemLocInstr)
588610
return Type;
589611

590612
if (MemLocInstr != &Instruction)
591613
IndexRegNum = 0;
592614

615+
if (BC.TheTriple->getArch() == llvm::Triple::aarch64) {
616+
const auto *Sym = BC.MIA->getTargetSymbol(*PCRelBaseInstr, 1);
617+
assert (Sym && "Symbol extraction failed");
618+
auto SI = BC.GlobalSymbols.find(Sym->getName());
619+
if (SI != BC.GlobalSymbols.end()) {
620+
PCRelAddr = SI->second;
621+
} else {
622+
for (auto &Elmt : Labels) {
623+
if (Elmt.second == Sym) {
624+
PCRelAddr = Elmt.first + getAddress();
625+
break;
626+
}
627+
}
628+
}
629+
uint64_t InstrAddr = 0;
630+
for (auto II = InstructionOffsets.rbegin(); II != InstructionOffsets.rend();
631+
++II) {
632+
if (&Instructions[II->second] == PCRelBaseInstr) {
633+
InstrAddr = II->first + getAddress();
634+
break;
635+
}
636+
}
637+
assert(InstrAddr != 0 && "instruction not found");
638+
// We do this to avoid spurious references to code locations outside this
639+
// function (for example, if the indirect jump lives in the last basic
640+
// block of the function, it will create a reference to the next function).
641+
// This replaces a symbol reference with an immediate.
642+
BC.MIA->replaceMemOperandDisp(*PCRelBaseInstr,
643+
MCOperand::createImm(PCRelAddr - InstrAddr));
644+
// FIXME: Disable full jump table processing for AArch64 until we have a
645+
// proper way of determining the jump table limits.
646+
return IndirectBranchType::UNKNOWN;
647+
}
648+
593649
// RIP-relative addressing should be converted to symbol form by now
594650
// in processed instructions (but not in jump).
595651
if (DispExpr) {
596-
auto SI = BC.GlobalSymbols.find(DispExpr->getSymbol().getName());
652+
auto SI =
653+
BC.GlobalSymbols.find(BC.MIA->getTargetSymbol(DispExpr)->getName());
597654
assert(SI != BC.GlobalSymbols.end() && "global symbol needs a value");
598655
ArrayStart = SI->second;
599656
BaseRegNum = 0;
657+
if (BC.TheTriple->getArch() == llvm::Triple::aarch64) {
658+
ArrayStart &= ~0xFFFULL;
659+
ArrayStart += DispValue & 0xFFFULL;
660+
}
600661
} else {
601662
ArrayStart = static_cast<uint64_t>(DispValue);
602663
}
@@ -679,7 +740,9 @@ IndirectBranchType BinaryFunction::processIndirectBranch(MCInst &Instruction,
679740
<< " is referencing address 0x"
680741
<< Twine::utohexstr(Section.getAddress() + ValueOffset));
681742
// Extract the value and increment the offset.
682-
if (Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
743+
if (BC.TheTriple->getArch() == llvm::Triple::aarch64) {
744+
Value = PCRelAddr + DE.getSigned(&ValueOffset, EntrySize);
745+
} else if (Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
683746
Value = ArrayStart + DE.getSigned(&ValueOffset, 4);
684747
} else {
685748
Value = DE.getAddress(&ValueOffset);
@@ -810,7 +873,8 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
810873
if (!TargetSymbol && Section && Section->isText() &&
811874
(BC.TheTriple->getArch() != llvm::Triple::aarch64 ||
812875
!BC.MIA->isADRP(Instruction))) {
813-
if (containsAddress(TargetAddress)) {
876+
if (containsAddress(TargetAddress, /*UseMaxSize=*/
877+
BC.TheTriple->getArch() == llvm::Triple::aarch64)) {
814878
if (TargetAddress != getAddress()) {
815879
// The address could potentially escape. Mark it as another entry
816880
// point into the function.
@@ -831,7 +895,7 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
831895
Instruction,
832896
MCSymbolRefExpr::create(
833897
TargetSymbol, MCSymbolRefExpr::VK_None, *BC.Ctx),
834-
*BC.Ctx)));
898+
*BC.Ctx, 0)));
835899
return true;
836900
};
837901

@@ -890,6 +954,7 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
890954
}
891955

892956
// Check if there's a relocation associated with this instruction.
957+
bool UsedReloc{false};
893958
if (!Relocations.empty()) {
894959
auto RI = Relocations.lower_bound(Offset);
895960
if (RI != Relocations.end() && RI->first < Offset + Size) {
@@ -900,15 +965,21 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
900965
<< " for instruction at offset 0x"
901966
<< Twine::utohexstr(Offset) << '\n');
902967
int64_t Value;
903-
const auto Result =
904-
BC.MIA->replaceImmWithSymbol(Instruction, Relocation.Symbol,
905-
Relocation.Addend, Ctx.get(), Value);
968+
const auto Result = BC.MIA->replaceImmWithSymbol(
969+
Instruction, Relocation.Symbol, Relocation.Addend, Ctx.get(), Value,
970+
Relocation.Type);
906971
(void)Result;
907972
assert(Result && "cannot replace immediate with relocation");
973+
// For AArch64, if we replaced an immediate with a symbol from a
974+
// relocation, we mark it so we do not try to further process a
975+
// pc-relative operand. All we need is the symbol.
976+
if (BC.TheTriple->getArch() == llvm::Triple::aarch64)
977+
UsedReloc = true;
908978

909979
// Make sure we replaced the correct immediate (instruction
910980
// can have multiple immediate operands).
911-
assert(static_cast<uint64_t>(Value) == Relocation.Value &&
981+
assert((BC.TheTriple->getArch() == llvm::Triple::aarch64 ||
982+
static_cast<uint64_t>(Value) == Relocation.Value) &&
912983
"immediate value mismatch in function");
913984
}
914985
}
@@ -1081,7 +1152,7 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
10811152
// Indirect call. We only need to fix it if the operand is RIP-relative
10821153
if (IsSimple && MIA->hasPCRelOperand(Instruction)) {
10831154
if (!handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size)) {
1084-
errs() << "BOLT-ERROR: cannot handle RIP operand at 0x"
1155+
errs() << "BOLT-ERROR: cannot handle PC-relative operand at 0x"
10851156
<< Twine::utohexstr(AbsoluteInstrAddr)
10861157
<< ". Skipping function " << *this << ".\n";
10871158
if (opts::Relocs)
@@ -1091,9 +1162,9 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
10911162
}
10921163
}
10931164
} else {
1094-
if (MIA->hasPCRelOperand(Instruction)) {
1165+
if (MIA->hasPCRelOperand(Instruction) && !UsedReloc) {
10951166
if (!handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size)) {
1096-
errs() << "BOLT-ERROR: cannot handle RIP operand at 0x"
1167+
errs() << "BOLT-ERROR: cannot handle PC-relative operand at 0x"
10971168
<< Twine::utohexstr(AbsoluteInstrAddr)
10981169
<< ". Skipping function " << *this << ".\n";
10991170
if (opts::Relocs)
@@ -1359,7 +1430,7 @@ bool BinaryFunction::buildCFG() {
13591430
// Ignore nops. We use nops to derive alignment of the next basic block.
13601431
// It will not always work, as some blocks are naturally aligned, but
13611432
// it's just part of heuristic for block alignment.
1362-
if (MIA->isNoop(Instr)) {
1433+
if (MIA->isNoop(Instr) && !PreserveNops) {
13631434
IsLastInstrNop = true;
13641435
continue;
13651436
}
@@ -2593,9 +2664,8 @@ void BinaryFunction::emitConstantIslands(MCStreamer &Streamer) {
25932664
outs() << "BOLT-INFO: emitting constant island for function " << *this
25942665
<< "\n";
25952666

2596-
auto IS = IslandSymbols.begin();
2597-
25982667
// We split the island into smaller blocks and output labels between them.
2668+
auto IS = IslandSymbols.begin();
25992669
for (auto DataIter = DataOffsets.begin(); DataIter != DataOffsets.end();
26002670
++DataIter) {
26012671
uint64_t FunctionOffset = *DataIter;
@@ -2617,18 +2687,33 @@ void BinaryFunction::emitConstantIslands(MCStreamer &Streamer) {
26172687
if (FunctionOffset == EndOffset)
26182688
continue; // Size is zero, nothing to emit
26192689

2620-
// Emit labels and data
2621-
while (IS != IslandSymbols.end() && IS->first < EndOffset) {
2622-
auto NextStop = IS->first;
2690+
// Emit labels, relocs and data
2691+
auto RI = MoveRelocations.lower_bound(FunctionOffset);
2692+
while ((IS != IslandSymbols.end() && IS->first < EndOffset) ||
2693+
(RI != MoveRelocations.end() && RI->first < EndOffset)) {
2694+
auto NextLabelOffset = IS == IslandSymbols.end() ? EndOffset : IS->first;
2695+
auto NextRelOffset = RI == MoveRelocations.end() ? EndOffset : RI->first;
2696+
auto NextStop = std::min(NextLabelOffset, NextRelOffset);
26232697
assert(NextStop <= EndOffset && "internal overflow error");
26242698
if (FunctionOffset < NextStop) {
26252699
Streamer.EmitBytes(FunctionContents.slice(FunctionOffset, NextStop));
26262700
FunctionOffset = NextStop;
26272701
}
2628-
DEBUG(dbgs() << "BOLT-DEBUG: emitted label " << IS->second->getName()
2629-
<< " at offset 0x" << Twine::utohexstr(IS->first) << '\n');
2630-
Streamer.EmitLabel(IS->second);
2631-
++IS;
2702+
if (IS != IslandSymbols.end() && FunctionOffset == IS->first) {
2703+
DEBUG(dbgs() << "BOLT-DEBUG: emitted label " << IS->second->getName()
2704+
<< " at offset 0x" << Twine::utohexstr(IS->first) << '\n');
2705+
Streamer.EmitLabel(IS->second);
2706+
++IS;
2707+
}
2708+
if (RI != MoveRelocations.end() && FunctionOffset == RI->first) {
2709+
auto RelocationSize = RI->second.emit(&Streamer);
2710+
DEBUG(dbgs() << "BOLT-DEBUG: emitted relocation for symbol "
2711+
<< RI->second.Symbol->getName() << " at offset 0x"
2712+
<< Twine::utohexstr(RI->first)
2713+
<< " with size " << RelocationSize << '\n');
2714+
FunctionOffset += RelocationSize;
2715+
++RI;
2716+
}
26322717
}
26332718
assert(FunctionOffset <= EndOffset && "overflow error");
26342719
if (FunctionOffset < EndOffset) {

0 commit comments

Comments
 (0)