Skip to content

Commit 8d18278

Browse files
rafaelauler authored and memfrob committed
[BOLT-AArch64] Support reordering bzip2 no relocs
Summary: Add functionality to support reordering bzip2 compiled to AArch64, with function splitting but without relocations: * Expand the AArch64 backend to support inverting branches and analyzing branches so BOLT reordering machinery is able to shuffle blocks and fix branches correctly; * Add a new pass named LongJmp to add stubs whenever code needs to jump to the cold area, when using function splitting, because of the limited target encoding capability in AArch64 (as a RISC architecture). (cherry picked from FBD5748184)
1 parent b1c26e7 commit 8d18278

12 files changed

+571
-22
lines changed

bolt/BinaryBasicBlock.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,21 @@ BinaryBasicBlock *BinaryBasicBlock::getSuccessor(const MCSymbol *Label) const {
138138
return nullptr;
139139
}
140140

141+
// Overload of getSuccessor() that also reports branch info. Walks successors()
// in step with the branch-info sequence that starts at branch_info_begin(). On
// a label match, copies the paired entry into \p BI and returns that successor;
// otherwise returns nullptr and leaves \p BI unmodified.
BinaryBasicBlock *
142+
BinaryBasicBlock::getSuccessor(const MCSymbol *Label,
143+
BinaryBranchInfo &BI) const {
144+
auto BIIter = branch_info_begin();
145+
for (BinaryBasicBlock *BB : successors()) {
146+
if (BB->getLabel() == Label) {
147+
BI = *BIIter; // branch info at the same position as the matching successor
148+
return BB;
149+
}
150+
++BIIter; // keep the branch-info iterator aligned with the next successor
151+
}
152+
153+
return nullptr; // no successor carries the requested label
154+
}
155+
141156
BinaryBasicBlock *BinaryBasicBlock::getLandingPad(const MCSymbol *Label) const {
142157
for (BinaryBasicBlock *BB : landing_pads()) {
143158
if (BB->getLabel() == Label)

bolt/BinaryBasicBlock.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,10 @@ class BinaryBasicBlock {
333333
/// return the successor.
334334
BinaryBasicBlock *getSuccessor(const MCSymbol *Label = nullptr) const;
335335

336+
/// Return the related branch info as well as the successor.
337+
BinaryBasicBlock *getSuccessor(const MCSymbol *Label,
338+
BinaryBranchInfo &BI) const;
339+
336340
/// If the basic block ends with a conditional branch (possibly followed by
337341
/// an unconditional branch) and thus has 2 successors, return a successor
338342
/// corresponding to a jump condition which could be true or false.
@@ -637,6 +641,12 @@ class BinaryBasicBlock {
637641
return Instructions.erase(II);
638642
}
639643

644+
/// Erase all instructions from this basic block and reset the
/// pseudo-instruction counter (NumPseudos) to zero.
645+
void clear() {
646+
Instructions.clear();
647+
NumPseudos = 0; // nothing is left in the block, so the counter restarts at 0
648+
}
649+
640650
/// Retrieve iterator for \p Inst or return end iterator if instruction is not
641651
/// from this basic block.
642652
decltype(Instructions)::iterator findInstruction(const MCInst *Inst) {

bolt/BinaryContext.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ namespace opts {
2828
extern cl::OptionCategory BoltCategory;
2929

3030
extern cl::opt<bool> Relocs;
31+
extern cl::opt<BinaryFunction::ReorderType> ReorderFunctions;
3132

3233
static cl::opt<bool>
3334
PrintDebugInfo("print-debug-info",
@@ -186,6 +187,28 @@ unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
186187
return Ctx->getDwarfFile(Dir, FileNames[FileIndex - 1].Name, 0, DestCUID);
187188
}
188189

190+
// Build a vector with one pointer per entry of \p BinaryFunctions, initially in
// the map's iteration order. When opts::ReorderFunctions is not RT_NONE the
// vector is stable-sorted: functions with a valid index come first in ascending
// index order, and functions without one follow in their original relative
// order.
std::vector<BinaryFunction *> BinaryContext::getSortedFunctions(
191+
std::map<uint64_t, BinaryFunction> &BinaryFunctions) {
192+
std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
193+
std::transform(BinaryFunctions.begin(), BinaryFunctions.end(),
194+
SortedFunctions.begin(),
195+
[](std::pair<const uint64_t, BinaryFunction> &BFI) {
196+
return &BFI.second; // pointer to the function stored in the map
197+
});
198+
199+
if (opts::ReorderFunctions != BinaryFunction::RT_NONE) {
200+
std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(),
201+
[](const BinaryFunction *A, const BinaryFunction *B) {
202+
if (A->hasValidIndex() && B->hasValidIndex()) {
203+
return A->getIndex() < B->getIndex();
204+
} else {
// At least one side has no valid index: A goes first only if A is
// the indexed one. Two unindexed functions compare equivalent, so
// the stable sort keeps their existing order.
205+
return A->hasValidIndex();
206+
}
207+
});
208+
}
209+
return SortedFunctions;
210+
}
211+
189212
void BinaryContext::preprocessDebugInfo(
190213
std::map<uint64_t, BinaryFunction> &BinaryFunctions) {
191214
// Populate MCContext with DWARF files.

bolt/BinaryContext.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,11 @@ class BinaryContext {
149149
/// Number of functions with profile information
150150
uint64_t NumProfiledFuncs{0};
151151

152+
/// Track next available address for new allocatable sections. RewriteInstance
153+
/// sets this prior to running BOLT passes, so layout passes are aware of the
154+
/// final addresses functions will have.
155+
uint64_t LayoutStartAddress{0};
156+
152157
/// True if the binary requires immediate relocation processing.
153158
bool RequiresZNow{false};
154159

@@ -272,6 +277,10 @@ class BinaryContext {
272277
const uint32_t SrcCUID,
273278
unsigned FileIndex);
274279

280+
/// Return functions in output layout order
281+
static std::vector<BinaryFunction *>
282+
getSortedFunctions(std::map<uint64_t, BinaryFunction> &BinaryFunctions);
283+
275284
/// Compute the native code size for a range of instructions.
276285
/// Note: this can be imprecise wrt the final binary since happening prior to
277286
/// relaxation, as well as wrt the original binary because of opcode

bolt/BinaryFunction.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3969,7 +3969,7 @@ DynoStats BinaryFunction::getDynoStats() const {
39693969
BC.MIA->getAnnotationWithDefault<uint64_t>(Instr, "CTCTakenCount");
39703970
}
39713971
Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
3972-
if (BC.MIA->getMemoryOperandNo(Instr) != -1) {
3972+
if (BC.MIA->isIndirectCall(Instr)) {
39733973
Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
39743974
} else if (const auto *CallSymbol = BC.MIA->getTargetSymbol(Instr)) {
39753975
const auto *BF = BC.getFunctionForSymbol(CallSymbol);

bolt/BinaryFunction.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -711,7 +711,7 @@ class BinaryFunction {
711711
assert(InstructionOffsets.size() == Instructions.size() &&
712712
"There must be one instruction at every offset.");
713713
Instructions.emplace_back(std::forward<MCInst>(Instruction));
714-
InstructionOffsets[Offset] = Instructions.size() - 1;
714+
InstructionOffsets[Offset] = Instructions.size() - 1;
715715
}
716716

717717
/// Return instruction at a given offset in the function. Valid before
@@ -1863,6 +1863,18 @@ class BinaryFunction {
18631863
return Estimate;
18641864
}
18651865

1866+
/// Estimate the code size of the cold part of this function: the sum of
/// BC.computeCodeSize() over every block in BasicBlocksLayout for which
/// isCold() is true. If the function is not split, the result of
/// estimateSize() is returned instead.
size_t estimateColdSize() const {
1867+
if (!isSplit())
1868+
return estimateSize();
1869+
size_t Estimate = 0;
1870+
for (const auto *BB : BasicBlocksLayout) {
1871+
if (BB->isCold()) {
1872+
Estimate += BC.computeCodeSize(BB->begin(), BB->end());
1873+
}
1874+
}
1875+
return Estimate;
1876+
}
1877+
18661878
size_t estimateSize() const {
18671879
size_t Estimate = 0;
18681880
for (const auto *BB : BasicBlocksLayout) {

bolt/BinaryPassManager.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "Passes/FrameOptimizer.h"
1515
#include "Passes/IndirectCallPromotion.h"
1616
#include "Passes/Inliner.h"
17+
#include "Passes/LongJmp.h"
1718
#include "Passes/PLTCall.h"
1819
#include "Passes/ReorderFunctions.h"
1920
#include "Passes/StokeInfo.h"
@@ -105,6 +106,13 @@ PrintFinalized("print-finalized",
105106
cl::Hidden,
106107
cl::cat(BoltOptCategory));
107108

109+
// Hidden boolean option "print-longjmp"; its value is handed to LongJmpPass
// when that pass is registered with the pass manager.
static cl::opt<bool>
110+
PrintLongJmp("print-longjmp",
111+
cl::desc("print functions after longjmp pass"),
112+
cl::ZeroOrMore,
113+
cl::Hidden,
114+
cl::cat(BoltOptCategory));
115+
108116
static cl::opt<bool>
109117
PrintICF("print-icf",
110118
cl::desc("print functions after ICF optimization"),
@@ -396,6 +404,12 @@ void BinaryFunctionPassManager::runAllPasses(
396404

397405
Manager.registerPass(llvm::make_unique<AllocCombinerPass>(PrintFOP));
398406

407+
// Tighten branches according to offset differences between branch and
408+
// targets. No extra instructions after this pass, otherwise we may have
409+
// relocations out of range and crash during linking.
410+
if (BC.TheTriple->getArch() == llvm::Triple::aarch64)
411+
Manager.registerPass(llvm::make_unique<LongJmpPass>(PrintLongJmp));
412+
399413
// This pass turns tail calls into jumps which makes them invisible to
400414
// function reordering. It's unsafe to use any CFG or instruction analysis
401415
// after this point.

bolt/Passes/BinaryPasses.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ void OptimizeBodylessFunctions::optimizeCalls(BinaryFunction &BF,
233233
<< ": replacing call to " << OriginalTarget->getName()
234234
<< " by call to " << Target->getName()
235235
<< " while folding " << CallSites << " call sites\n");
236-
BC.MIA->replaceCallTargetOperand(Inst, Target, BC.Ctx.get());
236+
BC.MIA->replaceBranchTarget(Inst, Target, BC.Ctx.get());
237237

238238
NumOptimizedCallSites += CallSites;
239239
if (BB->hasProfile()) {

bolt/Passes/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ add_llvm_library(LLVMBOLTPasses
1313
IndirectCallPromotion.cpp
1414
Inliner.cpp
1515
LivenessAnalysis.cpp
16+
LongJmp.cpp
1617
MCF.cpp
1718
PettisAndHansen.cpp
1819
PLTCall.cpp

0 commit comments

Comments
 (0)