Skip to content

Commit 9aa218d

Browse files
committed
[𝘀𝗽𝗿] initial version
Created using spr 1.3.4
2 parents 9f4cea2 + b325976 commit 9aa218d

28 files changed

+607
-84
lines changed

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,12 @@ class BinaryContext {
230230
/// Functions injected by BOLT
231231
std::vector<BinaryFunction *> InjectedBinaryFunctions;
232232

233+
/// Thunk functions.
234+
std::vector<BinaryFunction *> ThunkBinaryFunctions;
235+
236+
/// Function that precedes thunks in the binary.
237+
const BinaryFunction *ThunkLocation{nullptr};
238+
233239
/// Jump tables for all functions mapped by address.
234240
std::map<uint64_t, JumpTable *> JumpTables;
235241

@@ -553,6 +559,16 @@ class BinaryContext {
553559
return InjectedBinaryFunctions;
554560
}
555561

562+
/// Create a thunk function called Name, add it to the list of thunk
/// functions, and return it.
BinaryFunction *createThunkBinaryFunction(const std::string &Name);
563+
564+
/// Return a mutable reference to the vector of thunk functions.
std::vector<BinaryFunction *> &getThunkBinaryFunctions() {
565+
return ThunkBinaryFunctions;
566+
}
567+
568+
/// Return the function that precedes thunks in the binary, or nullptr if no
/// such function has been set.
const BinaryFunction *getThunkLocation() const { return ThunkLocation; }
569+
570+
/// Set the function that precedes thunks in the binary.
void setThunkLocation(const BinaryFunction *BF) { ThunkLocation = BF; }
571+
556572
/// Return vector with all functions, i.e. include functions from the input
557573
/// binary and functions created by BOLT.
558574
std::vector<BinaryFunction *> getAllBinaryFunctions();
@@ -1372,6 +1388,10 @@ class BinaryContext {
13721388
uint64_t
13731389
computeInstructionSize(const MCInst &Inst,
13741390
const MCCodeEmitter *Emitter = nullptr) const {
1391+
// FIXME: hack for faster size computation on AArch64: pseudo instructions
// are counted as 0 bytes and every other instruction as 4 bytes.
1392+
if (isAArch64())
1393+
return MIB->isPseudo(Inst) ? 0 : 4;
1394+
13751395
if (std::optional<uint32_t> Size = MIB->getSize(Inst))
13761396
return *Size;
13771397

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,10 @@ class BinaryFunction {
368368
/// True if the function should not have an associated symbol table entry.
369369
bool IsAnonymous{false};
370370

371+
/// True if the function is used for remapping hot text and shall not be
372+
/// placed on a huge page.
373+
bool IsHotTextMover{false};
374+
371375
/// Name for the section this function code should reside in.
372376
std::string CodeSectionName;
373377

@@ -1411,6 +1415,8 @@ class BinaryFunction {
14111415
/// Return true if the function uses ORC format for stack unwinding.
14121416
bool hasORC() const { return HasORC; }
14131417

1418+
/// Return true if the function is used for remapping hot text.
bool isHotTextMover() const { return IsHotTextMover; }
1419+
14141420
const JumpTable *getJumpTable(const MCInst &Inst) const {
14151421
const uint64_t Address = BC.MIB->getJumpTable(Inst);
14161422
return getJumpTableContainingAddress(Address);
@@ -1761,6 +1767,8 @@ class BinaryFunction {
17611767
/// Mark function that should not be emitted.
17621768
void setIgnored();
17631769

1770+
/// Mark whether the function is used for remapping hot text.
void setHotTextMover(bool V) { IsHotTextMover = V; }
1771+
17641772
void setHasIndirectTargetToSplitFragment(bool V) {
17651773
HasIndirectTargetToSplitFragment = V;
17661774
}

bolt/include/bolt/Passes/LongJmp.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,28 @@ class LongJmpPass : public BinaryFunctionPass {
7676
/// 128MB of each other.
7777
void relaxLocalBranches(BinaryFunction &BF);
7878

79+
/// A cluster of functions together with the data tracked for it: its
/// members, the functions it calls, its size, and its last function.
struct FunctionCluster {
80+
// Functions that belong to the cluster.
DenseSet<BinaryFunction *> Functions;
81+
82+
// Functions that this cluster of functions is calling. Note that it
83+
// excludes all functions in the cluster itself.
84+
DenseSet<BinaryFunction *> Callees;
85+
86+
// Size of the cluster.
// NOTE(review): presumably the total code size in bytes that is compared
// against MaxClusterSize -- confirm against relaxCalls().
uint64_t Size{0};
87+
88+
// Last function in the cluster.
89+
BinaryFunction *LastBF{nullptr};
90+
};
91+
92+
/// Maximum size of the function cluster. Note that it's less than 128MB
93+
/// as the size of the cluster plus thunk island should be less than 128MB.
94+
static constexpr uint64_t MaxClusterSize = 125 * 1024 * 1024;
95+
96+
/// Relax calls for medium code model where code is < 256MB.
97+
/// A thunk island will be introduced between two clusters of functions to
98+
/// enable calls over 128MB.
99+
void relaxCalls(BinaryContext &BC);
100+
79101
/// -- Layout estimation methods --
80102
/// Try to do layout before running the emitter, by looking at BinaryFunctions
81103
/// and MCInsts -- this is an estimation. To be correct for longjmp inserter

bolt/include/bolt/Profile/DataAggregator.h

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -370,33 +370,46 @@ class DataAggregator : public DataReader {
370370
/// memory.
371371
///
372372
/// File format syntax:
373-
/// {B|F|f|T} [<start_id>:]<start_offset> [<end_id>:]<end_offset> [<ft_end>]
374-
/// <count> [<mispred_count>]
373+
/// E <event>
374+
/// S <start> <count>
375+
/// T <start> <end> <ft_end> <count>
376+
/// B <start> <end> <count> <mispred_count>
377+
/// [Ff] <start> <end> <count>
375378
///
376-
/// B - indicates an aggregated branch
377-
/// F - an aggregated fall-through
379+
/// where <start>, <end>, <ft_end> have the format [<id>:]<offset>
380+
///
381+
/// E - name of the sampling event used for subsequent entries
382+
/// S - indicates an aggregated basic sample at <start>
383+
/// B - indicates an aggregated branch from <start> to <end>
384+
/// F - an aggregated fall-through from <start> to <end>
378385
/// f - an aggregated fall-through with external origin - used to disambiguate
379386
/// between a return hitting a basic block head and a regular internal
380387
/// jump to the block
381-
/// T - an aggregated trace: branch with a fall-through (from, to, ft_end)
382-
///
383-
/// <start_id> - build id of the object containing the start address. We can
384-
/// skip it for the main binary and use "X" for an unknown object. This will
385-
/// save some space and facilitate human parsing.
386-
///
387-
/// <start_offset> - hex offset from the object base load address (0 for the
388-
/// main executable unless it's PIE) to the start address.
388+
/// T - an aggregated trace: branch from <start> to <end> with a fall-through
389+
/// to <ft_end>
389390
///
390-
/// <end_id>, <end_offset> - same for the end address.
391+
/// <id> - build id of the object containing the address. We can skip it for
392+
/// the main binary and use "X" for an unknown object. This will save some
393+
/// space and facilitate human parsing.
391394
///
392-
/// <ft_end> - same for the fallthrough_end address.
395+
/// <offset> - hex offset from the object base load address (0 for the
396+
/// main executable unless it's PIE) to the address.
393397
///
394-
/// <count> - total aggregated count of the branch or a fall-through.
398+
/// <count> - total aggregated count.
395399
///
396400
/// <mispred_count> - the number of times the branch was mispredicted.
397-
/// Omitted for fall-throughs.
398401
///
399402
/// Example:
403+
/// Basic samples profile:
404+
/// E cycles
405+
/// S 41be50 3
406+
/// E br_inst_retired.near_taken
407+
/// S 41be60 6
408+
///
409+
/// Trace profile combining branches and fall-throughs:
410+
/// T 4b196f 4b19e0 4b19ef 2
411+
///
412+
/// Legacy branch profile with separate branches and fall-throughs:
400413
/// F 41be50 41be50 3
401414
/// F 41be90 41be90 4
402415
/// B 4b1942 39b57f0 3 0

bolt/lib/Core/BinaryContext.cpp

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1600,7 +1600,21 @@ std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
16001600
SortedFunctions.begin(),
16011601
[](BinaryFunction &BF) { return &BF; });
16021602

1603-
llvm::stable_sort(SortedFunctions, compareBinaryFunctionByIndex);
1603+
llvm::stable_sort(SortedFunctions,
1604+
[](const BinaryFunction *A, const BinaryFunction *B) {
1605+
// Place hot text movers at the start.
1606+
if (A->isHotTextMover() && !B->isHotTextMover())
1607+
return true;
1608+
if (!A->isHotTextMover() && B->isHotTextMover())
1609+
return false;
1610+
if (A->hasValidIndex() && B->hasValidIndex()) {
1611+
return A->getIndex() < B->getIndex();
1612+
}
1613+
if (opts::HotFunctionsAtEnd)
1614+
return B->hasValidIndex();
1615+
else
1616+
return A->hasValidIndex();
1617+
});
16041618
return SortedFunctions;
16051619
}
16061620

@@ -2423,8 +2437,21 @@ BinaryContext::createInstructionPatch(uint64_t Address,
24232437
return PBF;
24242438
}
24252439

2440+
/// Create a new thunk function called Name. The function is appended to
/// ThunkBinaryFunctions, its symbol is registered in the symbol-to-function
/// map, and its state is set to CFG before it is returned.
///
/// NOTE(review): the function is allocated with a raw `new` and stored as a
/// raw pointer; where it is released is not visible here -- confirm.
BinaryFunction *
2441+
BinaryContext::createThunkBinaryFunction(const std::string &Name) {
2442+
ThunkBinaryFunctions.push_back(new BinaryFunction(Name, *this, true));
2443+
BinaryFunction *BF = ThunkBinaryFunctions.back();
2444+
setSymbolToFunctionMap(BF->getSymbol(), BF);
2445+
BF->CurrentState = BinaryFunction::State::CFG;
2446+
return BF;
2447+
}
2448+
24262449
std::pair<size_t, size_t>
24272450
BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
2451+
// Use the original size for non-simple or ignored functions.
2452+
if (!BF.isSimple() || BF.isIgnored())
2453+
return std::make_pair(BF.getSize(), 0);
2454+
24282455
// Adjust branch instruction to match the current layout.
24292456
if (FixBranches)
24302457
BF.fixBranches();

bolt/lib/Core/BinaryEmitter.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,14 @@ void BinaryEmitter::emitFunctions() {
271271

272272
if (Emitted)
273273
Function->setEmitted(/*KeepCFG=*/opts::PrintCacheMetrics);
274+
275+
// Emit thunks.
276+
if (BC.getThunkLocation() != Function)
277+
continue;
278+
279+
for (BinaryFunction *Thunk : BC.getThunkBinaryFunctions()) {
280+
emitFunction(*Thunk, Thunk->getLayout().getMainFragment());
281+
}
274282
}
275283
};
276284

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,10 @@ cl::opt<bool>
114114
cl::desc("try to preserve basic block alignment"),
115115
cl::cat(BoltOptCategory));
116116

117+
// When set, BinaryFunction::print() additionally prints the output start
// address of every basic block that has a non-zero one.
static cl::opt<bool> PrintOffsets("print-offsets",
118+
cl::desc("print basic block offsets"),
119+
cl::Hidden, cl::cat(BoltOptCategory));
120+
117121
static cl::opt<bool> PrintOutputAddressRange(
118122
"print-output-address-range",
119123
cl::desc(
@@ -558,6 +562,11 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
558562
if (BB->isLandingPad())
559563
OS << " Landing Pad\n";
560564

565+
if (opts::PrintOffsets && BB->getOutputStartAddress()) {
566+
OS << " OutputOffset: 0x"
567+
<< Twine::utohexstr(BB->getOutputStartAddress()) << '\n';
568+
}
569+
561570
uint64_t BBExecCount = BB->getExecutionCount();
562571
if (hasValidProfile()) {
563572
OS << " Exec Count : ";
@@ -3584,6 +3593,8 @@ bool BinaryFunction::validateCFG() const {
35843593
}
35853594

35863595
void BinaryFunction::fixBranches() {
3596+
assert(isSimple() && "Expected function with valid CFG.");
3597+
35873598
auto &MIB = BC.MIB;
35883599
MCContext *Ctx = BC.Ctx.get();
35893600

bolt/lib/Passes/Aligner.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,11 @@ static void alignCompact(BinaryFunction &Function,
7777
size_t HotSize = 0;
7878
size_t ColdSize = 0;
7979

80+
if (!Function.hasProfile() && BC.isAArch64()) {
81+
Function.setAlignment(Function.getMinAlignment());
82+
return;
83+
}
84+
8085
for (const BinaryBasicBlock &BB : Function)
8186
if (BB.isSplit())
8287
ColdSize += BC.computeCodeSize(BB.begin(), BB.end(), Emitter);

bolt/lib/Passes/BinaryPasses.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1287,6 +1287,8 @@ Error AssignSections::runOnFunctions(BinaryContext &BC) {
12871287
if (opts::isHotTextMover(Function)) {
12881288
Function.setCodeSectionName(BC.getHotTextMoverSectionName());
12891289
Function.setColdCodeSectionName(BC.getHotTextMoverSectionName());
1290+
// TODO: find a better place to mark a function as a mover.
1291+
Function.setHotTextMover(true);
12901292
continue;
12911293
}
12921294

0 commit comments

Comments
 (0)