Skip to content

Commit dfbc472

Browse files
spupyrevmemfrob
authored and committed
[BOLT] Custom function alignment
Summary: A new 'compact' function aligner that takes function sizes into consideration. The approach is based on the following assumptions: -- It is not desirable to introduce a large offset when aligning short functions, as it leads to a lot of "wasted" address space. -- For longer functions, the offset can be larger than the default 32 bytes; however, using 64 bytes for the offset still worsens performance, as again a lot of address space is wasted. -- Cold parts of functions can still use the default max-32 offset. The algorithm is switched on/off by the flag 'use-compact-aligner' and is controlled by the parameters align-functions-max-bytes and align-cold-functions-max-bytes described above. In my tests the best performance is produced with '-use-compact-aligner=true -align-functions-max-bytes=48 -align-cold-functions-max-bytes=32'. (cherry picked from FBD6194092)
1 parent ba3f372 commit dfbc472

File tree

6 files changed

+175
-19
lines changed

6 files changed

+175
-19
lines changed

bolt/BinaryFunction.h

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,13 @@ class BinaryFunction {
241241
uint64_t MaxSize{std::numeric_limits<uint64_t>::max()};
242242

243243
/// Alignment requirements for the function.
244-
uint64_t Alignment{2};
244+
uint16_t Alignment{2};
245+
246+
/// Maximum number of bytes used for alignment of hot part of the function.
247+
uint16_t MaxAlignmentBytes{0};
248+
249+
/// Maximum number of bytes used for alignment of cold part of the function.
250+
uint16_t MaxColdAlignmentBytes{0};
245251

246252
const MCSymbol *PersonalityFunction{nullptr};
247253
uint8_t PersonalityEncoding{dwarf::DW_EH_PE_sdata4 | dwarf::DW_EH_PE_pcrel};
@@ -1580,15 +1586,33 @@ class BinaryFunction {
15801586
return *this;
15811587
}
15821588

1583-
BinaryFunction &setAlignment(uint64_t Align) {
1589+
BinaryFunction &setAlignment(uint16_t Align) {
15841590
Alignment = Align;
15851591
return *this;
15861592
}
15871593

1588-
uint64_t getAlignment() const {
1594+
uint16_t getAlignment() const {
15891595
return Alignment;
15901596
}
15911597

1598+
/// Set the maximum number of bytes that may be used to align the hot part
/// of the function. Returns a reference to this function so that setter
/// calls can be chained.
BinaryFunction &setMaxAlignmentBytes(uint16_t MaxAlignBytes) {
  this->MaxAlignmentBytes = MaxAlignBytes;
  return *this;
}
1602+
1603+
uint16_t getMaxAlignmentBytes() const {
1604+
return MaxAlignmentBytes;
1605+
}
1606+
1607+
/// Set the maximum number of bytes that may be used to align the cold part
/// of the function. Returns a reference to this function so that setter
/// calls can be chained.
BinaryFunction &setMaxColdAlignmentBytes(uint16_t MaxAlignBytes) {
  this->MaxColdAlignmentBytes = MaxAlignBytes;
  return *this;
}
1611+
1612+
uint16_t getMaxColdAlignmentBytes() const {
1613+
return MaxColdAlignmentBytes;
1614+
}
1615+
15921616
BinaryFunction &setImageAddress(uint64_t Address) {
15931617
ImageAddress = Address;
15941618
return *this;

bolt/BinaryPassManager.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
//===----------------------------------------------------------------------===//
1111

1212
#include "BinaryPassManager.h"
13+
#include "Passes/Aligner.h"
1314
#include "Passes/AllocCombiner.h"
1415
#include "Passes/FrameOptimizer.h"
1516
#include "Passes/IndirectCallPromotion.h"
@@ -393,6 +394,8 @@ void BinaryFunctionPassManager::runAllPasses(
393394
llvm::make_unique<SimplifyConditionalTailCalls>(PrintSCTC),
394395
opts::SimplifyConditionalTailCalls);
395396

397+
Manager.registerPass(llvm::make_unique<AlignerPass>());
398+
396399
// This pass should always run last.*
397400
Manager.registerPass(llvm::make_unique<FinalizeFunctions>(PrintFinalized));
398401

bolt/Passes/Aligner.cpp

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
//===--- Aligner.cpp ------------------------------------------------------===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
//===----------------------------------------------------------------------===//
11+
12+
#include "Aligner.h"
13+
14+
using namespace llvm;
15+
16+
namespace opts {
17+
extern cl::OptionCategory BoltOptCategory;
18+
19+
extern cl::opt<bool> Relocs;
20+
21+
cl::opt<bool>
22+
UseCompactAligner("use-compact-aligner",
23+
cl::desc("Use compact approach for aligning functions"),
24+
cl::init(false),
25+
cl::ZeroOrMore,
26+
cl::cat(BoltOptCategory));
27+
28+
cl::opt<unsigned>
29+
AlignFunctions("align-functions",
30+
cl::desc("align functions at a given value (relocation mode)"),
31+
cl::init(64),
32+
cl::ZeroOrMore,
33+
cl::cat(BoltOptCategory));
34+
35+
cl::opt<unsigned>
36+
AlignFunctionsMaxBytes("align-functions-max-bytes",
37+
cl::desc("maximum number of bytes to use to align functions"),
38+
cl::init(32),
39+
cl::ZeroOrMore,
40+
cl::cat(BoltOptCategory));
41+
42+
} // end namespace opts
43+
44+
namespace llvm {
45+
namespace bolt {
46+
47+
namespace {
48+
49+
// Align function to the specified byte-boundary (typically, 64) offsetting
50+
// the fuction by not more than the corresponding value
51+
void alignMaxBytes(BinaryFunction &Function) {
52+
Function.setAlignment(opts::AlignFunctions);
53+
Function.setMaxAlignmentBytes(opts::AlignFunctionsMaxBytes);
54+
Function.setMaxColdAlignmentBytes(opts::AlignFunctionsMaxBytes);
55+
}
56+
57+
// Align function to the specified byte-boundary (typically, 64) offsetting
58+
// the fuction by not more than the minimum over
59+
// -- the size of the function
60+
// -- the specified number of bytes
61+
void alignCompact(BinaryContext &BC, BinaryFunction &Function) {
62+
size_t HotSize = 0;
63+
size_t ColdSize = 0;
64+
for (const auto *BB : Function.layout()) {
65+
if (BB->isCold())
66+
ColdSize += BC.computeCodeSize(BB->begin(), BB->end());
67+
else
68+
HotSize += BC.computeCodeSize(BB->begin(), BB->end());
69+
}
70+
71+
Function.setAlignment(opts::AlignFunctions);
72+
if (HotSize > 0)
73+
Function.setMaxAlignmentBytes(
74+
std::min(size_t(opts::AlignFunctionsMaxBytes), HotSize));
75+
76+
// using the same option, max-align-bytes, both for cold and hot parts of the
77+
// functions, as aligning cold functions typically does not affect performance
78+
if (ColdSize > 0)
79+
Function.setMaxColdAlignmentBytes(
80+
std::min(size_t(opts::AlignFunctionsMaxBytes), ColdSize));
81+
}
82+
83+
} // end anonymous namespace
84+
85+
// Pass entry point: assign alignment settings to every function in BFs,
// using the compact policy when -use-compact-aligner is set and the default
// max-bytes policy otherwise. Does nothing when opts::Relocs is off.
// LargeFunctions is not used by this pass.
void AlignerPass::runOnFunctions(BinaryContext &BC,
                                 std::map<uint64_t, BinaryFunction> &BFs,
                                 std::set<uint64_t> &LargeFunctions) {
  if (opts::Relocs) {
    for (auto &Entry : BFs) {
      BinaryFunction &BF = Entry.second;
      if (!opts::UseCompactAligner) {
        alignMaxBytes(BF);
        continue;
      }
      alignCompact(BC, BF);
    }
  }
}
99+
100+
} // end namespace bolt
101+
} // end namespace llvm

bolt/Passes/Aligner.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
//===--------- Passes/Aligner.h -------------------------------------------===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
//===----------------------------------------------------------------------===//
11+
12+
#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_ALIGNER_H
13+
#define LLVM_TOOLS_LLVM_BOLT_PASSES_ALIGNER_H
14+
15+
#include "BinaryPasses.h"
16+
17+
namespace llvm {
18+
namespace bolt {
19+
20+
class AlignerPass : public BinaryFunctionPass {
21+
public:
22+
explicit AlignerPass() : BinaryFunctionPass(false) {}
23+
24+
const char *getName() const override {
25+
return "aligner";
26+
}
27+
28+
/// Pass entry point
29+
void runOnFunctions(BinaryContext &BC,
30+
std::map<uint64_t, BinaryFunction> &BFs,
31+
std::set<uint64_t> &LargeFunctions) override;
32+
};
33+
34+
} // namespace bolt
35+
} // namespace llvm
36+
37+
38+
#endif

bolt/Passes/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
add_llvm_library(LLVMBOLTPasses
2+
Aligner.cpp
23
AllocCombiner.cpp
34
BinaryPasses.cpp
45
BinaryFunctionCallGraph.cpp

bolt/RewriteInstance.cpp

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -90,20 +90,6 @@ OutputFilename("o",
9090
cl::Required,
9191
cl::cat(BoltOutputCategory));
9292

93-
cl::opt<unsigned>
94-
AlignFunctions("align-functions",
95-
cl::desc("align functions at a given value (relocation mode)"),
96-
cl::init(64),
97-
cl::ZeroOrMore,
98-
cl::cat(BoltOptCategory));
99-
100-
cl::opt<unsigned>
101-
AlignFunctionsMaxBytes("align-functions-max-bytes",
102-
cl::desc("maximum number of bytes to use to align functions"),
103-
cl::init(32),
104-
cl::ZeroOrMore,
105-
cl::cat(BoltOptCategory));
106-
10793
cl::opt<bool>
10894
AllowStripped("allow-stripped",
10995
cl::desc("allow processing of stripped binaries"),
@@ -2190,8 +2176,11 @@ void RewriteInstance::emitFunction(MCStreamer &Streamer, BinaryFunction &Functio
21902176

21912177
if (opts::Relocs) {
21922178
Streamer.EmitCodeAlignment(BinaryFunction::MinAlign);
2193-
Streamer.EmitCodeAlignment(opts::AlignFunctions,
2194-
opts::AlignFunctionsMaxBytes);
2179+
auto MaxAlignBytes = EmitColdPart
2180+
? Function.getMaxColdAlignmentBytes()
2181+
: Function.getMaxAlignmentBytes();
2182+
if (MaxAlignBytes > 0)
2183+
Streamer.EmitCodeAlignment(Function.getAlignment(), MaxAlignBytes);
21952184
} else {
21962185
Streamer.EmitCodeAlignment(Function.getAlignment());
21972186
Streamer.setCodeSkew(EmitColdPart ? 0 : Function.getAddress());

0 commit comments

Comments
 (0)