Skip to content

Commit f263a66

Browse files
committed
[BOLT] Split functions with exceptions in shared objects and PIEs
Add functionality to allow splitting code with C++ exceptions in shared libraries and PIEs. To overcome a limitation in the exception ranges format, for functions whose fragments span multiple sections, add trampoline landing pads in the same section as the corresponding throwing range. Reviewed By: Amir Differential Revision: https://reviews.llvm.org/D127936
1 parent cc65f3e commit f263a66

File tree

8 files changed

+132
-21
lines changed

8 files changed

+132
-21
lines changed

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1046,9 +1046,14 @@ class MCPlusBuilder {
10461046
/// Return handler and action info for invoke instruction if present.
10471047
Optional<MCPlus::MCLandingPad> getEHInfo(const MCInst &Inst) const;
10481048

1049-
// Add handler and action info for call instruction.
1049+
/// Add handler and action info for call instruction.
10501050
void addEHInfo(MCInst &Inst, const MCPlus::MCLandingPad &LP);
10511051

1052+
/// Update exception-handling info for the invoke instruction \p Inst.
1053+
/// Return true on success and false otherwise, e.g. if the instruction is
1054+
/// not an invoke.
1055+
bool updateEHInfo(MCInst &Inst, const MCPlus::MCLandingPad &LP);
1056+
10521057
/// Return non-negative GNU_args_size associated with the instruction
10531058
/// or -1 if there's no associated info.
10541059
int64_t getGnuArgsSize(const MCInst &Inst) const;

bolt/include/bolt/Passes/SplitFunctions.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,13 @@ class SplitFunctions : public BinaryFunctionPass {
3131
/// Split function body into fragments.
3232
void splitFunction(BinaryFunction &Function);
3333

34+
/// Create trampoline landing pads for exception handling code to guarantee
35+
/// that every landing pad is placed in the same function fragment as the
36+
/// corresponding thrower block. The trampoline landing pad, when created,
37+
/// will redirect the execution to the real landing pad in a different
38+
/// fragment.
39+
void createEHTrampolines(BinaryFunction &Function) const;
40+
3441
std::atomic<uint64_t> SplitBytesHot{0ull};
3542
std::atomic<uint64_t> SplitBytesCold{0ull};
3643

bolt/lib/Core/BinaryEmitter.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -912,8 +912,8 @@ void BinaryEmitter::emitLSDA(BinaryFunction &BF, bool EmitColdPart) {
912912
// defined in the same section and hence cannot place the landing pad into a
913913
// cold fragment when the corresponding call site is in the hot fragment.
914914
// Because of this issue and the previously described issue of possible
915-
// zero-offset landing pad we disable splitting of exception-handling
916-
// code for shared objects.
915+
// zero-offset landing pad we have to place landing pads in the same section
916+
// as the corresponding invokes for shared objects.
917917
std::function<void(const MCSymbol *)> emitLandingPad;
918918
if (BC.HasFixedLoadAddress) {
919919
Streamer.emitIntValue(dwarf::DW_EH_PE_udata4, 1); // LPStart format
@@ -925,8 +925,6 @@ void BinaryEmitter::emitLSDA(BinaryFunction &BF, bool EmitColdPart) {
925925
Streamer.emitSymbolValue(LPSymbol, 4);
926926
};
927927
} else {
928-
assert(!EmitColdPart &&
929-
"cannot have exceptions in cold fragment for shared object");
930928
Streamer.emitIntValue(dwarf::DW_EH_PE_omit, 1); // LPStart format
931929
emitLandingPad = [&](const MCSymbol *LPSymbol) {
932930
if (!LPSymbol)

bolt/lib/Core/MCPlusBuilder.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,17 @@ void MCPlusBuilder::addEHInfo(MCInst &Inst, const MCLandingPad &LP) {
159159
}
160160
}
161161

162+
/// Overwrite the exception-handling annotations of \p Inst with the landing
/// pad / action pair \p LP.
///
/// \returns false and leaves \p Inst untouched when \p Inst is not an invoke;
/// true once both the landing pad and the action annotations have been set.
bool MCPlusBuilder::updateEHInfo(MCInst &Inst, const MCLandingPad &LP) {
  const bool IsInvoke = isInvoke(Inst);
  if (IsInvoke) {
    // Annotation operands carry 64-bit integers, so the landing pad pointer
    // and the action value are both converted to int64_t before being stored.
    const int64_t LandingPadValue = reinterpret_cast<int64_t>(LP.first);
    const int64_t ActionValue = static_cast<int64_t>(LP.second);

    setAnnotationOpValue(Inst, MCAnnotation::kEHLandingPad, LandingPadValue);
    setAnnotationOpValue(Inst, MCAnnotation::kEHAction, ActionValue);
  }
  return IsInvoke;
}
172+
162173
int64_t MCPlusBuilder::getGnuArgsSize(const MCInst &Inst) const {
163174
Optional<int64_t> Value =
164175
getAnnotationOpValue(Inst, MCAnnotation::kGnuArgsSize);

bolt/lib/Passes/SplitFunctions.cpp

Lines changed: 76 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ void SplitFunctions::splitFunction(BinaryFunction &BF) {
118118

119119
bool AllCold = true;
120120
for (BinaryBasicBlock *BB : BF.layout()) {
121-
uint64_t ExecCount = BB->getExecutionCount();
121+
const uint64_t ExecCount = BB->getExecutionCount();
122122
if (ExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
123123
return;
124124
if (ExecCount != 0)
@@ -140,12 +140,12 @@ void SplitFunctions::splitFunction(BinaryFunction &BF) {
140140
<< " pre-split is <0x"
141141
<< Twine::utohexstr(OriginalHotSize) << ", 0x"
142142
<< Twine::utohexstr(ColdSize) << ">\n");
143-
}
144-
145-
if (opts::SplitFunctions == SplitFunctions::ST_LARGE && !BC.HasRelocations) {
146-
// Split only if the function wouldn't fit.
147-
if (OriginalHotSize <= BF.getMaxSize())
148-
return;
143+
if (opts::SplitFunctions == SplitFunctions::ST_LARGE &&
144+
!BC.HasRelocations) {
145+
// Split only if the function wouldn't fit.
146+
if (OriginalHotSize <= BF.getMaxSize())
147+
return;
148+
}
149149
}
150150

151151
// Never outline the first basic block.
@@ -164,9 +164,9 @@ void SplitFunctions::splitFunction(BinaryFunction &BF) {
164164
BB->setCanOutline(false);
165165
continue;
166166
}
167+
167168
if (BF.hasEHRanges() && !opts::SplitEH) {
168-
// We cannot move landing pads (or rather entry points for landing
169-
// pads).
169+
// We cannot move landing pads (or rather entry points for landing pads).
170170
if (BB->isLandingPad()) {
171171
BB->setCanOutline(false);
172172
continue;
@@ -176,7 +176,7 @@ void SplitFunctions::splitFunction(BinaryFunction &BF) {
176176
// that the block never throws, it is safe to move the block to
177177
// decrease the size of the function.
178178
for (MCInst &Instr : *BB) {
179-
if (BF.getBinaryContext().MIB->isInvoke(Instr)) {
179+
if (BC.MIB->isInvoke(Instr)) {
180180
BB->setCanOutline(false);
181181
break;
182182
}
@@ -214,6 +214,12 @@ void SplitFunctions::splitFunction(BinaryFunction &BF) {
214214
BB->setIsCold(true);
215215
}
216216

217+
// For shared objects, place invoke instructions and corresponding landing
218+
// pads in the same fragment. To reduce hot code size, create trampoline
219+
// landing pads that will redirect the execution to the real LP.
220+
if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit())
221+
createEHTrampolines(BF);
222+
217223
// Check the new size to see if it's worth splitting the function.
218224
if (BC.isX86() && BF.isSplit()) {
219225
std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF);
@@ -237,5 +243,65 @@ void SplitFunctions::splitFunction(BinaryFunction &BF) {
237243
}
238244
}
239245

246+
/// Make sure every invoke in \p BF refers to a landing pad that has the same
/// hot/cold placement as the block containing the invoke.
///
/// Whenever an invoke and its landing pad disagree on isCold(), the invoke is
/// redirected to a trampoline block that is created with the thrower's
/// hot/cold flag and has the real landing pad as its only successor. One
/// trampoline is created per real landing pad and then reused by later
/// invokes. If at least one trampoline was created, the layout is re-sorted
/// so that non-cold blocks precede cold ones, branches are fixed up and the
/// landing pad information of the function is recomputed.
void SplitFunctions::createEHTrampolines(BinaryFunction &BF) const {
  const auto &MIB = BF.getBinaryContext().MIB;

  // Real landing pad label -> label of the trampoline standing in for it.
  std::unordered_map<const MCSymbol *, const MCSymbol *> LPTrampolines;

  // Returns the trampoline label registered for RealLPLabel, creating the
  // trampoline block first if this landing pad has not been seen yet.
  auto getOrCreateTrampoline =
      [&](BinaryBasicBlock *Thrower, BinaryBasicBlock *RealLP,
          const MCSymbol *RealLPLabel) -> const MCSymbol * {
    const auto Known = LPTrampolines.find(RealLPLabel);
    if (Known != LPTrampolines.end())
      return Known->second;

    // The new block lives in the same fragment as the thrower. No jump
    // instruction is inserted here; fixBranches() is expected to add it.
    BinaryBasicBlock *Trampoline = BF.addBasicBlock();
    Trampoline->setIsCold(Thrower->isCold());
    Trampoline->setExecutionCount(RealLP->getExecutionCount());
    Trampoline->addSuccessor(RealLP, Trampoline->getExecutionCount());
    Trampoline->setCFIState(RealLP->getCFIState());

    const MCSymbol *NewLabel = Trampoline->getLabel();
    LPTrampolines.emplace(RealLPLabel, NewLabel);
    return NewLabel;
  };

  // Walk a snapshot of the block list: trampolines are appended to the
  // function while iterating, which would invalidate its own iterators.
  std::vector<BinaryBasicBlock *> Snapshot(BF.pbegin(), BF.pend());
  for (BinaryBasicBlock *Thrower : Snapshot) {
    for (MCInst &Inst : *Thrower) {
      const Optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Inst);
      const bool HasLandingPad = EHInfo && EHInfo->first;
      if (!HasLandingPad)
        continue;

      const MCSymbol *RealLPLabel = EHInfo->first;
      // NOTE(review): assumes getBasicBlockForLabel() never returns null for
      // a landing pad label; the result is dereferenced unchecked - confirm.
      BinaryBasicBlock *RealLP = BF.getBasicBlockForLabel(RealLPLabel);
      if (Thrower->isCold() != RealLP->isCold()) {
        const MCSymbol *TrampolineLabel =
            getOrCreateTrampoline(Thrower, RealLP, RealLPLabel);

        // Point the invoke at the trampoline, keeping its action unchanged.
        // NOTE(review): the status returned by updateEHInfo() is ignored;
        // presumably an instruction with EH info is always an invoke - verify.
        (void)MIB->updateEHInfo(
            Inst, MCPlus::MCLandingPad(TrampolineLabel, EHInfo->second));
      }
    }
  }

  // Nothing was redirected, so layout and CFG are left untouched.
  if (LPTrampolines.empty())
    return;

  // Trampolines were appended at the end of the function. A stable sort on
  // the cold flag moves each one to the end of its fragment while keeping the
  // relative order of all other blocks.
  std::stable_sort(BF.layout_begin(), BF.layout_end(),
                   [](BinaryBasicBlock *LHS, BinaryBasicBlock *RHS) {
                     return !LHS->isCold() && RHS->isCold();
                   });

  // Conservatively introduce branch instructions.
  BF.fixBranches();

  // Update exception-handling CFG for the function.
  BF.recomputeLandingPads();
}
305+
240306
} // namespace bolt
241307
} // namespace llvm

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1686,11 +1686,6 @@ void RewriteInstance::adjustCommandLineOptions() {
16861686
opts::SplitEH = false;
16871687
}
16881688

1689-
if (opts::SplitEH && !BC->HasFixedLoadAddress) {
1690-
errs() << "BOLT-WARNING: disabling -split-eh for shared object\n";
1691-
opts::SplitEH = false;
1692-
}
1693-
16941689
if (opts::StrictMode && !BC->HasRelocations) {
16951690
errs() << "BOLT-WARNING: disabling strict mode (-strict) in non-relocation "
16961691
"mode\n";

bolt/test/runtime/X86/Inputs/exceptions_split.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ int main(int argc, char **argv)
2424
{
2525
unsigned r = 0;
2626

27-
uint64_t limit = (argc >= 2 ? 10 : 500000000);
27+
uint64_t limit = (argc >= 2 ? 10 : 5000);
2828
for (uint64_t i = 0; i < limit; ++i) {
2929
i += foo();
3030
try {
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
## Check that BOLT successfully splits C++ exception-handling code for
2+
## PIEs or shared objects.
3+
4+
REQUIRES: system-linux
5+
6+
RUN: %clangxx %cxxflags -pie -fPIC %p/Inputs/exceptions_split.cpp -Wl,-q -o %t
7+
RUN: llvm-bolt %t -o %t.instr --instrument --instrumentation-file=%t.fdata
8+
9+
## Record profile with invocation that does not throw exceptions.
10+
RUN: %t.instr
11+
12+
RUN: llvm-bolt %t -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp \
13+
RUN: --split-functions=1 --split-eh --print-after-lowering \
14+
RUN: --print-only=main 2>&1 | FileCheck %s
15+
16+
## All calls to printf() should be from exception handling code that was
17+
## recorded as cold during the profile collection run. Check that the calls
18+
## are placed after the split point.
19+
CHECK-NOT: callq printf
20+
CHECK: HOT-COLD SPLIT POINT
21+
CHECK: callq printf
22+
23+
## Verify the output still executes correctly when the exception path is being
24+
## taken.
25+
RUN: %t.bolt arg1 arg2 arg3 2>&1 | FileCheck --check-prefix=CHECK-BOLTED %s
26+
27+
CHECK-BOLTED: catch 2
28+
CHECK-BOLTED-NEXT: catch 1
29+

0 commit comments

Comments
 (0)