Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,11 @@ class BinaryFunction {
/// fragment of the function.
SmallVector<MCSymbol *, 0> LSDASymbols;

/// Each function fragment may have another fragment containing all landing
/// pads for it. If that's the case, the LP fragment will be stored in the
/// vector below with indexing starting with the main fragment.
SmallVector<std::optional<FragmentNum>, 0> LPFragments;

/// Map to discover which CFIs are attached to a given instruction offset.
/// Maps an instruction offset into a FrameInstructions offset.
/// This is only relevant to the buildCFG phase and is discarded afterwards.
Expand Down Expand Up @@ -1885,6 +1890,42 @@ class BinaryFunction {
return LSDASymbols[F.get()];
}

/// If all landing pads for the function fragment \p F are located in fragment
/// \p LPF, designate \p LPF as a landing-pad fragment for \p F. Passing
/// std::nullopt in LPF, means that landing pads for \p F are located in more
/// than one fragment.
void setLPFragment(const FragmentNum F, std::optional<FragmentNum> LPF) {
if (F.get() >= LPFragments.size())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably I'm missing something here -- but why resize + [] instead of insert?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because we are growing the vector on demand? Now I'm thinking if I'm missing something.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SG, nevermind.

LPFragments.resize(F.get() + 1);

LPFragments[F.get()] = LPF;
}

/// If function fragment \p F has a designated landing pad fragment, i.e. a
/// fragment that contains all landing pads for throwers in \p F, then return
/// that landing pad fragment number. If \p F does not need landing pads,
/// return \p F. Return nullptr if landing pads for \p F are scattered among
/// several function fragments.
std::optional<FragmentNum> getLPFragment(const FragmentNum F) {
if (!isSplit()) {
assert(F == FragmentNum::main() && "Invalid fragment number");
return FragmentNum::main();
}

if (F.get() >= LPFragments.size())
return std::nullopt;

return LPFragments[F.get()];
}

/// Return a symbol corresponding to a landing pad fragment for fragment \p F.
/// See getLPFragment().
MCSymbol *getLPStartSymbol(const FragmentNum F) {
if (std::optional<FragmentNum> LPFragment = getLPFragment(F))
return getSymbol(*LPFragment);
return nullptr;
}

void setOutputDataAddress(uint64_t Address) { OutputDataOffset = Address; }

uint64_t getOutputDataAddress() const { return OutputDataOffset; }
Expand Down
37 changes: 23 additions & 14 deletions bolt/lib/Core/BinaryEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ class BinaryEmitter {

void emitCFIInstruction(const MCCFIInstruction &Inst) const;

/// Emit exception handling ranges for the function.
/// Emit exception handling ranges for the function fragment.
void emitLSDA(BinaryFunction &BF, const FunctionFragment &FF);

/// Emit line number information corresponding to \p NewLoc. \p PrevLoc
Expand Down Expand Up @@ -915,15 +915,15 @@ void BinaryEmitter::emitLSDA(BinaryFunction &BF, const FunctionFragment &FF) {
// Emit the LSDA header.

// If LPStart is omitted, then the start of the FDE is used as a base for
// landing pad displacements. Then, if a cold fragment starts with a landing
// pad, this means that the first landing pad offset will be 0. However, C++
// runtime treats 0 as if there is no landing pad present, thus we *must* emit
// non-zero offsets for all valid LPs.
// landing pad displacements. Then, if a cold fragment starts with
// a landing pad, this means that the first landing pad offset will be 0.
// However, C++ runtime will treat 0 as if there is no landing pad, thus we
// cannot emit LP offset as 0.
//
// As a solution, for fixed-address binaries we set LPStart to 0, and for
// position-independent binaries we set LP start to FDE start minus one byte
// for FDEs that start with a landing pad.
const bool NeedsLPAdjustment = !FF.empty() && FF.front()->isLandingPad();
// position-independent binaries we offset LP start by one byte.
bool NeedsLPAdjustment = false;
const MCSymbol *LPStartSymbol = nullptr;
std::function<void(const MCSymbol *)> emitLandingPad;
if (BC.HasFixedLoadAddress) {
Streamer.emitIntValue(dwarf::DW_EH_PE_udata4, 1); // LPStart format
Expand All @@ -935,17 +935,26 @@ void BinaryEmitter::emitLSDA(BinaryFunction &BF, const FunctionFragment &FF) {
Streamer.emitIntValue(0, 4);
};
} else {
if (NeedsLPAdjustment) {
// Use relative LPStart format and emit LPStart as [SymbolStart - 1].
std::optional<FragmentNum> LPFN = BF.getLPFragment(FF.getFragmentNum());
LPStartSymbol = BF.getLPStartSymbol(FF.getFragmentNum());
assert(LPFN && LPStartSymbol && "Expected LPStart symbol to be set");

const FunctionFragment &LPFragment = BF.getLayout().getFragment(*LPFN);
NeedsLPAdjustment =
(!LPFragment.empty() && LPFragment.front()->isLandingPad());

// Emit LPStart encoding and optionally LPStart.
if (NeedsLPAdjustment || LPStartSymbol != StartSymbol) {
Streamer.emitIntValue(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4, 1);
MCSymbol *DotSymbol = BC.Ctx->createTempSymbol("LPBase");
Streamer.emitLabel(DotSymbol);

const MCExpr *LPStartExpr = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(StartSymbol, *BC.Ctx),
MCSymbolRefExpr::create(LPStartSymbol, *BC.Ctx),
MCSymbolRefExpr::create(DotSymbol, *BC.Ctx), *BC.Ctx);
LPStartExpr = MCBinaryExpr::createSub(
LPStartExpr, MCConstantExpr::create(1, *BC.Ctx), *BC.Ctx);
if (NeedsLPAdjustment)
LPStartExpr = MCBinaryExpr::createSub(
LPStartExpr, MCConstantExpr::create(1, *BC.Ctx), *BC.Ctx);
Streamer.emitValue(LPStartExpr, 4);
} else {
// DW_EH_PE_omit means FDE start (StartSymbol) will be used as LPStart.
Expand All @@ -955,7 +964,7 @@ void BinaryEmitter::emitLSDA(BinaryFunction &BF, const FunctionFragment &FF) {
if (LPSymbol) {
const MCExpr *LPOffsetExpr = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(LPSymbol, *BC.Ctx),
MCSymbolRefExpr::create(StartSymbol, *BC.Ctx), *BC.Ctx);
MCSymbolRefExpr::create(LPStartSymbol, *BC.Ctx), *BC.Ctx);
if (NeedsLPAdjustment)
LPOffsetExpr = MCBinaryExpr::createAdd(
LPOffsetExpr, MCConstantExpr::create(1, *BC.Ctx), *BC.Ctx);
Expand Down
43 changes: 41 additions & 2 deletions bolt/lib/Passes/SplitFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -901,8 +901,43 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
// have to be placed in the same fragment. When we split them, create
// trampoline landing pads that will redirect the execution to real LPs.
TrampolineSetType Trampolines;
if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit())
Trampolines = createEHTrampolines(BF);
if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit()) {
// If all landing pads for this fragment are grouped in one (potentially
// different) fragment, we can set LPStart to the start of that fragment
// and avoid trampoline code.
bool NeedsTrampolines = false;
for (FunctionFragment &FF : BF.getLayout().fragments()) {
// Vector of fragments that contain landing pads for this fragment.
SmallVector<FragmentNum, 4> LandingPadFragments;
for (const BinaryBasicBlock *BB : FF)
for (const BinaryBasicBlock *LPB : BB->landing_pads())
LandingPadFragments.push_back(LPB->getFragmentNum());

// Eliminate duplicate entries from the vector.
llvm::sort(LandingPadFragments);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be useful to add a comment on why this sorting is needed

auto Last = llvm::unique(LandingPadFragments);
LandingPadFragments.erase(Last, LandingPadFragments.end());

if (LandingPadFragments.size() == 0) {
// If the fragment has no landing pads, we can safely set itself as its
// landing pad fragment.
BF.setLPFragment(FF.getFragmentNum(), FF.getFragmentNum());
} else if (LandingPadFragments.size() == 1) {
BF.setLPFragment(FF.getFragmentNum(), LandingPadFragments.front());
} else {
NeedsTrampolines = true;
break;
}
}

// Trampolines guarantee that all landing pads for any given fragment will
// be contained in the same fragment.
if (NeedsTrampolines) {
for (FunctionFragment &FF : BF.getLayout().fragments())
BF.setLPFragment(FF.getFragmentNum(), FF.getFragmentNum());
Trampolines = createEHTrampolines(BF);
}
}

// Check the new size to see if it's worth splitting the function.
if (BC.isX86() && LayoutUpdated) {
Expand Down Expand Up @@ -933,6 +968,10 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
}
}

// Restore LP fragment for the main fragment if the splitting was undone.
if (BF.hasEHRanges() && !BF.isSplit())
BF.setLPFragment(FragmentNum::main(), FragmentNum::main());

// Fix branches if the splitting decision of the pass after function
// reordering is different from that of the pass before function reordering.
if (LayoutUpdated && BC.HasFinalizedFunctionOrder)
Expand Down
86 changes: 86 additions & 0 deletions bolt/test/X86/pie-eh-split-undo.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# REQUIRES: system-linux

# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t.o
# RUN: link_fdata %s %t.o %t.fdata
# RUN: llvm-strip --strip-unneeded %t.o
# RUN: ld.lld --pie %t.o -o %t.exe -q
# RUN: llvm-bolt %t.exe -o %t.out --data %t.fdata --split-functions --split-eh \
# RUN: --split-all-cold --print-after-lowering --print-only=_start 2>&1 \
# RUN: | FileCheck %s

## _start has two landing pads: one hot and one cold. Hence, BOLT will introduce
## a landing pad trampoline. However, the trampoline code will make the main
## split fragment larger than the whole function before split. Then BOLT will
## undo the splitting and remove the trampoline.

# CHECK: Binary Function "_start"
# CHECK: IsSplit :
# CHECK-SAME: 0

## Check that a landing pad trampoline was created, but contains no instructions
## and falls though to the real landing pad.

# CHECK: {{^[^[:space:]]+}} (0 instructions
# CHECK-NEXT: Landing Pad{{$}}
# CHECK: Exec Count
# CHECK-SAME: : 0
# CHECK: Successors:
# CHECK-SAME: [[LP:[^[:space:]]+]]
# CHECK-EMPTY:
# CHECK-NEXT: [[LP]]

.text
.global foo
.type foo, %function
foo:
.cfi_startproc
ret
.cfi_endproc
.size foo, .-foo

.globl _start
.type _start, %function
_start:
# FDATA: 0 [unknown] 0 1 _start 0 1 100
.Lfunc_begin0:
.cfi_startproc
.cfi_lsda 27, .Lexception0
call foo
.Ltmp0:
call foo
.Ltmp1:
ret

## Cold landing pad.
.LLP1:
ret

## Hot landing pad.
LLP0:
# FDATA: 0 [unknown] 0 1 _start #LLP0# 1 100
ret

.cfi_endproc
.Lfunc_end0:
.size _start, .-_start

## EH table.
.section .gcc_except_table,"a",@progbits
.p2align 2
GCC_except_table0:
.Lexception0:
.byte 255 # @LPStart Encoding = omit
.byte 255 # @TType Encoding = omit
.byte 1 # Call site Encoding = uleb128
.uleb128 .Lcst_end0-.Lcst_begin0
.Lcst_begin0:
.uleb128 .Lfunc_begin0-.Lfunc_begin0 # >> Call Site 1 <<
.uleb128 .Ltmp0-.Lfunc_begin0 # Call between .Lfunc_begin0 and .Ltmp0
.uleb128 LLP0-.Lfunc_begin0 # jumps to LLP0
.byte 0 # On action: cleanup
.uleb128 .Ltmp0-.Lfunc_begin0 # >> Call Site 2 <<
.uleb128 .Ltmp1-.Ltmp0 # Call between .Ltmp0 and .Ltmp1
.uleb128 .LLP1-.Lfunc_begin0 # jumps to .LLP1
.byte 0 # On action: cleanup
.Lcst_end0:

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Assembly generated from building the followingC++ code with the following
# Assembly generated from building the following C++ code with the following
# command using trunk clang. Then, basic block at .LBB1_7 was moved before the
# landing pad.
#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,16 @@ RUN: llvm-bolt %t -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp \
RUN: --split-functions --split-eh --print-after-lowering \
RUN: --print-only=_Z10throw_testiPPc 2>&1 | FileCheck %s

## Hot code in the test case gets larger after splitting because of jump
## instruction relaxation. Check that BOLT reverts the split correctly.
## Check that a landing pad is split from its thrower and does not require a
## trampoline LP.
CHECK: Binary Function "_Z10throw_testiPPc"
CHECK: IsSplit :
CHECK-SAME: 0

## Check that the landing pad trampoline was created, but contains no
## instructions and falls to the real landing pad.
CHECK: {{^[^[:space:]]+}} (0 instructions
CHECK-NEXT: Landing Pad{{$}}
CHECK: Exec Count
CHECK-SAME: : 0
CHECK: Successors:
CHECK-SAME: [[LP:[^[:space:]]+]]
CHECK-EMPTY:
CHECK-NEXT: [[LP]]
CHECK-DAG: Exec Count
CHECK-NOT: Exec Count
CHECK-DAG: callq __cxa_begin_catch
CHECK-SAME: 1
CHECK: callq {{.*}} # handler: [[LPAD:.*]];
CHECK-NOT: Landing Pad{{$}}
CHECK: HOT-COLD SPLIT POINT
CHECK: {{^}}[[LPAD]]
CHECK-NEXT: Landing Pad

## Verify the output still executes correctly when the exception path is being
## taken.
Expand Down
Loading