Skip to content
4 changes: 4 additions & 0 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -908,6 +908,10 @@ class BinaryFunction {
return BB && BB->getOffset() == Offset ? BB : nullptr;
}

const BinaryBasicBlock *getBasicBlockAtOffset(uint64_t Offset) const {
return const_cast<BinaryFunction *>(this)->getBasicBlockAtOffset(Offset);
}

/// Retrieve the landing pad BB associated with invoke instruction \p Invoke
/// that is in \p BB. Return nullptr if none exists
BinaryBasicBlock *getLandingPadBBFor(const BinaryBasicBlock &BB,
Expand Down
3 changes: 2 additions & 1 deletion bolt/include/bolt/Profile/DataAggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,8 @@ class DataAggregator : public DataReader {
uint64_t Mispreds);

/// Register a \p Branch.
bool doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds);
bool doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds,
bool IsPreagg);

/// Register a trace between two LBR entries supplied in execution order.
bool doTrace(const LBREntry &First, const LBREntry &Second,
Expand Down
111 changes: 77 additions & 34 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -778,42 +778,75 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
}

bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
uint64_t Mispreds) {
bool IsReturn = false;
auto handleAddress = [&](uint64_t &Addr, bool IsFrom) -> BinaryFunction * {
if (BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr)) {
Addr -= Func->getAddress();
if (IsFrom) {
auto checkReturn = [&](auto MaybeInst) {
IsReturn = MaybeInst && BC->MIB->isReturn(*MaybeInst);
};
if (Func->hasInstructions())
checkReturn(Func->getInstructionAtOffset(Addr));
else
checkReturn(Func->disassembleInstructionAtOffset(Addr));
}
uint64_t Mispreds, bool IsPreagg) {
// Returns whether \p Offset in \p Func contains a return instruction.
auto checkReturn = [&](const BinaryFunction &Func, const uint64_t Offset) {
auto isReturn = [&](auto MI) { return MI && BC->MIB->isReturn(*MI); };
return Func.hasInstructions()
? isReturn(Func.getInstructionAtOffset(Offset))
: isReturn(Func.disassembleInstructionAtOffset(Offset));
};

if (BAT)
Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);
// Returns whether \p Offset in \p Func may be a call continuation excluding
// entry points and landing pads.
auto checkCallCont = [&](const BinaryFunction &Func, const uint64_t Offset) {
// No call continuation at a function start.
if (!Offset)
return false;

// FIXME: support BAT case where the function might be in empty state
// (split fragments declared non-simple).
if (!Func.hasCFG())
return false;

// The offset should not be an entry point or a landing pad.
const BinaryBasicBlock *ContBB = Func.getBasicBlockAtOffset(Offset);
return ContBB && !ContBB->isEntryPoint() && !ContBB->isLandingPad();
};

if (BinaryFunction *ParentFunc = getBATParentFunction(*Func)) {
Func = ParentFunc;
if (IsFrom)
NumColdSamples += Count;
}
// Mutates \p Addr to an offset into the containing function, performing BAT
// offset translation and parent lookup.
//
// Returns the containing function (or BAT parent) and whether the address
// corresponds to a return (if \p IsFrom) or a call continuation (otherwise).
auto handleAddress = [&](uint64_t &Addr, bool IsFrom) {
BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr);
if (!Func)
return std::pair{Func, false};

return Func;
}
return nullptr;
Addr -= Func->getAddress();

bool IsRetOrCallCont =
IsFrom ? checkReturn(*Func, Addr) : checkCallCont(*Func, Addr);

if (BAT)
Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);

BinaryFunction *ParentFunc = getBATParentFunction(*Func);
if (!ParentFunc)
return std::pair{Func, IsRetOrCallCont};

if (IsFrom)
NumColdSamples += Count;

return std::pair{ParentFunc, IsRetOrCallCont};
};

BinaryFunction *FromFunc = handleAddress(From, /*IsFrom=*/true);
uint64_t ToOrig = To;
auto [FromFunc, IsReturn] = handleAddress(From, /*IsFrom*/ true);
auto [ToFunc, IsCallCont] = handleAddress(To, /*IsFrom*/ false);
if (!FromFunc && !ToFunc)
return false;

// Record call to continuation trace.
if (IsPreagg && FromFunc != ToFunc && (IsReturn || IsCallCont)) {
LBREntry First{ToOrig - 1, ToOrig - 1, false};
LBREntry Second{ToOrig, ToOrig, false};
return doTrace(First, Second, Count);
}
// Ignore returns.
if (IsReturn)
return true;
BinaryFunction *ToFunc = handleAddress(To, /*IsFrom=*/false);
if (!FromFunc && !ToFunc)
return false;

// Treat recursive control transfers as inter-branches.
if (FromFunc == ToFunc && To != 0) {
Expand All @@ -830,10 +863,19 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
if (!FromFunc || !ToFunc) {
LLVM_DEBUG({
dbgs() << "Out of range trace starting in " << FromFunc->getPrintName()
<< formatv(" @ {0:x}", First.To - FromFunc->getAddress())
<< " and ending in " << ToFunc->getPrintName()
<< formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress());
dbgs() << "Out of range trace starting in ";
if (FromFunc)
dbgs() << formatv("{0} @ {1:x}", *FromFunc,
First.To - FromFunc->getAddress());
else
dbgs() << Twine::utohexstr(First.To);
dbgs() << " and ending in ";
if (ToFunc)
dbgs() << formatv("{0} @ {1:x}", *ToFunc,
Second.From - ToFunc->getAddress());
else
dbgs() << Twine::utohexstr(Second.From);
dbgs() << '\n';
});
NumLongRangeTraces += Count;
return false;
Expand Down Expand Up @@ -1620,7 +1662,8 @@ void DataAggregator::processBranchEvents() {
for (const auto &AggrLBR : BranchLBRs) {
const Trace &Loc = AggrLBR.first;
const TakenBranchInfo &Info = AggrLBR.second;
doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount,
/*IsPreagg*/ false);
}
}

Expand Down Expand Up @@ -1781,7 +1824,7 @@ void DataAggregator::processPreAggregated() {
switch (AggrEntry.EntryType) {
case AggregatedLBREntry::BRANCH:
doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
AggrEntry.Mispreds);
AggrEntry.Mispreds, /*IsPreagg*/ true);
break;
case AggregatedLBREntry::FT:
case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
Expand Down
132 changes: 132 additions & 0 deletions bolt/test/X86/callcont-fallthru.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
## Ensures that a call continuation fallthrough count is set when using
## pre-aggregated perf data.

# RUN: %clangxx %cxxflags %s -o %t -Wl,-q -nostdlib
# RUN: link_fdata %s %t %t.pa1 PREAGG
# RUN: link_fdata %s %t %t.pa2 PREAGG2
# RUN: link_fdata %s %t %t.pa3 PREAGG3
# RUN: link_fdata %s %t %t.pa4 PREAGG4

## Check normal case: fallthrough is not LP or secondary entry.
# RUN: llvm-strip --strip-unneeded %t -o %t.exe
# RUN: llvm-bolt %t.exe --pa -p %t.pa1 -o %t.out \
# RUN: --print-cfg --print-only=main | FileCheck %s

## Check that getFallthroughsInTrace correctly handles a trace starting at plt
## call continuation
# RUN: llvm-bolt %t.exe --pa -p %t.pa2 -o %t.out2 \
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK2

## Check that we don't treat secondary entry points as call continuation sites.
# RUN: llvm-bolt %t --pa -p %t.pa3 -o %t.out \
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK3

## Check fallthrough to a landing pad case.
# RUN: llvm-bolt %t.exe --pa -p %t.pa4 -o %t.out \
# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK4

.globl foo
.type foo, %function
foo:
pushq %rbp
movq %rsp, %rbp
popq %rbp
Lfoo_ret:
retq
.size foo, .-foo

.globl main
.type main, %function
main:
.Lfunc_begin0:
.cfi_startproc
.cfi_personality 155, DW.ref.__gxx_personality_v0
.cfi_lsda 27, .Lexception0
pushq %rbp
movq %rsp, %rbp
subq $0x20, %rsp
movl $0x0, -0x4(%rbp)
movl %edi, -0x8(%rbp)
movq %rsi, -0x10(%rbp)
callq puts@PLT
## Target is a call continuation
# PREAGG: B X:0 #Ltmp1# 2 0
# CHECK: callq puts@PLT
# CHECK-NEXT: count: 2

Ltmp1:
movq -0x10(%rbp), %rax
movq 0x8(%rax), %rdi
movl %eax, -0x14(%rbp)

Ltmp4:
cmpl $0x0, -0x14(%rbp)
je Ltmp0
# CHECK2: je .Ltmp0
# CHECK2-NEXT: count: 3

movl $0xa, -0x18(%rbp)
callq foo
## Target is a call continuation
# PREAGG: B #Lfoo_ret# #Ltmp3# 1 0
# CHECK: callq foo
# CHECK-NEXT: count: 1

## PLT call continuation fallthrough spanning the call
# PREAGG2: F #Ltmp1# #Ltmp3_br# 3
# CHECK2: callq foo
# CHECK2-NEXT: count: 3

## Target is a secondary entry point
# PREAGG3: B X:0 #Ltmp3# 2 0
# CHECK3: callq foo
# CHECK3-NEXT: count: 0

## Target is a landing pad
# PREAGG4: B X:0 #Ltmp3# 2 0
# CHECK4: callq puts@PLT
# CHECK4-NEXT: count: 0

Ltmp3:
cmpl $0x0, -0x18(%rbp)
Ltmp3_br:
jmp Ltmp2

Ltmp2:
movl -0x18(%rbp), %eax
addl $-0x1, %eax
movl %eax, -0x18(%rbp)
jmp Ltmp3
jmp Ltmp4
jmp Ltmp1

Ltmp0:
xorl %eax, %eax
addq $0x20, %rsp
popq %rbp
retq
.Lfunc_end0:
.cfi_endproc
.size main, .-main

.section .gcc_except_table,"a",@progbits
.p2align 2, 0x0
GCC_except_table0:
.Lexception0:
.byte 255 # @LPStart Encoding = omit
.byte 255 # @TType Encoding = omit
.byte 1 # Call site Encoding = uleb128
.uleb128 .Lcst_end0-.Lcst_begin0
.Lcst_begin0:
.uleb128 .Lfunc_begin0-.Lfunc_begin0 # >> Call Site 1 <<
.uleb128 .Lfunc_end0-.Lfunc_begin0 # Call between .Lfunc_begin0 and .Lfunc_end0
.uleb128 Ltmp3-.Lfunc_begin0 # jumps to Ltmp3
.byte 0 # has no landing pad
.byte 0 # On action: cleanup
.Lcst_end0:
.p2align 2, 0x0
.hidden DW.ref.__gxx_personality_v0
.weak DW.ref.__gxx_personality_v0
.section .data.DW.ref.__gxx_personality_v0,"awG",@progbits,DW.ref.__gxx_personality_v0,comdat
.p2align 3, 0x0
.type DW.ref.__gxx_personality_v0,@object