Skip to content
133 changes: 84 additions & 49 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -775,41 +775,85 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,

bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
uint64_t Mispreds) {
bool IsReturn = false;
auto handleAddress = [&](uint64_t &Addr, bool IsFrom) -> BinaryFunction * {
if (BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr)) {
Addr -= Func->getAddress();
if (IsFrom) {
auto checkReturn = [&](auto MaybeInst) {
IsReturn = MaybeInst && BC->MIB->isReturn(*MaybeInst);
};
if (Func->hasInstructions())
checkReturn(Func->getInstructionAtOffset(Addr));
else
checkReturn(Func->disassembleInstructionAtOffset(Addr));
}
// Returns whether \p Offset in \p Func contains a return instruction.
auto checkReturn = [&](const BinaryFunction &Func, const uint64_t Offset) {
auto isReturn = [&](auto MI) { return MI && BC->MIB->isReturn(*MI); };
return Func.hasInstructions()
? isReturn(Func.getInstructionAtOffset(Offset))
: isReturn(Func.disassembleInstructionAtOffset(Offset));
};

if (BAT)
Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);
// Returns whether \p Offset in \p Func corresponds to a call continuation
// fallthrough block.
auto checkCallCont = [&](BinaryFunction &Func, const uint64_t Offset) {
// Note the use of MCInstrAnalysis: no call continuation for a tail call.
auto isCall = [&](auto MI) { return MI && BC->MIA->isCall(*MI); };

// No call continuation at a function start.
if (!Offset)
return false;

// FIXME: support BAT case where the function might be in empty state
// (split fragments declared non-simple).
if (!Func.hasCFG())
return false;

// The offset should not be an entry point or a landing pad.
const BinaryBasicBlock *ContBB = Func.getBasicBlockAtOffset(Offset);
if (!ContBB || ContBB->isEntryPoint() || ContBB->isLandingPad())
return false;

// Check that preceding instruction is a call.
const BinaryBasicBlock *CallBB =
Func.getBasicBlockContainingOffset(Offset - 1);
if (!CallBB || CallBB == ContBB)
return false;
return isCall(CallBB->getLastNonPseudoInstr());
};

if (BinaryFunction *ParentFunc = getBATParentFunction(*Func)) {
Func = ParentFunc;
if (IsFrom)
NumColdSamples += Count;
}
// Mutates \p Addr to an offset into the containing function, performing BAT
// offset translation and parent lookup.
//
// Returns the containing function (or BAT parent) and whether the address
// corresponds to a return (if \p IsFrom) or a call continuation (otherwise).
auto handleAddress = [&](uint64_t &Addr, bool IsFrom) {
BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr);
if (!Func)
return std::pair{Func, false};

return Func;
}
return nullptr;
Addr -= Func->getAddress();

bool IsRetOrCallCont =
IsFrom ? checkReturn(*Func, Addr) : checkCallCont(*Func, Addr);

if (BAT)
Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);

BinaryFunction *ParentFunc = getBATParentFunction(*Func);
if (!ParentFunc)
return std::pair{Func, IsRetOrCallCont};

if (IsFrom)
NumColdSamples += Count;

return std::pair{ParentFunc, IsRetOrCallCont};
};

BinaryFunction *FromFunc = handleAddress(From, /*IsFrom=*/true);
uint64_t ToOrig = To;
auto [FromFunc, IsReturn] = handleAddress(From, /*IsFrom=*/true);
auto [ToFunc, IsCallCont] = handleAddress(To, /*IsFrom=*/false);
if (!FromFunc && !ToFunc)
return false;

// Record call to continuation trace.
if (IsCallCont && FromFunc != ToFunc) {
LBREntry First{ToOrig - 1, ToOrig - 1, false};
LBREntry Second{ToOrig, ToOrig, false};
return doTrace(First, Second, Count);
}
// Ignore returns.
if (IsReturn)
return true;
BinaryFunction *ToFunc = handleAddress(To, /*IsFrom=*/false);
if (!FromFunc && !ToFunc)
return false;

// Treat recursive control transfers as inter-branches.
if (FromFunc == ToFunc && To != 0) {
Expand All @@ -826,10 +870,19 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
if (!FromFunc || !ToFunc) {
LLVM_DEBUG({
dbgs() << "Out of range trace starting in " << FromFunc->getPrintName()
<< formatv(" @ {0:x}", First.To - FromFunc->getAddress())
<< " and ending in " << ToFunc->getPrintName()
<< formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress());
dbgs() << "Out of range trace starting in ";
if (FromFunc)
dbgs() << formatv("{0} @ {1:x}", *FromFunc,
First.To - FromFunc->getAddress());
else
dbgs() << Twine::utohexstr(First.To);
dbgs() << " and ending in ";
if (ToFunc)
dbgs() << formatv("{0} @ {1:x}", *ToFunc,
Second.From - ToFunc->getAddress());
else
dbgs() << Twine::utohexstr(Second.From);
dbgs() << '\n';
});
NumLongRangeTraces += Count;
return false;
Expand Down Expand Up @@ -903,24 +956,6 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
if (!FromBB || !ToBB)
return std::nullopt;

// Adjust FromBB if the first LBR is a return from the last instruction in
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For reference to aid in review the code being removed here comes from 5599c01

// the previous block (that instruction should be a call).
if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
!FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
const BinaryBasicBlock *PrevBB =
BF.getLayout().getBlock(FromBB->getIndex() - 1);
if (PrevBB->getSuccessor(FromBB->getLabel())) {
const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
if (Instr && BC.MIB->isCall(*Instr))
FromBB = PrevBB;
else
LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
<< '\n');
} else {
LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
}
}

// Fill out information for fall-through edges. The From and To could be
// within the same basic block, e.g. when two call instructions are in the
// same block. In this case we skip the processing.
Expand Down
60 changes: 60 additions & 0 deletions bolt/test/X86/callcont-fallthru.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
## Ensures that a call continuation fallthrough count is set when using
## pre-aggregated perf data.

# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
# RUN: link_fdata %s %t.exe %t.pa PREAGG
# RUN: llvm-strip --strip-unneeded %t.exe
# RUN: llvm-bolt %t.exe --pa -p %t.pa -o %t.out \
# RUN: --print-cfg --print-only=main | FileCheck %s

.globl foo
.type foo, %function
foo:
pushq %rbp
movq %rsp, %rbp
popq %rbp
Lfoo_ret:
retq
.size foo, .-foo

.globl main
.type main, %function
main:
pushq %rbp
movq %rsp, %rbp
subq $0x20, %rsp
movl $0x0, -0x4(%rbp)
movl %edi, -0x8(%rbp)
movq %rsi, -0x10(%rbp)
movq -0x10(%rbp), %rax
movq 0x8(%rax), %rdi
movl %eax, -0x14(%rbp)

Ltmp4:
cmpl $0x0, -0x14(%rbp)
je Ltmp0

movl $0xa, -0x18(%rbp)
callq foo
# PREAGG: B #Lfoo_ret# #Ltmp3# 1 0
# CHECK: callq foo
# CHECK-NEXT: count: 1

Ltmp3:
cmpl $0x0, -0x18(%rbp)
jmp Ltmp2

Ltmp2:
movl -0x18(%rbp), %eax
addl $-0x1, %eax
movl %eax, -0x18(%rbp)
jmp Ltmp3
jmp Ltmp4

Ltmp0:
xorl %eax, %eax
addq $0x20, %rsp
popq %rbp
retq
.size main, .-main
Loading