6 changes: 6 additions & 0 deletions bolt/include/bolt/Core/MCPlusBuilder.h
@@ -430,6 +430,12 @@ class MCPlusBuilder {
return Analysis->isIndirectBranch(Inst);
}

bool IsUnconditionalJump(const MCInst &Inst) const {
const MCInstrDesc &Desc = Info->get(Inst.getOpcode());
// barrier captures returns and unconditional branches
return Desc.isCall() || Desc.isBarrier();
}

/// Returns true if the instruction is memory indirect call or jump
virtual bool isBranchOnMem(const MCInst &Inst) const {
llvm_unreachable("not implemented");
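The new helper classifies instructions purely from static opcode properties. Below is a self-contained sketch of the same flag test, with MockInstrDesc as a hypothetical stand-in for llvm::MCInstrDesc (which the real code obtains via Info->get(Inst.getOpcode())). Note that, despite its name, the helper also returns true for calls: like returns and unconditional branches, they receive a zero-length imputed fall-through later in this PR.

#include <cassert>

// Hypothetical stand-in for llvm::MCInstrDesc, reduced to the two static
// opcode properties the new helper queries.
struct MockInstrDesc {
  bool Call = false;
  bool Barrier = false; // set for returns and unconditional branches
  bool isCall() const { return Call; }
  bool isBarrier() const { return Barrier; }
};

// Mirrors the body of MCPlusBuilder::IsUnconditionalJump above.
bool isUnconditionalJump(const MockInstrDesc &Desc) {
  return Desc.isCall() || Desc.isBarrier();
}

int main() {
  assert(isUnconditionalJump({/*Call=*/true, /*Barrier=*/false})); // call
  assert(isUnconditionalJump({/*Call=*/false, /*Barrier=*/true})); // ret/jmp
  assert(!isUnconditionalJump({})); // e.g. conditional branch: falls through
}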
43 changes: 42 additions & 1 deletion bolt/include/bolt/Profile/DataAggregator.h
@@ -137,7 +137,7 @@ class DataAggregator : public DataReader {
std::vector<std::pair<Trace, TakenBranchInfo>> Traces;
/// Pre-populated addresses of returns, coming from pre-aggregated data or
/// disassembly. Used to disambiguate call-continuation fall-throughs.
-  std::unordered_set<uint64_t> Returns;
+  std::unordered_map<uint64_t, bool> Returns;
std::unordered_map<uint64_t, uint64_t> BasicSamples;
std::vector<PerfMemSample> MemSamples;

@@ -498,6 +498,10 @@ class DataAggregator : public DataReader {
/// If \p FileBuildID has no match, then issue an error and exit.
void processFileBuildID(StringRef FileBuildID);

/// Infer missing fall-throughs for branch-only traces (LBR top-of-stack
/// entries).
void imputeFallThroughs();

/// Debugging dump methods
void dump() const;
void dump(const PerfBranchSample &Sample) const;
@@ -509,6 +513,43 @@
void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) const;
void printBranchStacksDiagnostics(uint64_t IgnoredSamples) const;

/// Get the instruction at \p Addr, either from the containing binary function
/// or by disassembling it in place, and invoke \p Callback on the resulting
/// MCInst. Returns the result of the callback, or std::nullopt.
template <typename T>
std::optional<T>
testInstructionAt(const uint64_t Addr,
std::function<T(const MCInst &)> Callback) const {
BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr);
if (!Func)
return std::nullopt;
const uint64_t Offset = Addr - Func->getAddress();
if (Func->hasInstructions()) {
if (auto *MI = Func->getInstructionAtOffset(Offset))
return Callback(*MI);
} else {
if (auto MI = Func->disassembleInstructionAtOffset(Offset))
return Callback(*MI);
}
return std::nullopt;
}

/// Apply \p Callback to the instruction at \p Addr, memoizing the result
/// in \p Map.
template <typename T>
std::optional<T> testAndSet(const uint64_t Addr,
std::function<T(const MCInst &)> Callback,
std::unordered_map<uint64_t, T> &Map) {
auto It = Map.find(Addr);
if (It != Map.end())
return It->second;
if (std::optional<T> Res = testInstructionAt<T>(Addr, Callback)) {
Map.emplace(Addr, *Res);
return *Res;
}
return std::nullopt;
}

public:
/// If perf.data was collected without build ids, the buildid-list may contain
/// incomplete entries. Return true if the buffer containing
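The two templates split the work: testInstructionAt locates or disassembles the instruction, while testAndSet adds memoization on top. Here is a simplified, self-contained sketch of the pattern, where the invented Compute callback merges the role of testInstructionAt. Since the callback's answer is cached whether true or false, each address is inspected at most once; this is also why Returns changed from a set to a map from address to bool.

#include <cstdint>
#include <functional>
#include <iostream>
#include <optional>
#include <unordered_map>

// Memoized predicate over addresses. Compute stands in for the instruction
// lookup (testInstructionAt in the real code): it may fail, returning
// nullopt for addresses outside any known function.
template <typename T>
std::optional<T> testAndSet(uint64_t Addr,
                            std::function<std::optional<T>(uint64_t)> Compute,
                            std::unordered_map<uint64_t, T> &Map) {
  auto It = Map.find(Addr);
  if (It != Map.end())
    return It->second; // cache hit: no second lookup/disassembly
  if (std::optional<T> Res = Compute(Addr)) {
    Map.emplace(Addr, *Res); // cache positive and negative answers alike
    return *Res;
  }
  return std::nullopt; // failed lookups are not cached
}

int main() {
  std::unordered_map<uint64_t, bool> Returns;
  auto IsReturnAt = [](uint64_t Addr) -> std::optional<bool> {
    if (Addr < 0x400000)
      return std::nullopt;   // pretend: address outside any function
    return Addr == 0x401000; // pretend: only this address holds a return
  };
  // First call computes and caches; second is served from the map; third
  // fails the lookup and falls back to the default.
  std::cout << testAndSet<bool>(0x401000, IsReturnAt, Returns).value_or(false)
            << testAndSet<bool>(0x401000, IsReturnAt, Returns).value_or(false)
            << testAndSet<bool>(0x100, IsReturnAt, Returns).value_or(false)
            << '\n'; // prints 110
}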
94 changes: 76 additions & 18 deletions bolt/lib/Profile/DataAggregator.cpp
@@ -77,6 +77,11 @@ FilterPID("pid",
cl::Optional,
cl::cat(AggregatorCategory));

static cl::opt<bool> ImputeTraceFallthrough(
"impute-trace-fall-through",
cl::desc("impute missing fall-throughs for branch-only traces"),
cl::Optional, cl::cat(AggregatorCategory));

static cl::opt<bool>
IgnoreBuildID("ignore-build-id",
cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
@@ -513,6 +518,68 @@ void DataAggregator::parsePerfData(BinaryContext &BC) {
deleteTempFiles();
}

void DataAggregator::imputeFallThroughs() {
if (Traces.empty())
return;

std::pair PrevBranch(Trace::EXTERNAL, Trace::EXTERNAL);
uint64_t AggregateCount = 0;
uint64_t AggregateFallthroughSize = 0;
uint64_t InferredTraces = 0;

// Helper map caching whether the instruction at a given address is a
// call/ret/unconditional branch.
std::unordered_map<uint64_t, bool> IsUncondJumpMap;
auto checkUncondJump = [&](const uint64_t Addr) {
auto isUncondJump = [&](const MCInst &MI) -> bool {
return BC->MIB->IsUnconditionalJump(MI);
};
return testAndSet<bool>(Addr, isUncondJump, IsUncondJumpMap).value_or(true);
};

// Traces are sorted by their component addresses (Branch, From, To).
// assert(is_sorted(Traces));

// Traces corresponding to the top-of-stack branch entry with a missing
// fall-through have BR_ONLY (-1ULL/UINT64_MAX) in the To field, meaning that
// for fixed values of Branch and From, branch-only traces are stored after
// all traces with a valid fall-through.
//
// Group traces by (Branch, From) and compute weighted average fall-through
// length for the top-of-stack trace (closing the group) by accumulating the
// fall-through lengths of traces with valid fall-throughs earlier in the
// group.
for (auto &[Trace, Info] : Traces) {
// Skip fall-throughs in external code.
if (Trace.From == Trace::EXTERNAL)
continue;
std::pair CurrentBranch(Trace.Branch, Trace.From);
// BR_ONLY must be the last trace in the group
if (Trace.To == Trace::BR_ONLY) {
// If the group is not empty, use the aggregate values; otherwise use length
// 0 for unconditional jumps (call/ret/uncond branch) or 1 for others.
uint64_t InferredBytes = PrevBranch == CurrentBranch
? AggregateFallthroughSize / AggregateCount
: !checkUncondJump(Trace.From);
Trace.To = Trace.From + InferredBytes;
LLVM_DEBUG(dbgs() << "imputed " << Trace << " (" << InferredBytes
<< " bytes)\n");
++InferredTraces;
} else {
// Trace with a valid fall-through
// New group: reset aggregates.
if (CurrentBranch != PrevBranch)
AggregateCount = AggregateFallthroughSize = 0;
// Only use valid fall-through lengths
if (Trace.To != Trace::EXTERNAL)
AggregateFallthroughSize += (Trace.To - Trace.From) * Info.TakenCount;
AggregateCount += Info.TakenCount;
}
PrevBranch = CurrentBranch;
}
if (opts::Verbosity >= 1)
outs() << "BOLT-INFO: imputed " << InferredTraces << " traces\n";
}

Error DataAggregator::preprocessProfile(BinaryContext &BC) {
this->BC = &BC;

@@ -525,6 +592,9 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
// Sort parsed traces for faster processing.
llvm::sort(Traces, llvm::less_first());

if (opts::ImputeTraceFallthrough)
imputeFallThroughs();

if (opts::HeatmapMode) {
if (std::error_code EC = printLBRHeatMap())
return errorCodeToError(EC);
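To make the weighted-average arithmetic concrete, here is a toy rerun of the imputation loop on invented numbers; the group-reset on a new (Branch, From) pair and the unconditional-jump fallback for empty groups are omitted. Because traces are sorted by (Branch, From, To) and Trace::BR_ONLY is -1ULL, the branch-only entry always closes its group, so the aggregates are complete by the time it is reached.

#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

// Invented stand-ins: Trace keeps only the fields the loop reads, and
// BR_ONLY mirrors Trace::BR_ONLY (-1ULL), sorting after all valid To values.
constexpr uint64_t BR_ONLY = UINT64_MAX;
struct Trace { uint64_t Branch, From, To; };

int main() {
  // One (Branch, From) group, already sorted by To; counts are TakenCount.
  std::vector<std::pair<Trace, uint64_t>> Traces = {
      {{0x100, 0x110, 0x120}, 1},   // 0x10-byte fall-through, seen once
      {{0x100, 0x110, 0x130}, 3},   // 0x20-byte fall-through, seen 3 times
      {{0x100, 0x110, BR_ONLY}, 5}, // top-of-stack entry, fall-through missing
  };
  uint64_t AggregateCount = 0, AggregateFallthroughSize = 0;
  for (auto &[T, Count] : Traces) {
    if (T.To == BR_ONLY) {
      // Weighted average: (0x10 * 1 + 0x20 * 3) / (1 + 3) = 0x70 / 4 = 0x1c
      T.To = T.From + AggregateFallthroughSize / AggregateCount;
      std::cout << std::hex << "imputed To = 0x" << T.To << '\n'; // 0x12c
    } else {
      AggregateFallthroughSize += (T.To - T.From) * Count;
      AggregateCount += Count;
    }
  }
}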
@@ -726,22 +796,10 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
}

bool DataAggregator::checkReturn(uint64_t Addr) {
-  auto isReturn = [&](auto MI) { return MI && BC->MIB->isReturn(*MI); };
-  if (llvm::is_contained(Returns, Addr))
-    return true;
-
-  BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr);
-  if (!Func)
-    return false;
-
-  const uint64_t Offset = Addr - Func->getAddress();
-  if (Func->hasInstructions()
-          ? isReturn(Func->getInstructionAtOffset(Offset))
-          : isReturn(Func->disassembleInstructionAtOffset(Offset))) {
-    Returns.emplace(Addr);
-    return true;
-  }
-  return false;
+  auto isReturn = [&](const MCInst &MI) -> bool {
+    return BC->MIB->isReturn(MI);
+  };
+  return testAndSet<bool>(Addr, isReturn, Returns).value_or(false);
}

bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
@@ -1331,7 +1389,7 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
if (!Addr[0]->Offset)
Addr[0]->Offset = Trace::FT_EXTERNAL_RETURN;
else
-    Returns.emplace(Addr[0]->Offset);
+    Returns.emplace(Addr[0]->Offset, true);
}

/// Record a trace.
@@ -1592,7 +1650,7 @@ void DataAggregator::processBranchEvents() {
NamedRegionTimer T("processBranch", "Processing branch events",
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);

-  Returns.emplace(Trace::FT_EXTERNAL_RETURN);
+  Returns.emplace(Trace::FT_EXTERNAL_RETURN, true);
for (const auto &[Trace, Info] : Traces) {
bool IsReturn = checkReturn(Trace.Branch);
// Ignore returns.