Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions bolt/include/bolt/Core/MCPlusBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,12 @@ class MCPlusBuilder {
return Analysis->isIndirectBranch(Inst);
}

bool IsUnconditionalJump(const MCInst &Inst) const {
const MCInstrDesc &Desc = Info->get(Inst.getOpcode());
// barrier captures returns and unconditional branches
return Desc.isCall() || Desc.isBarrier();
}

/// Returns true if the instruction is memory indirect call or jump
virtual bool isBranchOnMem(const MCInst &Inst) const {
llvm_unreachable("not implemented");
Expand Down
43 changes: 42 additions & 1 deletion bolt/include/bolt/Profile/DataAggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ class DataAggregator : public DataReader {
std::vector<std::pair<Trace, TakenBranchInfo>> Traces;
/// Pre-populated addresses of returns, coming from pre-aggregated data or
/// disassembly. Used to disambiguate call-continuation fall-throughs.
std::unordered_set<uint64_t> Returns;
std::unordered_map<uint64_t, bool> Returns;
std::unordered_map<uint64_t, uint64_t> BasicSamples;
std::vector<PerfMemSample> MemSamples;

Expand Down Expand Up @@ -499,6 +499,10 @@ class DataAggregator : public DataReader {
/// If \p FileBuildID has no match, then issue an error and exit.
void processFileBuildID(StringRef FileBuildID);

/// Infer missing fall-throughs for branch-only traces (LBR top-of-stack
/// entries).
void imputeFallThroughs();

/// Debugging dump methods
void dump() const;
void dump(const PerfBranchSample &Sample) const;
Expand All @@ -510,6 +514,43 @@ class DataAggregator : public DataReader {
void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) const;
void printBranchStacksDiagnostics(uint64_t IgnoredSamples) const;

/// Get instruction at \p Addr either from containing binary function or
/// disassemble in-place, and invoke \p Callback on resulting MCInst.
/// Returns the result of the callback or nullopt.
template <typename T>
std::optional<T>
testInstructionAt(const uint64_t Addr,
std::function<T(const MCInst &)> Callback) const {
BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr);
if (!Func)
return std::nullopt;
const uint64_t Offset = Addr - Func->getAddress();
if (Func->hasInstructions()) {
if (auto *MI = Func->getInstructionAtOffset(Offset))
return Callback(*MI);
} else {
if (auto MI = Func->disassembleInstructionAtOffset(Offset))
return Callback(*MI);
}
return std::nullopt;
}

/// Apply \p Callback to the instruction at \p Addr, memoizing the outcome
/// in \p Map so that repeated queries for the same address hit the cache.
template <typename T>
std::optional<T> testAndSet(const uint64_t Addr,
                            std::function<T(const MCInst &)> Callback,
                            std::unordered_map<uint64_t, T> &Map) {
  // Fast path: verdict already cached for this address.
  if (auto CacheIt = Map.find(Addr); CacheIt != Map.end())
    return CacheIt->second;
  std::optional<T> Result = testInstructionAt<T>(Addr, Callback);
  // Note: a nullopt result (unknown address) is deliberately not cached.
  if (Result)
    Map.emplace(Addr, *Result);
  return Result;
}

public:
/// If perf.data was collected without build ids, the buildid-list may contain
/// incomplete entries. Return true if the buffer containing
Expand Down
75 changes: 57 additions & 18 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ FilterPID("pid",
cl::Optional,
cl::cat(AggregatorCategory));

// When set, guess the missing fall-through endpoint for traces that only
// record a taken branch (see DataAggregator::imputeFallThroughs()).
static cl::opt<bool> ImputeTraceFallthrough(
    "impute-trace-fall-through",
    cl::desc("impute missing fall-throughs for branch-only traces"),
    cl::Optional, cl::cat(AggregatorCategory));

static cl::opt<bool>
IgnoreBuildID("ignore-build-id",
cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
Expand Down Expand Up @@ -529,6 +534,49 @@ void DataAggregator::parsePerfData(BinaryContext &BC) {
deleteTempFiles();
}

void DataAggregator::imputeFallThroughs() {
  if (Traces.empty())
    return;

  // (Branch, From) of the previously visited trace; used to detect runs of
  // traces sharing the same branch so their fall-through sizes can be
  // averaged.
  std::pair PrevBranch(Trace::EXTERNAL, Trace::EXTERNAL);
  uint64_t AggregateCount = 0;
  uint64_t AggregateFallthroughSize = 0;
  uint64_t InferredTraces = 0;

  // Helper map with whether the instruction is a call/ret/unconditional
  // branch, memoized by address.
  std::unordered_map<uint64_t, bool> IsUncondJumpMap;
  auto checkUncondJump = [&](const uint64_t Addr) {
    auto isUncondJump = [&](const MCInst &MI) -> bool {
      return BC->MIB->IsUnconditionalJump(MI);
    };
    // Unknown/undecodable addresses conservatively count as unconditional
    // jumps (i.e. no fall-through is imputed for them).
    return testAndSet<bool>(Addr, isUncondJump, IsUncondJumpMap).value_or(true);
  };

  for (auto &[Trace, Info] : Traces) {
    if (Trace.From == Trace::EXTERNAL)
      continue;
    std::pair CurrentBranch(Trace.Branch, Trace.From);
    if (Trace.To == Trace::BR_ONLY) {
      // Use the average fall-through size observed for this same branch if
      // available; otherwise impute one byte for conditional branches and
      // zero bytes for calls/returns/unconditional jumps. Guard against a
      // zero AggregateCount to avoid division by zero.
      uint64_t InferredBytes =
          PrevBranch == CurrentBranch && AggregateCount
              ? AggregateFallthroughSize / AggregateCount
              : !checkUncondJump(Trace.From);
      Trace.To = Trace.From + InferredBytes;
      LLVM_DEBUG(dbgs() << "imputed " << Trace << " (" << InferredBytes
                        << " bytes)\n");
      ++InferredTraces;
    } else {
      // Restart aggregation whenever the branch changes.
      if (CurrentBranch != PrevBranch)
        AggregateCount = AggregateFallthroughSize = 0;
      if (Trace.To != Trace::EXTERNAL)
        AggregateFallthroughSize += (Trace.To - Trace.From) * Info.TakenCount;
      AggregateCount += Info.TakenCount;
    }
    PrevBranch = CurrentBranch;
  }
  if (opts::Verbosity >= 1)
    outs() << "BOLT-INFO: imputed " << InferredTraces << " traces\n";
}

Error DataAggregator::preprocessProfile(BinaryContext &BC) {
this->BC = &BC;

Expand All @@ -541,6 +589,9 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
// Sort parsed traces for faster processing.
llvm::sort(Traces, llvm::less_first());

if (opts::ImputeTraceFallthrough)
imputeFallThroughs();

if (opts::HeatmapMode) {
if (std::error_code EC = printLBRHeatMap())
return errorCodeToError(EC);
Expand Down Expand Up @@ -742,22 +793,10 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
}

bool DataAggregator::checkReturn(uint64_t Addr) {
  // The copied span interleaved the pre-change body with the replacement;
  // keep only the memoized implementation. Classifies the instruction at
  // \p Addr as a return, caching the verdict in the Returns map so each
  // address is inspected at most once.
  auto isReturn = [&](const MCInst &MI) -> bool {
    return BC->MIB->isReturn(MI);
  };
  // Addresses outside known functions are treated as non-returns.
  return testAndSet<bool>(Addr, isReturn, Returns).value_or(false);
}

bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
Expand Down Expand Up @@ -1347,7 +1386,7 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
if (!Addr[0]->Offset)
Addr[0]->Offset = Trace::FT_EXTERNAL_RETURN;
else
Returns.emplace(Addr[0]->Offset);
Returns.emplace(Addr[0]->Offset, true);
}

/// Record a trace.
Expand Down Expand Up @@ -1608,7 +1647,7 @@ void DataAggregator::processBranchEvents() {
NamedRegionTimer T("processBranch", "Processing branch events",
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);

Returns.emplace(Trace::FT_EXTERNAL_RETURN);
Returns.emplace(Trace::FT_EXTERNAL_RETURN, true);
for (const auto &[Trace, Info] : Traces) {
bool IsReturn = checkReturn(Trace.Branch);
// Ignore returns.
Expand Down
Loading