Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,9 @@ class BinaryFunction {
/// Raw branch count for this function in the profile.
uint64_t RawBranchCount{0};

/// Dynamically executed function bytes, used for density computation.
uint64_t ExecutedBytes{0};

/// Indicates the type of profile the function is using.
uint16_t ProfileFlags{PF_NONE};

Expand Down Expand Up @@ -1843,6 +1846,9 @@ class BinaryFunction {
/// to this function.
void setRawBranchCount(uint64_t Count) { RawBranchCount = Count; }

/// Return the number of dynamically executed bytes, from raw perf data.
/// This is the value held in ExecutedBytes, which is used for density
/// computation.
uint64_t getExecutedBytes() const { return ExecutedBytes; }

/// Return the execution count for functions with known profile.
/// Return 0 if the function has no profile.
uint64_t getKnownExecutionCount() const {
Expand Down
77 changes: 77 additions & 0 deletions bolt/lib/Passes/BinaryPasses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,22 @@ static cl::opt<unsigned> TopCalledLimit(
"functions section"),
cl::init(100), cl::Hidden, cl::cat(BoltCategory));

// Profile density options, synced with llvm-profgen/ProfileGenerator.cpp

// Gates both the per-function density collection and the final density
// report in PrintProgramStats. Off by default.
static cl::opt<bool> ShowDensity("show-density", cl::init(false),
cl::desc("show profile density details"),
cl::Optional);

// Share of the total sample count that the densest functions must cover,
// expressed in parts per million (1000000 == 100%, so the default 990000 is
// 99%). The reported density is the density of the last function needed, in
// descending density order, to reach this share.
static cl::opt<int> ProfileDensityCutOffHot(
"profile-density-cutoff-hot", cl::init(990000),
cl::desc("Total samples cutoff for functions used to calculate "
"profile density."));

// Minimum acceptable density (executed bytes divided by function size in
// bytes). When the reported density falls below this value, a warning
// suggests collecting threshold / density times more samples.
static cl::opt<double> ProfileDensityThreshold(
"profile-density-threshold", cl::init(50),
cl::desc("If the profile density is below the given threshold, it "
"will be suggested to increase the sampling rate."),
cl::Optional);

} // namespace opts

namespace llvm {
Expand Down Expand Up @@ -1383,6 +1399,7 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
uint64_t StaleSampleCount = 0;
uint64_t InferredSampleCount = 0;
std::vector<const BinaryFunction *> ProfiledFunctions;
std::vector<std::pair<double, uint64_t>> FuncDensityList;
const char *StaleFuncsHeader = "BOLT-INFO: Functions with stale profile:\n";
for (auto &BFI : BC.getBinaryFunctions()) {
const BinaryFunction &Function = BFI.second;
Expand Down Expand Up @@ -1441,6 +1458,22 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
StaleSampleCount += SampleCount;
++NumAllStaleFunctions;
}

if (opts::ShowDensity) {
  // Density is the number of dynamically executed bytes per byte of function
  // code. Record it together with the function's sample count.
  uint64_t Size = Function.getSize();
  // In case of BOLT split functions registered in BAT, executed traces are
  // automatically attributed to the main fragment. Add up function sizes
  // for all fragments.
  if (IsHotParentOfBOLTSplitFunction)
    for (const BinaryFunction *Fragment : Function.getFragments())
      Size += Fragment->getSize();
  const uint64_t ExecutedBytes = Function.getExecutedBytes();
  // Guard against zero-sized functions: dividing by zero would yield inf or
  // NaN, and a NaN density violates the strict weak ordering required when
  // FuncDensityList is sorted. Treat such functions as having zero density.
  const double Density =
      Size ? static_cast<double>(ExecutedBytes) / Size : 0.0;
  FuncDensityList.emplace_back(Density, SampleCount);
  LLVM_DEBUG(BC.outs() << Function << ": executed bytes "
                       << ExecutedBytes << ", size (b) "
                       << Size << ", density " << Density
                       << ", sample count " << SampleCount << '\n');
}
}
BC.NumProfiledFuncs = ProfiledFunctions.size();
BC.NumStaleProfileFuncs = NumStaleProfileFunctions;
Expand Down Expand Up @@ -1684,6 +1717,50 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
BC.outs() << ". Use -print-unknown to see the list.";
BC.outs() << '\n';
}

if (opts::ShowDensity) {
  // Report the profile density: the density of the last function visited
  // when walking functions from densest to least dense until the requested
  // share of the total sample count is covered.
  double Density = 0.0;
  // Sorted by the density in descending order; equal densities are ordered
  // by ascending sample count.
  llvm::stable_sort(FuncDensityList,
                    [](const std::pair<double, uint64_t> &A,
                       const std::pair<double, uint64_t> &B) {
                      if (A.first != B.first)
                        return A.first > B.first;
                      return A.second < B.second;
                    });

  assert(opts::ProfileDensityCutOffHot <= 1000000 &&
         "The cutoff value is greater than 1000000(100%)");
  // The cutoff option is expressed in parts per million of the total sample
  // count. Compute the sample target once, in double precision: single
  // precision cannot represent sample totals above 2^24 exactly, which would
  // shift the cutoff for large profiles.
  const double SampleCutoff =
      static_cast<double>(TotalSampleCount) *
      static_cast<double>(opts::ProfileDensityCutOffHot) / 1000000;
  uint64_t AccumulatedSamples = 0;
  for (const std::pair<double, uint64_t> &Entry : FuncDensityList) {
    if (AccumulatedSamples >= SampleCutoff)
      break;
    AccumulatedSamples += Entry.second;
    Density = Entry.first;
  }
  if (Density == 0.0) {
    BC.errs() << "BOLT-WARNING: the output profile is empty or the "
                 "--profile-density-cutoff-hot option is "
                 "set too low. Please check your command.\n";
  } else if (Density < opts::ProfileDensityThreshold) {
    // The ratio threshold / density estimates how many times more samples
    // are needed to reach the threshold.
    BC.errs()
        << "BOLT-WARNING: BOLT is estimated to optimize better with "
        << format("%.1f", opts::ProfileDensityThreshold / Density)
        << "x more samples. Please consider increasing sampling rate or "
           "profiling for longer duration to get more samples.\n";
  }

  // Dividing the parts-per-million cutoff by 10000 converts it to percent.
  BC.outs() << "BOLT-INFO: Functions with density >= "
            << format("%.1f", Density) << " account for "
            << format("%.2f",
                      static_cast<double>(opts::ProfileDensityCutOffHot) /
                          10000)
            << "% total sample counts.\n";
}
return Error::success();
}

Expand Down
15 changes: 12 additions & 3 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -638,8 +638,12 @@ void DataAggregator::processProfile(BinaryContext &BC) {
: BinaryFunction::PF_LBR;
for (auto &BFI : BC.getBinaryFunctions()) {
BinaryFunction &BF = BFI.second;
if (getBranchData(BF) || getFuncSampleData(BF.getNames()))
FuncBranchData *FBD = getBranchData(BF);
if (FBD || getFuncSampleData(BF.getNames())) {
BF.markProfiled(Flags);
if (FBD)
BF.RawBranchCount = FBD->getNumExecutedBranches();
}
}

for (auto &FuncBranches : NamesToBranches)
Expand Down Expand Up @@ -845,6 +849,12 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
return false;
}

// Set ParentFunc to BAT parent function or FromFunc itself.
BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
if (!ParentFunc)
ParentFunc = FromFunc;
// Credit the address span of this trace (from First.To up to Second.From),
// weighted by the trace count, to the parent function's executed bytes.
// NOTE(review): the span ends at the address Second.From, so the bytes of the
// instruction located at that address are presumably not included - confirm
// that this is intended.
ParentFunc->ExecutedBytes += Count * (Second.From - First.To);

std::optional<BoltAddressTranslation::FallthroughListTy> FTs =
BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), First.To,
Second.From)
Expand All @@ -864,13 +874,12 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
<< FromFunc->getPrintName() << ":"
<< Twine::utohexstr(First.To) << " to "
<< Twine::utohexstr(Second.From) << ".\n");
BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
for (auto [From, To] : *FTs) {
if (BAT) {
From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true);
To = BAT->translate(FromFunc->getAddress(), To, /*IsBranchSrc=*/false);
}
doIntraBranch(ParentFunc ? *ParentFunc : *FromFunc, From, To, Count, false);
doIntraBranch(*ParentFunc, From, To, Count, false);
}

return true;
Expand Down
7 changes: 6 additions & 1 deletion bolt/test/X86/pre-aggregated-perf.test
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,12 @@ REQUIRES: system-linux

RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \
RUN: --profile-use-dfs | FileCheck %s
RUN: --show-density --profile-density-threshold=9 \
RUN: --profile-density-cutoff-hot=970000 \
RUN: --profile-use-dfs | FileCheck %s --check-prefix=CHECK-P2B

CHECK-P2B: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
CHECK-P2B: BOLT-INFO: Functions with density >= 21.7 account for 97.00% total sample counts.

RUN: llvm-bolt %t.exe -data %t -o %t.null | FileCheck %s
RUN: llvm-bolt %t.exe -data %t.new -o %t.null | FileCheck %s
Expand Down