Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,9 @@ class BinaryFunction {
/// Original size of the function.
uint64_t Size;

/// Original instruction count of the function, if disassembly succeeded.
uint64_t InputInstructionCount{0};

/// Address of the function in output.
uint64_t OutputAddress{0};

Expand Down Expand Up @@ -2173,6 +2176,9 @@ class BinaryFunction {
/// Get the number of instructions within this function.
uint64_t getInstructionCount() const;

/// Get the original number of instructions.
uint64_t getInputInstructionCount() const { return InputInstructionCount; }

const CFIInstrMapType &getFDEProgram() const { return FrameInstructions; }

void moveRememberRestorePair(BinaryBasicBlock *BB);
Expand Down
2 changes: 2 additions & 0 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1499,6 +1499,8 @@ Error BinaryFunction::disassemble() {

clearList(Relocations);

InputInstructionCount = Instructions.size();

if (!IsSimple) {
clearList(Instructions);
return createNonFatalBOLTError("");
Expand Down
73 changes: 73 additions & 0 deletions bolt/lib/Passes/BinaryPasses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,22 @@ static cl::opt<unsigned> TopCalledLimit(
"functions section"),
cl::init(100), cl::Hidden, cl::cat(BoltCategory));

// Profile density options, synced with llvm-profgen/ProfileGenerator.cpp
static cl::opt<bool> ShowDensity("show-density", cl::init(true),
cl::desc("show profile density details"),
cl::Optional);

static cl::opt<int> ProfileDensityCutOffHot(
"profile-density-cutoff-hot", cl::init(990000),
cl::desc("Total samples cutoff for functions used to calculate "
"profile density."));

static cl::opt<double> ProfileDensityThreshold(
"profile-density-threshold", cl::init(50),
cl::desc("If the profile density is below the given threshold, it "
"will be suggested to increase the sampling rate."),
cl::Optional);

} // namespace opts

namespace llvm {
Expand Down Expand Up @@ -1383,6 +1399,7 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
uint64_t StaleSampleCount = 0;
uint64_t InferredSampleCount = 0;
std::vector<const BinaryFunction *> ProfiledFunctions;
std::vector<std::pair<double, uint64_t>> FuncDensityList;
const char *StaleFuncsHeader = "BOLT-INFO: Functions with stale profile:\n";
for (auto &BFI : BC.getBinaryFunctions()) {
const BinaryFunction &Function = BFI.second;
Expand Down Expand Up @@ -1441,6 +1458,18 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
StaleSampleCount += SampleCount;
++NumAllStaleFunctions;
}

if (opts::ShowDensity) {
uint64_t Instructions = Function.getInputInstructionCount();
// In case of BOLT split functions registered in BAT, samples are
// automatically attributed to the main fragment. Add instructions from
// all fragments.
if (IsHotParentOfBOLTSplitFunction)
for (const BinaryFunction *Fragment : Function.getFragments())
Instructions += Fragment->getInputInstructionCount();
double Density = (double)1.0 * SampleCount / Instructions;
FuncDensityList.emplace_back(Density, SampleCount);
}
}
BC.NumProfiledFuncs = ProfiledFunctions.size();
BC.NumStaleProfileFuncs = NumStaleProfileFunctions;
Expand Down Expand Up @@ -1684,6 +1713,50 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
BC.outs() << ". Use -print-unknown to see the list.";
BC.outs() << '\n';
}

if (opts::ShowDensity) {
double Density = 0.0;
// Sorted by the density in descending order.
llvm::stable_sort(FuncDensityList,
[&](const std::pair<double, uint64_t> &A,
const std::pair<double, uint64_t> &B) {
if (A.first != B.first)
return A.first > B.first;
return A.second < B.second;
});

uint64_t AccumulatedSamples = 0;
uint32_t I = 0;
assert(opts::ProfileDensityCutOffHot <= 1000000 &&
"The cutoff value is greater than 1000000(100%)");
while (AccumulatedSamples <
TotalSampleCount *
static_cast<float>(opts::ProfileDensityCutOffHot) /
1000000 &&
I < FuncDensityList.size()) {
AccumulatedSamples += FuncDensityList[I].second;
Density = FuncDensityList[I].first;
I++;
}
if (Density == 0.0) {
BC.errs() << "BOLT-WARNING: the output profile is empty or the "
"--profile-density-cutoff-hot option is "
"set too low. Please check your command.\n";
} else if (Density < opts::ProfileDensityThreshold) {
BC.errs()
<< "BOLT-WARNING: BOLT is estimated to optimize better with "
<< format("%.1f", opts::ProfileDensityThreshold / Density)
<< "x more samples. Please consider increasing sampling rate or "
"profiling for longer duration to get more samples.\n";
}

BC.outs() << "BOLT-INFO: Functions with density >= "
<< format("%.1f", Density) << " account for "
<< format("%.2f",
static_cast<double>(opts::ProfileDensityCutOffHot) /
10000)
<< "% total sample counts.\n";
}
return Error::success();
}

Expand Down
6 changes: 5 additions & 1 deletion bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -638,8 +638,12 @@ void DataAggregator::processProfile(BinaryContext &BC) {
: BinaryFunction::PF_LBR;
for (auto &BFI : BC.getBinaryFunctions()) {
BinaryFunction &BF = BFI.second;
if (getBranchData(BF) || getFuncSampleData(BF.getNames()))
FuncBranchData *FBD = getBranchData(BF);
if (FBD || getFuncSampleData(BF.getNames())) {
BF.markProfiled(Flags);
if (FBD)
BF.RawBranchCount = FBD->getNumExecutedBranches();
}
}

for (auto &FuncBranches : NamesToBranches)
Expand Down
15 changes: 12 additions & 3 deletions bolt/test/X86/pre-aggregated-perf.test
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,22 @@ REQUIRES: system-linux

RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \
RUN: --show-density --profile-density-threshold=9 \
RUN: --profile-density-cutoff-hot=970000 \
RUN: --profile-use-dfs | FileCheck %s

RUN: llvm-bolt %t.exe -data %t -o %t.null | FileCheck %s
RUN: llvm-bolt %t.exe -data %t.new -o %t.null | FileCheck %s
RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null | FileCheck %s
RUN: llvm-bolt %t.exe -data %t -o %t.null \
RUN: --show-density --profile-density-threshold=9 \
RUN: --profile-density-cutoff-hot=970000 | FileCheck %s
RUN: llvm-bolt %t.exe -data %t.new -o %t.null \
RUN: --show-density --profile-density-threshold=9 \
RUN: --profile-density-cutoff-hot=970000 | FileCheck %s
RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null \
RUN: --show-density --profile-density-threshold=9 \
RUN: --profile-density-cutoff-hot=970000 | FileCheck %s

CHECK: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
CHECK: BOLT-INFO: Functions with density >= 9.4 account for 97.00% total sample counts.

RUN: cat %t | sort | FileCheck %s -check-prefix=PERF2BOLT
RUN: cat %t.new | FileCheck %s -check-prefix=NEWFORMAT
Expand Down