Skip to content

Commit 7e45034

Browse files
committed
[𝘀𝗽𝗿] initial version
Created using spr 1.3.4
2 parents 70ef5eb + dc5ee08 commit 7e45034

File tree

4 files changed

+105
-4
lines changed

4 files changed

+105
-4
lines changed

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,9 @@ class BinaryFunction {
387387
/// Raw branch count for this function in the profile.
388388
uint64_t RawBranchCount{0};
389389

390+
/// Dynamically executed function bytes, used for density computation.
/// Starts at zero. DataAggregator::doTrace adds
/// Count * (Second.From - First.To) for a trace, crediting the BAT parent
/// function when one exists and the function containing the trace otherwise.
391+
uint64_t SampleCountInBytes{0};
392+
390393
/// Indicates the type of profile the function is using.
391394
uint16_t ProfileFlags{PF_NONE};
392395

@@ -1845,6 +1848,9 @@ class BinaryFunction {
18451848
/// to this function.
18461849
void setRawBranchCount(uint64_t Count) { RawBranchCount = Count; }
18471850

1851+
/// Return the number of dynamically executed bytes, from raw perf data.
/// PrintProgramStats divides this value by the function size to obtain the
/// per-function density. When it is zero and the function has a CFG, that
/// pass instead sums original block size times known execution count.
1852+
uint64_t getSampleCountInBytes() const { return SampleCountInBytes; }
1853+
18481854
/// Return the execution count for functions with known profile.
18491855
/// Return 0 if the function has no profile.
18501856
uint64_t getKnownExecutionCount() const {

bolt/lib/Passes/BinaryPasses.cpp

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,22 @@ static cl::opt<unsigned> TopCalledLimit(
223223
"functions section"),
224224
cl::init(100), cl::Hidden, cl::cat(BoltCategory));
225225

226+
// Profile density options, synced with llvm-profgen/ProfileGenerator.cpp
227+
// Off by default. Gates both the per-function density collection and the
// final "Functions with density >= ..." summary line emitted by
// PrintProgramStats::runOnFunctions.
static cl::opt<bool> ShowDensity("show-density", cl::init(false),
228+
cl::desc("show profile density details"),
229+
cl::Optional);
230+
231+
// Expressed in parts per million of the total sample count: the default of
// 990000 is 99%, and 1000000 is 100%. PrintProgramStats walks functions in
// descending density order until their accumulated samples reach this share.
// NOTE(review): the option is a signed int and only the upper bound is
// checked, via an assert; negative values are not rejected.
static cl::opt<int> ProfileDensityCutOffHot(
232+
"profile-density-cutoff-hot", cl::init(990000),
233+
cl::desc("Total samples cutoff for functions used to calculate "
234+
"profile density."));
235+
236+
// Compared against the density computed by PrintProgramStats: a non-zero
// density below this value triggers a BOLT-WARNING that suggests collecting
// more samples, scaled by threshold / density. With the default of 0 the
// comparison cannot succeed for a non-negative density, so the warning stays
// off unless a positive threshold is passed.
static cl::opt<double> ProfileDensityThreshold(
237+
"profile-density-threshold", cl::init(0),
238+
cl::desc("If the profile density is below the given threshold, it "
239+
"will be suggested to increase the sampling rate."),
240+
cl::Optional);
241+
226242
} // namespace opts
227243

228244
namespace llvm {
@@ -1383,6 +1399,7 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
13831399
uint64_t StaleSampleCount = 0;
13841400
uint64_t InferredSampleCount = 0;
13851401
std::vector<const BinaryFunction *> ProfiledFunctions;
1402+
std::vector<std::pair<double, uint64_t>> FuncDensityList;
13861403
const char *StaleFuncsHeader = "BOLT-INFO: Functions with stale profile:\n";
13871404
for (auto &BFI : BC.getBinaryFunctions()) {
13881405
const BinaryFunction &Function = BFI.second;
@@ -1441,6 +1458,26 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
14411458
StaleSampleCount += SampleCount;
14421459
++NumAllStaleFunctions;
14431460
}
1461+
1462+
if (opts::ShowDensity) {
1463+
uint64_t Size = Function.getSize();
1464+
// In case of BOLT split functions registered in BAT, executed traces are
1465+
// automatically attributed to the main fragment. Add up function sizes
1466+
// for all fragments.
1467+
if (IsHotParentOfBOLTSplitFunction)
1468+
for (const BinaryFunction *Fragment : Function.getFragments())
1469+
Size += Fragment->getSize();
1470+
uint64_t ExecutedBytes = Function.getSampleCountInBytes();
1471+
if (!ExecutedBytes && Function.hasCFG())
1472+
for (const BinaryBasicBlock &BB : Function)
1473+
ExecutedBytes += BB.getOriginalSize() * BB.getKnownExecutionCount();
1474+
double Density = (double)1.0 * ExecutedBytes / Size;
1475+
FuncDensityList.emplace_back(Density, SampleCount);
1476+
LLVM_DEBUG(BC.outs() << Function << ": executed bytes "
1477+
<< Function.getSampleCountInBytes() << ", size (b) "
1478+
<< Size << ", density " << Density
1479+
<< ", sample count " << SampleCount << '\n');
1480+
}
14441481
}
14451482
BC.NumProfiledFuncs = ProfiledFunctions.size();
14461483
BC.NumStaleProfileFuncs = NumStaleProfileFunctions;
@@ -1684,6 +1721,50 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
16841721
BC.outs() << ". Use -print-unknown to see the list.";
16851722
BC.outs() << '\n';
16861723
}
1724+
1725+
if (opts::ShowDensity) {
1726+
double Density = 0.0;
1727+
// Sorted by the density in descending order.
1728+
llvm::stable_sort(FuncDensityList,
1729+
[&](const std::pair<double, uint64_t> &A,
1730+
const std::pair<double, uint64_t> &B) {
1731+
if (A.first != B.first)
1732+
return A.first > B.first;
1733+
return A.second < B.second;
1734+
});
1735+
1736+
uint64_t AccumulatedSamples = 0;
1737+
uint32_t I = 0;
1738+
assert(opts::ProfileDensityCutOffHot <= 1000000 &&
1739+
"The cutoff value is greater than 1000000(100%)");
1740+
while (AccumulatedSamples <
1741+
TotalSampleCount *
1742+
static_cast<float>(opts::ProfileDensityCutOffHot) /
1743+
1000000 &&
1744+
I < FuncDensityList.size()) {
1745+
AccumulatedSamples += FuncDensityList[I].second;
1746+
Density = FuncDensityList[I].first;
1747+
I++;
1748+
}
1749+
if (Density == 0.0) {
1750+
BC.errs() << "BOLT-WARNING: the output profile is empty or the "
1751+
"--profile-density-cutoff-hot option is "
1752+
"set too low. Please check your command.\n";
1753+
} else if (Density < opts::ProfileDensityThreshold) {
1754+
BC.errs()
1755+
<< "BOLT-WARNING: BOLT is estimated to optimize better with "
1756+
<< format("%.1f", opts::ProfileDensityThreshold / Density)
1757+
<< "x more samples. Please consider increasing sampling rate or "
1758+
"profiling for longer duration to get more samples.\n";
1759+
}
1760+
1761+
BC.outs() << "BOLT-INFO: Functions with density >= "
1762+
<< format("%.1f", Density) << " account for "
1763+
<< format("%.2f",
1764+
static_cast<double>(opts::ProfileDensityCutOffHot) /
1765+
10000)
1766+
<< "% total sample counts.\n";
1767+
}
16871768
return Error::success();
16881769
}
16891770

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -638,8 +638,12 @@ void DataAggregator::processProfile(BinaryContext &BC) {
638638
: BinaryFunction::PF_LBR;
639639
for (auto &BFI : BC.getBinaryFunctions()) {
640640
BinaryFunction &BF = BFI.second;
641-
if (getBranchData(BF) || getFuncSampleData(BF.getNames()))
641+
FuncBranchData *FBD = getBranchData(BF);
642+
if (FBD || getFuncSampleData(BF.getNames())) {
642643
BF.markProfiled(Flags);
644+
if (FBD)
645+
BF.RawBranchCount = FBD->getNumExecutedBranches();
646+
}
643647
}
644648

645649
for (auto &FuncBranches : NamesToBranches)
@@ -845,6 +849,12 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
845849
return false;
846850
}
847851

852+
// Set ParentFunc to BAT parent function or FromFunc itself.
853+
BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
854+
if (!ParentFunc)
855+
ParentFunc = FromFunc;
856+
ParentFunc->SampleCountInBytes += Count * (Second.From - First.To);
857+
848858
std::optional<BoltAddressTranslation::FallthroughListTy> FTs =
849859
BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), First.To,
850860
Second.From)
@@ -864,13 +874,12 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
864874
<< FromFunc->getPrintName() << ":"
865875
<< Twine::utohexstr(First.To) << " to "
866876
<< Twine::utohexstr(Second.From) << ".\n");
867-
BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
868877
for (auto [From, To] : *FTs) {
869878
if (BAT) {
870879
From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true);
871880
To = BAT->translate(FromFunc->getAddress(), To, /*IsBranchSrc=*/false);
872881
}
873-
doIntraBranch(ParentFunc ? *ParentFunc : *FromFunc, From, To, Count, false);
882+
doIntraBranch(*ParentFunc, From, To, Count, false);
874883
}
875884

876885
return true;

bolt/test/X86/pre-aggregated-perf.test

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,12 @@ REQUIRES: system-linux
1111

1212
RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe
1313
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \
14-
RUN: --profile-use-dfs | FileCheck %s
14+
RUN: --show-density --profile-density-threshold=9 \
15+
RUN: --profile-density-cutoff-hot=970000 \
16+
RUN: --profile-use-dfs | FileCheck %s --check-prefix=CHECK-P2B
17+
18+
CHECK-P2B: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
19+
CHECK-P2B: BOLT-INFO: Functions with density >= 21.7 account for 97.00% total sample counts.
1520

1621
RUN: llvm-bolt %t.exe -data %t -o %t.null | FileCheck %s
1722
RUN: llvm-bolt %t.exe -data %t.new -o %t.null | FileCheck %s

0 commit comments

Comments
 (0)