@@ -223,6 +223,22 @@ static cl::opt<unsigned> TopCalledLimit(
223223 " functions section" ),
224224 cl::init(100 ), cl::Hidden, cl::cat(BoltCategory));
225225
226+ // Profile density options, synced with llvm-profgen/ProfileGenerator.cpp
227+ static cl::opt<bool > ShowDensity (" show-density" , cl::init(false ),
228+ cl::desc(" show profile density details" ),
229+ cl::Optional);
230+
231+ static cl::opt<int > ProfileDensityCutOffHot (
232+ " profile-density-cutoff-hot" , cl::init(990000 ),
233+ cl::desc(" Total samples cutoff for functions used to calculate "
234+ " profile density." ));
235+
236+ static cl::opt<double > ProfileDensityThreshold (
237+ " profile-density-threshold" , cl::init(0 ),
238+ cl::desc(" If the profile density is below the given threshold, it "
239+ " will be suggested to increase the sampling rate." ),
240+ cl::Optional);
241+
226242} // namespace opts
227243
228244namespace llvm {
@@ -1383,6 +1399,7 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
13831399 uint64_t StaleSampleCount = 0 ;
13841400 uint64_t InferredSampleCount = 0 ;
13851401 std::vector<const BinaryFunction *> ProfiledFunctions;
1402+ std::vector<std::pair<double , uint64_t >> FuncDensityList;
13861403 const char *StaleFuncsHeader = " BOLT-INFO: Functions with stale profile:\n " ;
13871404 for (auto &BFI : BC.getBinaryFunctions ()) {
13881405 const BinaryFunction &Function = BFI.second ;
@@ -1441,6 +1458,25 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
14411458 StaleSampleCount += SampleCount;
14421459 ++NumAllStaleFunctions;
14431460 }
1461+
1462+ if (opts::ShowDensity) {
1463+ uint64_t Size = Function.getSize ();
1464+ // In case of BOLT split functions registered in BAT, executed traces are
1465+ // automatically attributed to the main fragment. Add up function sizes
1466+ // for all fragments.
1467+ if (IsHotParentOfBOLTSplitFunction)
1468+ for (const BinaryFunction *Fragment : Function.getFragments ())
1469+ Size += Fragment->getSize ();
1470+ uint64_t ExecutedBytes = Function.getSampleCountInBytes ();
1471+ if (!ExecutedBytes && Function.isSimple () && Function.hasProfile ())
1472+ for (const BinaryBasicBlock &BB : Function)
1473+ ExecutedBytes += BB.getOriginalSize () * BB.getKnownExecutionCount ();
1474+ double Density = (double )1.0 * ExecutedBytes / Size;
1475+ FuncDensityList.emplace_back (Density, SampleCount);
1476+ LLVM_DEBUG (BC.outs () << Function << " : executed bytes " << ExecutedBytes
1477+ << " , size (b) " << Size << " , density " << Density
1478+ << " , sample count " << SampleCount << ' \n ' );
1479+ }
14441480 }
14451481 BC.NumProfiledFuncs = ProfiledFunctions.size ();
14461482 BC.NumStaleProfileFuncs = NumStaleProfileFunctions;
@@ -1684,6 +1720,54 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
16841720 BC.outs () << " . Use -print-unknown to see the list." ;
16851721 BC.outs () << ' \n ' ;
16861722 }
1723+
1724+ if (opts::ShowDensity) {
1725+ double Density = 0.0 ;
1726+ llvm::sort (FuncDensityList);
1727+ // Find the total number of samples in non-simple functions with zero
1728+ // density. We fail to compute density for such functions with fdata/YAML
1729+ // profiles as we compute the density from CFG and the profile can't be
1730+ // attached to non-simple functions.
1731+ //
1732+ // Exclude those samples from the tally as otherwise we may end up with zero
1733+ // profile density depending on how hot non-simple functions are.
1734+ uint64_t NonSimpleSampleCount = 0 ;
1735+ for (const auto &[CurDensity, Samples] : FuncDensityList) {
1736+ if (CurDensity != 0 )
1737+ break ;
1738+ NonSimpleSampleCount += Samples;
1739+ }
1740+
1741+ uint64_t AccumulatedSamples = NonSimpleSampleCount;
1742+ assert (opts::ProfileDensityCutOffHot <= 1000000 &&
1743+ " The cutoff value is greater than 1000000(100%)" );
1744+ for (const auto &[CurDensity, Samples] : llvm::reverse (FuncDensityList)) {
1745+ if (AccumulatedSamples >=
1746+ TotalSampleCount * static_cast <float >(opts::ProfileDensityCutOffHot) /
1747+ 1000000 )
1748+ break ;
1749+ Density = CurDensity;
1750+ AccumulatedSamples += Samples;
1751+ }
1752+ if (Density == 0.0 ) {
1753+ BC.errs () << " BOLT-WARNING: the output profile is empty or the "
1754+ " --profile-density-cutoff-hot option is "
1755+ " set too low. Please check your command.\n " ;
1756+ } else if (Density < opts::ProfileDensityThreshold) {
1757+ BC.errs ()
1758+ << " BOLT-WARNING: BOLT is estimated to optimize better with "
1759+ << format (" %.1f" , opts::ProfileDensityThreshold / Density)
1760+ << " x more samples. Please consider increasing sampling rate or "
1761+ " profiling for longer duration to get more samples.\n " ;
1762+ }
1763+
1764+ BC.outs () << " BOLT-INFO: Functions with density >= "
1765+ << format (" %.1f" , Density) << " account for "
1766+ << format (" %.2f" ,
1767+ static_cast <double >(opts::ProfileDensityCutOffHot) /
1768+ 10000 )
1769+ << " % total sample counts.\n " ;
1770+ }
16871771 return Error::success ();
16881772}
16891773
0 commit comments