@@ -223,6 +223,22 @@ static cl::opt<unsigned> TopCalledLimit(
223223 " functions section" ),
224224 cl::init(100 ), cl::Hidden, cl::cat(BoltCategory));
225225
226+ // Profile density options, synced with llvm-profgen/ProfileGenerator.cpp
227+ static cl::opt<bool > ShowDensity (" show-density" , cl::init(false ),
228+ cl::desc(" show profile density details" ),
229+ cl::Optional);
230+
231+ static cl::opt<int > ProfileDensityCutOffHot (
232+ " profile-density-cutoff-hot" , cl::init(990000 ),
233+ cl::desc(" Total samples cutoff for functions used to calculate "
234+ " profile density." ));
235+
236+ static cl::opt<double > ProfileDensityThreshold (
237+ " profile-density-threshold" , cl::init(0 ),
238+ cl::desc(" If the profile density is below the given threshold, it "
239+ " will be suggested to increase the sampling rate." ),
240+ cl::Optional);
241+
226242} // namespace opts
227243
228244namespace llvm {
@@ -1383,6 +1399,7 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
13831399 uint64_t StaleSampleCount = 0 ;
13841400 uint64_t InferredSampleCount = 0 ;
13851401 std::vector<const BinaryFunction *> ProfiledFunctions;
1402+ std::vector<std::pair<double , uint64_t >> FuncDensityList;
13861403 const char *StaleFuncsHeader = " BOLT-INFO: Functions with stale profile:\n " ;
13871404 for (auto &BFI : BC.getBinaryFunctions ()) {
13881405 const BinaryFunction &Function = BFI.second ;
@@ -1441,6 +1458,26 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
14411458 StaleSampleCount += SampleCount;
14421459 ++NumAllStaleFunctions;
14431460 }
1461+
1462+ if (opts::ShowDensity) {
1463+ uint64_t Size = Function.getSize ();
1464+ // In case of BOLT split functions registered in BAT, executed traces are
1465+ // automatically attributed to the main fragment. Add up function sizes
1466+ // for all fragments.
1467+ if (IsHotParentOfBOLTSplitFunction)
1468+ for (const BinaryFunction *Fragment : Function.getFragments ())
1469+ Size += Fragment->getSize ();
1470+ uint64_t ExecutedBytes = Function.getSampleCountInBytes ();
1471+ if (!ExecutedBytes && Function.hasCFG ())
1472+ for (const BinaryBasicBlock &BB : Function)
1473+ ExecutedBytes += BB.getOriginalSize () * BB.getKnownExecutionCount ();
1474+ double Density = (double )1.0 * ExecutedBytes / Size;
1475+ FuncDensityList.emplace_back (Density, SampleCount);
1476+ LLVM_DEBUG (BC.outs () << Function << " : executed bytes "
1477+ << Function.getSampleCountInBytes () << " , size (b) "
1478+ << Size << " , density " << Density
1479+ << " , sample count " << SampleCount << ' \n ' );
1480+ }
14441481 }
14451482 BC.NumProfiledFuncs = ProfiledFunctions.size ();
14461483 BC.NumStaleProfileFuncs = NumStaleProfileFunctions;
@@ -1684,6 +1721,50 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
16841721 BC.outs () << " . Use -print-unknown to see the list." ;
16851722 BC.outs () << ' \n ' ;
16861723 }
1724+
1725+ if (opts::ShowDensity) {
1726+ double Density = 0.0 ;
1727+ // Sorted by the density in descending order.
1728+ llvm::stable_sort (FuncDensityList,
1729+ [&](const std::pair<double , uint64_t > &A,
1730+ const std::pair<double , uint64_t > &B) {
1731+ if (A.first != B.first )
1732+ return A.first > B.first ;
1733+ return A.second < B.second ;
1734+ });
1735+
1736+ uint64_t AccumulatedSamples = 0 ;
1737+ uint32_t I = 0 ;
1738+ assert (opts::ProfileDensityCutOffHot <= 1000000 &&
1739+ " The cutoff value is greater than 1000000(100%)" );
1740+ while (AccumulatedSamples <
1741+ TotalSampleCount *
1742+ static_cast <float >(opts::ProfileDensityCutOffHot) /
1743+ 1000000 &&
1744+ I < FuncDensityList.size ()) {
1745+ AccumulatedSamples += FuncDensityList[I].second ;
1746+ Density = FuncDensityList[I].first ;
1747+ I++;
1748+ }
1749+ if (Density == 0.0 ) {
1750+ BC.errs () << " BOLT-WARNING: the output profile is empty or the "
1751+ " --profile-density-cutoff-hot option is "
1752+ " set too low. Please check your command.\n " ;
1753+ } else if (Density < opts::ProfileDensityThreshold) {
1754+ BC.errs ()
1755+ << " BOLT-WARNING: BOLT is estimated to optimize better with "
1756+ << format (" %.1f" , opts::ProfileDensityThreshold / Density)
1757+ << " x more samples. Please consider increasing sampling rate or "
1758+ " profiling for longer duration to get more samples.\n " ;
1759+ }
1760+
1761+ BC.outs () << " BOLT-INFO: Functions with density >= "
1762+ << format (" %.1f" , Density) << " account for "
1763+ << format (" %.2f" ,
1764+ static_cast <double >(opts::ProfileDensityCutOffHot) /
1765+ 10000 )
1766+ << " % total sample counts.\n " ;
1767+ }
16871768 return Error::success ();
16881769}
16891770
0 commit comments