|
15 | 15 | #include "bolt/Core/ParallelUtilities.h" |
16 | 16 | #include "bolt/Passes/ReorderAlgorithm.h" |
17 | 17 | #include "bolt/Passes/ReorderFunctions.h" |
| 18 | +#include "bolt/Utils/CommandLineOpts.h" |
18 | 19 | #include "llvm/Support/CommandLine.h" |
19 | 20 | #include <atomic> |
20 | 21 | #include <mutex> |
@@ -223,6 +224,18 @@ static cl::opt<unsigned> TopCalledLimit( |
223 | 224 | "functions section"), |
224 | 225 | cl::init(100), cl::Hidden, cl::cat(BoltCategory)); |
225 | 226 |
|
| 227 | +// Profile density options, synced with llvm-profgen/ProfileGenerator.cpp. The hot cutoff is expressed in parts per million of the total sample count (1000000 == 100%, so the default 990000 means 99%); the threshold is the density below which a warning suggests collecting more samples. |
| 228 | +static cl::opt<int> ProfileDensityCutOffHot( |
| 229 | + "profile-density-cutoff-hot", cl::init(990000), |
| 230 | + cl::desc("Total samples cutoff for functions used to calculate " |
| 231 | + "profile density.")); |
| 232 | + |
| 233 | +static cl::opt<double> ProfileDensityThreshold( |
| 234 | + "profile-density-threshold", cl::init(60), |
| 235 | + cl::desc("If the profile density is below the given threshold, it " |
| 236 | + "will be suggested to increase the sampling rate."), |
| 237 | + cl::Optional); |
| 238 | + |
226 | 239 | } // namespace opts |
227 | 240 |
|
228 | 241 | namespace llvm { |
@@ -1383,6 +1396,7 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { |
1383 | 1396 | uint64_t StaleSampleCount = 0; |
1384 | 1397 | uint64_t InferredSampleCount = 0; |
1385 | 1398 | std::vector<const BinaryFunction *> ProfiledFunctions; |
| 1399 | + std::vector<std::pair<double, uint64_t>> FuncDensityList; |
1386 | 1400 | const char *StaleFuncsHeader = "BOLT-INFO: Functions with stale profile:\n"; |
1387 | 1401 | for (auto &BFI : BC.getBinaryFunctions()) { |
1388 | 1402 | const BinaryFunction &Function = BFI.second; |
@@ -1441,6 +1455,22 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { |
1441 | 1455 | StaleSampleCount += SampleCount; |
1442 | 1456 | ++NumAllStaleFunctions; |
1443 | 1457 | } |
| 1458 | + |
| 1459 | + if (opts::ShowDensity) { |
| 1460 | + uint64_t Size = Function.getSize(); |
| 1461 | + // For BOLT split functions registered in BAT, executed traces are attributed |
| 1462 | + // to the main fragment, so add up the sizes of all fragments. |
| 1463 | + if (IsHotParentOfBOLTSplitFunction) |
| 1464 | + for (const BinaryFunction *Fragment : Function.getFragments()) |
| 1465 | + Size += Fragment->getSize(); |
| 1466 | + // Zero size maps to density 0 so that no Inf/NaN value reaches the density sort. |
| 1467 | + const double Density = Size ? static_cast<double>(Function.getSampleCountInBytes()) / Size : 0.0; |
| 1468 | + FuncDensityList.emplace_back(Density, SampleCount); |
| 1469 | + LLVM_DEBUG(BC.outs() << Function << ": executed bytes " |
| 1470 | + << Function.getSampleCountInBytes() << ", size (b) " |
| 1471 | + << Size << ", density " << Density |
| 1472 | + << ", sample count " << SampleCount << '\n'); |
| 1473 | + } |
1444 | 1474 | } |
1445 | 1475 | BC.NumProfiledFuncs = ProfiledFunctions.size(); |
1446 | 1476 | BC.NumStaleProfileFuncs = NumStaleProfileFunctions; |
@@ -1684,6 +1714,50 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { |
1684 | 1714 | BC.outs() << ". Use -print-unknown to see the list."; |
1685 | 1715 | BC.outs() << '\n'; |
1686 | 1716 | } |
| 1717 | + |
| 1718 | + if (opts::ShowDensity) { |
| 1719 | + // Report the lowest density among the densest functions that together reach the hot sample cutoff. |
| 1720 | + double Density = 0.0; |
| 1721 | + // Sort by density in descending order; ties go to the smaller sample count. |
| 1722 | + llvm::stable_sort(FuncDensityList, |
| 1723 | + [&](const std::pair<double, uint64_t> &A, |
| 1724 | + const std::pair<double, uint64_t> &B) { |
| 1725 | + if (A.first != B.first) |
| 1726 | + return A.first > B.first; |
| 1727 | + return A.second < B.second; |
| 1728 | + }); |
| 1729 | + uint64_t AccumulatedSamples = 0; |
| 1730 | + uint32_t I = 0; |
| 1731 | + assert(opts::ProfileDensityCutOffHot <= 1000000 && |
| 1732 | + "The cutoff value is greater than 1000000(100%)"); |
| 1733 | + // Walk from densest to sparsest until the cutoff share (in parts per million) |
| 1734 | + // of all samples is covered. Computed in double: float rounds large counts. |
| 1735 | + const double CutoffSamples = |
| 1736 | + TotalSampleCount * static_cast<double>(opts::ProfileDensityCutOffHot) / 1000000; |
| 1737 | + while (AccumulatedSamples < CutoffSamples && I < FuncDensityList.size()) { |
| 1738 | + AccumulatedSamples += FuncDensityList[I].second; |
| 1739 | + Density = FuncDensityList[I].first; |
| 1740 | + ++I; |
| 1741 | + } |
| 1742 | + if (Density == 0.0) { |
| 1743 | + BC.errs() << "BOLT-WARNING: the output profile is empty or the " |
| 1744 | + "--profile-density-cutoff-hot option is " |
| 1745 | + "set too low. Please check your command.\n"; |
| 1746 | + } else if (Density < opts::ProfileDensityThreshold) { |
| 1747 | + BC.errs() |
| 1748 | + << "BOLT-WARNING: BOLT is estimated to optimize better with " |
| 1749 | + << format("%.1f", opts::ProfileDensityThreshold / Density) |
| 1750 | + << "x more samples. Please consider increasing sampling rate or " |
| 1751 | + "profiling for longer duration to get more samples.\n"; |
| 1752 | + } |
| 1753 | + |
| 1754 | + BC.outs() << "BOLT-INFO: Functions with density >= " |
| 1755 | + << format("%.1f", Density) << " account for " |
| 1756 | + << format("%.2f", |
| 1757 | + static_cast<double>(opts::ProfileDensityCutOffHot) / |
| 1758 | + 10000) |
| 1759 | + << "% total sample counts.\n"; |
| 1760 | + } |
1687 | 1761 | return Error::success(); |
1688 | 1762 | } |
1689 | 1763 |
|
|
0 commit comments