Skip to content

Commit 1d6d666

Browse files
committed
[𝘀𝗽𝗿] initial version
Created using spr 1.3.4
2 parents a8644b3 + 4ded0d7 commit 1d6d666

File tree

4 files changed

+100
-179
lines changed

4 files changed

+100
-179
lines changed

bolt/include/bolt/Profile/DataAggregator.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ class DataAggregator : public DataReader {
231231
/// for the source of the branch to avoid counting cold activity twice (one
232232
/// for source and another for destination).
233233
uint64_t NumColdSamples{0};
234+
uint64_t NumTotalSamples{0};
234235

235236
/// Looks into system PATH for Linux Perf and set up the aggregator to use it
236237
void findPerfExecutable();
@@ -283,8 +284,8 @@ class DataAggregator : public DataReader {
283284
/// everything
284285
bool hasData() const { return !ParsingBuf.empty(); }
285286

286-
/// Print heat map based on LBR samples.
287-
std::error_code printLBRHeatMap();
287+
/// Print heat map based on collected samples.
288+
std::error_code printHeatMap();
288289

289290
/// Parse a single perf sample containing a PID associated with a sequence of
290291
/// LBR entries. If the PID does not correspond to the binary we are looking

bolt/include/bolt/Profile/Heatmap.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,9 @@ class Heatmap {
5757
}
5858

5959
/// Register \p Count samples at \p Address.
60-
void registerAddress(uint64_t Address) {
60+
void registerAddress(uint64_t Address, uint64_t Count) {
6161
if (!ignoreAddress(Address))
62-
++Map[Address / BucketSize];
62+
Map[Address / BucketSize] += Count;
6363
}
6464

6565
/// Register \p Count samples at [\p StartAddress, \p EndAddress ].

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 94 additions & 174 deletions
Original file line numberDiff line numberDiff line change
@@ -499,18 +499,15 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
499499
filterBinaryMMapInfo();
500500
prepareToParse("events", MainEventsPPI, ErrorCallback);
501501

502+
if (opts::BasicAggregation ? parseBasicEvents() : parseBranchEvents())
503+
errs() << "PERF2BOLT: failed to parse samples\n";
504+
502505
if (opts::HeatmapMode) {
503-
if (std::error_code EC = printLBRHeatMap()) {
504-
errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
505-
exit(1);
506-
}
506+
if (std::error_code EC = printHeatMap())
507+
return errorCodeToError(EC);
507508
exit(0);
508509
}
509510

510-
if ((!opts::BasicAggregation && parseBranchEvents()) ||
511-
(opts::BasicAggregation && parseBasicEvents()))
512-
errs() << "PERF2BOLT: failed to parse samples\n";
513-
514511
// Special handling for memory events
515512
if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
516513
return Error::success();
@@ -1322,7 +1319,7 @@ bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const {
13221319
(LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
13231320
}
13241321

1325-
std::error_code DataAggregator::printLBRHeatMap() {
1322+
std::error_code DataAggregator::printHeatMap() {
13261323
outs() << "PERF2BOLT: parse branch events...\n";
13271324
NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
13281325
TimerGroupDesc, opts::TimeAggregator);
@@ -1333,53 +1330,6 @@ std::error_code DataAggregator::printLBRHeatMap() {
13331330
}
13341331
Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
13351332
opts::HeatmapMaxAddress, getTextSections(BC));
1336-
uint64_t NumTotalSamples = 0;
1337-
1338-
if (opts::BasicAggregation) {
1339-
while (hasData()) {
1340-
ErrorOr<PerfBasicSample> SampleRes = parseBasicSample();
1341-
if (std::error_code EC = SampleRes.getError()) {
1342-
if (EC == errc::no_such_process)
1343-
continue;
1344-
return EC;
1345-
}
1346-
PerfBasicSample &Sample = SampleRes.get();
1347-
HM.registerAddress(Sample.PC);
1348-
NumTotalSamples++;
1349-
}
1350-
outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n";
1351-
} else {
1352-
while (hasData()) {
1353-
ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1354-
if (std::error_code EC = SampleRes.getError()) {
1355-
if (EC == errc::no_such_process)
1356-
continue;
1357-
return EC;
1358-
}
1359-
1360-
PerfBranchSample &Sample = SampleRes.get();
1361-
1362-
// LBRs are stored in reverse execution order. NextLBR refers to the next
1363-
// executed branch record.
1364-
const LBREntry *NextLBR = nullptr;
1365-
for (const LBREntry &LBR : Sample.LBR) {
1366-
if (NextLBR) {
1367-
// Record fall-through trace.
1368-
const uint64_t TraceFrom = LBR.To;
1369-
const uint64_t TraceTo = NextLBR->From;
1370-
++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
1371-
}
1372-
NextLBR = &LBR;
1373-
}
1374-
if (!Sample.LBR.empty()) {
1375-
HM.registerAddress(Sample.LBR.front().To);
1376-
HM.registerAddress(Sample.LBR.back().From);
1377-
}
1378-
NumTotalSamples += Sample.LBR.size();
1379-
}
1380-
outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
1381-
outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";
1382-
}
13831333

13841334
if (!NumTotalSamples) {
13851335
if (opts::BasicAggregation) {
@@ -1395,10 +1345,14 @@ std::error_code DataAggregator::printLBRHeatMap() {
13951345

13961346
outs() << "HEATMAP: building heat map...\n";
13971347

1398-
for (const auto &LBR : FallthroughLBRs) {
1399-
const Trace &Trace = LBR.first;
1400-
const FTInfo &Info = LBR.second;
1401-
HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount);
1348+
if (opts::BasicAggregation) {
1349+
for (const auto &[PC, Hits]: BasicSamples)
1350+
HM.registerAddress(PC, Hits);
1351+
} else {
1352+
for (const auto &[Trace, Info] : FallthroughLBRs)
1353+
HM.registerAddressRange(Trace.From, Trace.To, Info.InternCount);
1354+
for (const auto &[Trace, Info] : BranchLBRs)
1355+
HM.registerAddress(Trace.From, Info.TakenCount);
14021356
}
14031357

14041358
if (HM.getNumInvalidRanges())
@@ -1490,12 +1444,86 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
14901444
return NumTraces;
14911445
}
14921446

1447+
static void printColdSamplesDiagnostic() {
1448+
if (NumColdSamples > 0) {
1449+
const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
1450+
outs() << "PERF2BOLT: " << NumColdSamples
1451+
<< format(" (%.1f%%)", ColdSamples)
1452+
<< " samples recorded in cold regions of split functions.\n";
1453+
if (ColdSamples > 5.0f)
1454+
outs()
1455+
<< "WARNING: The BOLT-processed binary where samples were collected "
1456+
"likely used bad data or your service observed a large shift in "
1457+
"profile. You may want to audit this.\n";
1458+
}
1459+
}
1460+
1461+
static void printLongRangeTracesDiagnostic() {
1462+
outs() << "PERF2BOLT: out of range traces involving unknown regions: "
1463+
<< NumLongRangeTraces;
1464+
if (NumTraces > 0)
1465+
outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1466+
outs() << "\n";
1467+
}
1468+
1469+
static float printColoredPct(uint64_t Numerator, uint64_t Denominator, float T1,
1470+
float T2) {
1471+
if (Denominator == 0) {
1472+
outs() << "\n";
1473+
return 0;
1474+
}
1475+
float Percent = Numerator * 100.0f / Denominator;
1476+
outs() << " (";
1477+
if (outs().has_colors()) {
1478+
if (Percent > T2)
1479+
outs().changeColor(raw_ostream::RED);
1480+
else if (Percent > T1)
1481+
outs().changeColor(raw_ostream::YELLOW);
1482+
else
1483+
outs().changeColor(raw_ostream::GREEN);
1484+
}
1485+
outs() << format("%.1f%%", Percent);
1486+
if (outs().has_colors())
1487+
outs().resetColor();
1488+
outs() << ")\n";
1489+
return Perc;
1490+
}
1491+
1492+
static void printBranchSamplesDiagnostics() {
1493+
outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1494+
<< NumInvalidTraces;
1495+
if (printColoredPct(NumInvalidTraces, NumTraces, 5, 10) > 10)
1496+
outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1497+
"binary is probably not the same binary used during profiling "
1498+
"collection. The generated data may be ineffective for improving "
1499+
"performance.\n\n";
1500+
printLongRangeTracesDiagnostic();
1501+
printColdSamplesDiagnostic();
1502+
}
1503+
1504+
static void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) {
1505+
outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1506+
<< OutOfRangeSamples;
1507+
if (printColoredPct(OutOfRangeSamples, NumTotalSamples, 40, 60) > 80)
1508+
outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1509+
"binary is probably not the same binary used during profiling "
1510+
"collection. The generated data may be ineffective for improving "
1511+
"performance.\n\n";
1512+
printColdSamplesDiagnostic();
1513+
}
1514+
1515+
static void printBranchStacksDiagnostics(uint64_t IgnoredSamples) {
1516+
outs() << "PERF2BOLT: ignored samples: " << IgnoredSamples;
1517+
if (printColoredPct(IgnoredSamples, NumTotalSamples, 20, 50) > 50)
1518+
errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
1519+
"were attributed to the input binary\n";
1520+
}
1521+
14931522
std::error_code DataAggregator::parseBranchEvents() {
14941523
outs() << "PERF2BOLT: parse branch events...\n";
14951524
NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
14961525
TimerGroupDesc, opts::TimeAggregator);
14971526

1498-
uint64_t NumTotalSamples = 0;
14991527
uint64_t NumEntries = 0;
15001528
uint64_t NumSamples = 0;
15011529
uint64_t NumSamplesNoLBR = 0;
@@ -1534,22 +1562,6 @@ std::error_code DataAggregator::parseBranchEvents() {
15341562
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr))
15351563
BF->setHasProfileAvailable();
15361564

1537-
auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
1538-
OS << " (";
1539-
if (OS.has_colors()) {
1540-
if (Percent > T2)
1541-
OS.changeColor(raw_ostream::RED);
1542-
else if (Percent > T1)
1543-
OS.changeColor(raw_ostream::YELLOW);
1544-
else
1545-
OS.changeColor(raw_ostream::GREEN);
1546-
}
1547-
OS << format("%.1f%%", Percent);
1548-
if (OS.has_colors())
1549-
OS.resetColor();
1550-
OS << ")";
1551-
};
1552-
15531565
outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
15541566
<< " LBR entries\n";
15551567
if (NumTotalSamples) {
@@ -1561,47 +1573,10 @@ std::error_code DataAggregator::parseBranchEvents() {
15611573
"in no-LBR mode with -nl (the performance improvement in -nl "
15621574
"mode may be limited)\n";
15631575
} else {
1564-
const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
1565-
const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
1566-
outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
1567-
printColored(outs(), PercentIgnored, 20, 50);
1568-
outs() << " were ignored\n";
1569-
if (PercentIgnored > 50.0f)
1570-
errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
1571-
"were attributed to the input binary\n";
1576+
printBranchStacksDiagnostics(NumTotalSamples - NumSamples);
15721577
}
15731578
}
1574-
outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1575-
<< NumInvalidTraces;
1576-
float Perc = 0.0f;
1577-
if (NumTraces > 0) {
1578-
Perc = NumInvalidTraces * 100.0f / NumTraces;
1579-
printColored(outs(), Perc, 5, 10);
1580-
}
1581-
outs() << "\n";
1582-
if (Perc > 10.0f)
1583-
outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1584-
"binary is probably not the same binary used during profiling "
1585-
"collection. The generated data may be ineffective for improving "
1586-
"performance.\n\n";
1587-
1588-
outs() << "PERF2BOLT: out of range traces involving unknown regions: "
1589-
<< NumLongRangeTraces;
1590-
if (NumTraces > 0)
1591-
outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1592-
outs() << "\n";
1593-
1594-
if (NumColdSamples > 0) {
1595-
const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
1596-
outs() << "PERF2BOLT: " << NumColdSamples
1597-
<< format(" (%.1f%%)", ColdSamples)
1598-
<< " samples recorded in cold regions of split functions.\n";
1599-
if (ColdSamples > 5.0f)
1600-
outs()
1601-
<< "WARNING: The BOLT-processed binary where samples were collected "
1602-
"likely used bad data or your service observed a large shift in "
1603-
"profile. You may want to audit this.\n";
1604-
}
1579+
printBranchSamplesDiagnostics();
16051580

16061581
return std::error_code();
16071582
}
@@ -1658,11 +1633,10 @@ void DataAggregator::processBasicEvents() {
16581633
NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
16591634
TimerGroupDesc, opts::TimeAggregator);
16601635
uint64_t OutOfRangeSamples = 0;
1661-
uint64_t NumSamples = 0;
16621636
for (auto &Sample : BasicSamples) {
16631637
const uint64_t PC = Sample.first;
16641638
const uint64_t HitCount = Sample.second;
1665-
NumSamples += HitCount;
1639+
NumTotalSamples += HitCount;
16661640
BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
16671641
if (!Func) {
16681642
OutOfRangeSamples += HitCount;
@@ -1673,31 +1647,7 @@ void DataAggregator::processBasicEvents() {
16731647
}
16741648
outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
16751649

1676-
outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1677-
<< OutOfRangeSamples;
1678-
float Perc = 0.0f;
1679-
if (NumSamples > 0) {
1680-
outs() << " (";
1681-
Perc = OutOfRangeSamples * 100.0f / NumSamples;
1682-
if (outs().has_colors()) {
1683-
if (Perc > 60.0f)
1684-
outs().changeColor(raw_ostream::RED);
1685-
else if (Perc > 40.0f)
1686-
outs().changeColor(raw_ostream::YELLOW);
1687-
else
1688-
outs().changeColor(raw_ostream::GREEN);
1689-
}
1690-
outs() << format("%.1f%%", Perc);
1691-
if (outs().has_colors())
1692-
outs().resetColor();
1693-
outs() << ")";
1694-
}
1695-
outs() << "\n";
1696-
if (Perc > 80.0f)
1697-
outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1698-
"binary is probably not the same binary used during profiling "
1699-
"collection. The generated data may be ineffective for improving "
1700-
"performance.\n\n";
1650+
printBasicSamplesDiagnostics(OutOfRangeSamples);
17011651
}
17021652

17031653
std::error_code DataAggregator::parseMemEvents() {
@@ -1799,37 +1749,7 @@ void DataAggregator::processPreAggregated() {
17991749

18001750
outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
18011751
<< " aggregated LBR entries\n";
1802-
outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1803-
<< NumInvalidTraces;
1804-
float Perc = 0.0f;
1805-
if (NumTraces > 0) {
1806-
outs() << " (";
1807-
Perc = NumInvalidTraces * 100.0f / NumTraces;
1808-
if (outs().has_colors()) {
1809-
if (Perc > 10.0f)
1810-
outs().changeColor(raw_ostream::RED);
1811-
else if (Perc > 5.0f)
1812-
outs().changeColor(raw_ostream::YELLOW);
1813-
else
1814-
outs().changeColor(raw_ostream::GREEN);
1815-
}
1816-
outs() << format("%.1f%%", Perc);
1817-
if (outs().has_colors())
1818-
outs().resetColor();
1819-
outs() << ")";
1820-
}
1821-
outs() << "\n";
1822-
if (Perc > 10.0f)
1823-
outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1824-
"binary is probably not the same binary used during profiling "
1825-
"collection. The generated data may be ineffective for improving "
1826-
"performance.\n\n";
1827-
1828-
outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
1829-
<< NumLongRangeTraces;
1830-
if (NumTraces > 0)
1831-
outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1832-
outs() << "\n";
1752+
printBranchSamplesDiagnostics();
18331753
}
18341754

18351755
std::optional<int32_t> DataAggregator::parseCommExecEvent() {

bolt/lib/Profile/Heatmap.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ void Heatmap::registerAddressRange(uint64_t StartAddress, uint64_t EndAddress,
4343
}
4444

4545
for (uint64_t Bucket = StartAddress / BucketSize;
46-
Bucket <= EndAddress / BucketSize; ++Bucket)
46+
Bucket < EndAddress / BucketSize; ++Bucket)
4747
Map[Bucket] += Count;
4848
}
4949

0 commit comments

Comments
 (0)