@@ -634,7 +634,7 @@ bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
634634 uint64_t Count) {
635635 BinaryFunction *ParentFunc = getBATParentFunction (OrigFunc);
636636 BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
637- if (ParentFunc)
637+ if (ParentFunc || (BAT && !BAT-> isBATFunction (OrigFunc. getAddress ())) )
638638 NumColdSamples += Count;
639639
640640 auto I = NamesToSamples.find (Func.getOneName ());
@@ -756,12 +756,13 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
756756 Addr = BAT->translate (Func->getAddress (), Addr, IsFrom);
757757
758758 BinaryFunction *ParentFunc = getBATParentFunction (*Func);
759+ if (IsFrom &&
760+ (ParentFunc || (BAT && !BAT->isBATFunction (Func->getAddress ()))))
761+ NumColdSamples += Count;
762+
759763 if (!ParentFunc)
760764 return std::pair{Func, IsRetOrCallCont};
761765
762- if (IsFrom)
763- NumColdSamples += Count;
764-
765766 return std::pair{ParentFunc, IsRetOrCallCont};
766767 };
767768
@@ -1422,9 +1423,8 @@ std::error_code DataAggregator::printLBRHeatMap() {
14221423 return std::error_code ();
14231424}
14241425
1425- uint64_t DataAggregator::parseLBRSample (const PerfBranchSample &Sample,
1426- bool NeedsSkylakeFix) {
1427- uint64_t NumTraces{0 };
1426+ void DataAggregator::parseLBRSample (const PerfBranchSample &Sample,
1427+ bool NeedsSkylakeFix) {
14281428 // LBRs are stored in reverse execution order. NextLBR refers to the next
14291429 // executed branch record.
14301430 const LBREntry *NextLBR = nullptr ;
@@ -1487,19 +1487,93 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
14871487 ++Info.TakenCount ;
14881488 Info.MispredCount += LBR.Mispred ;
14891489 }
1490- return NumTraces;
1490+ }
1491+
1492+ void DataAggregator::printColdSamplesDiagnostic () const {
1493+ if (NumColdSamples > 0 ) {
1494+ const float ColdSamples = NumColdSamples * 100 .0f / NumTotalSamples;
1495+ outs () << " PERF2BOLT: " << NumColdSamples
1496+ << format (" (%.1f%%)" , ColdSamples)
1497+ << " samples recorded in cold regions of split functions.\n " ;
1498+ if (ColdSamples > 5 .0f )
1499+ outs ()
1500+ << " WARNING: The BOLT-processed binary where samples were collected "
1501+ " likely used bad data or your service observed a large shift in "
1502+ " profile. You may want to audit this\n " ;
1503+ }
1504+ }
1505+
1506+ void DataAggregator::printLongRangeTracesDiagnostic () const {
1507+ outs () << " PERF2BOLT: out of range traces involving unknown regions: "
1508+ << NumLongRangeTraces;
1509+ if (NumTraces > 0 )
1510+ outs () << format (" (%.1f%%)" , NumLongRangeTraces * 100 .0f / NumTraces);
1511+ outs () << " \n " ;
1512+ }
1513+
1514+ static float printColoredPct (uint64_t Numerator, uint64_t Denominator, float T1,
1515+ float T2) {
1516+ if (Denominator == 0 ) {
1517+ outs () << " \n " ;
1518+ return 0 ;
1519+ }
1520+ float Percent = Numerator * 100 .0f / Denominator;
1521+ outs () << " (" ;
1522+ if (outs ().has_colors ()) {
1523+ if (Percent > T2)
1524+ outs ().changeColor (raw_ostream::RED);
1525+ else if (Percent > T1)
1526+ outs ().changeColor (raw_ostream::YELLOW);
1527+ else
1528+ outs ().changeColor (raw_ostream::GREEN);
1529+ }
1530+ outs () << format (" %.1f%%" , Percent);
1531+ if (outs ().has_colors ())
1532+ outs ().resetColor ();
1533+ outs () << " )\n " ;
1534+ return Percent;
1535+ }
1536+
1537+ void DataAggregator::printBranchSamplesDiagnostics () const {
1538+ outs () << " PERF2BOLT: traces mismatching disassembled function contents: "
1539+ << NumInvalidTraces;
1540+ if (printColoredPct (NumInvalidTraces, NumTraces, 5 , 10 ) > 10 )
1541+ outs () << " \n !! WARNING !! This high mismatch ratio indicates the input "
1542+ " binary is probably not the same binary used during profiling "
1543+ " collection. The generated data may be ineffective for improving "
1544+ " performance\n\n " ;
1545+ printLongRangeTracesDiagnostic ();
1546+ printColdSamplesDiagnostic ();
1547+ }
1548+
1549+ void DataAggregator::printBasicSamplesDiagnostics (
1550+ uint64_t OutOfRangeSamples) const {
1551+ outs () << " PERF2BOLT: out of range samples recorded in unknown regions: "
1552+ << OutOfRangeSamples;
1553+ if (printColoredPct (OutOfRangeSamples, NumTotalSamples, 40 , 60 ) > 80 )
1554+ outs () << " \n !! WARNING !! This high mismatch ratio indicates the input "
1555+ " binary is probably not the same binary used during profiling "
1556+ " collection. The generated data may be ineffective for improving "
1557+ " performance\n\n " ;
1558+ printColdSamplesDiagnostic ();
1559+ }
1560+
1561+ void DataAggregator::printBranchStacksDiagnostics (
1562+ uint64_t IgnoredSamples) const {
1563+ outs () << " PERF2BOLT: ignored samples: " << IgnoredSamples;
1564+ if (printColoredPct (IgnoredSamples, NumTotalSamples, 20 , 50 ) > 50 )
1565+ errs () << " PERF2BOLT-WARNING: less than 50% of all recorded samples "
1566+ " were attributed to the input binary\n " ;
14911567}
14921568
14931569std::error_code DataAggregator::parseBranchEvents () {
14941570 outs () << " PERF2BOLT: parse branch events...\n " ;
14951571 NamedRegionTimer T (" parseBranch" , " Parsing branch events" , TimerGroupName,
14961572 TimerGroupDesc, opts::TimeAggregator);
14971573
1498- uint64_t NumTotalSamples = 0 ;
14991574 uint64_t NumEntries = 0 ;
15001575 uint64_t NumSamples = 0 ;
15011576 uint64_t NumSamplesNoLBR = 0 ;
1502- uint64_t NumTraces = 0 ;
15031577 bool NeedsSkylakeFix = false ;
15041578
15051579 while (hasData () && NumTotalSamples < opts::MaxSamples) {
@@ -1526,30 +1600,14 @@ std::error_code DataAggregator::parseBranchEvents() {
15261600 NeedsSkylakeFix = true ;
15271601 }
15281602
1529- NumTraces += parseLBRSample (Sample, NeedsSkylakeFix);
1603+ parseLBRSample (Sample, NeedsSkylakeFix);
15301604 }
15311605
15321606 for (const Trace &Trace : llvm::make_first_range (BranchLBRs))
15331607 for (const uint64_t Addr : {Trace.From , Trace.To })
15341608 if (BinaryFunction *BF = getBinaryFunctionContainingAddress (Addr))
15351609 BF->setHasProfileAvailable ();
15361610
1537- auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
1538- OS << " (" ;
1539- if (OS.has_colors ()) {
1540- if (Percent > T2)
1541- OS.changeColor (raw_ostream::RED);
1542- else if (Percent > T1)
1543- OS.changeColor (raw_ostream::YELLOW);
1544- else
1545- OS.changeColor (raw_ostream::GREEN);
1546- }
1547- OS << format (" %.1f%%" , Percent);
1548- if (OS.has_colors ())
1549- OS.resetColor ();
1550- OS << " )" ;
1551- };
1552-
15531611 outs () << " PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
15541612 << " LBR entries\n " ;
15551613 if (NumTotalSamples) {
@@ -1561,47 +1619,10 @@ std::error_code DataAggregator::parseBranchEvents() {
15611619 " in no-LBR mode with -nl (the performance improvement in -nl "
15621620 " mode may be limited)\n " ;
15631621 } else {
1564- const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
1565- const float PercentIgnored = 100 .0f * IgnoredSamples / NumTotalSamples;
1566- outs () << " PERF2BOLT: " << IgnoredSamples << " samples" ;
1567- printColored (outs (), PercentIgnored, 20 , 50 );
1568- outs () << " were ignored\n " ;
1569- if (PercentIgnored > 50 .0f )
1570- errs () << " PERF2BOLT-WARNING: less than 50% of all recorded samples "
1571- " were attributed to the input binary\n " ;
1622+ printBranchStacksDiagnostics (NumTotalSamples - NumSamples);
15721623 }
15731624 }
1574- outs () << " PERF2BOLT: traces mismatching disassembled function contents: "
1575- << NumInvalidTraces;
1576- float Perc = 0 .0f ;
1577- if (NumTraces > 0 ) {
1578- Perc = NumInvalidTraces * 100 .0f / NumTraces;
1579- printColored (outs (), Perc, 5 , 10 );
1580- }
1581- outs () << " \n " ;
1582- if (Perc > 10 .0f )
1583- outs () << " \n !! WARNING !! This high mismatch ratio indicates the input "
1584- " binary is probably not the same binary used during profiling "
1585- " collection. The generated data may be ineffective for improving "
1586- " performance.\n\n " ;
1587-
1588- outs () << " PERF2BOLT: out of range traces involving unknown regions: "
1589- << NumLongRangeTraces;
1590- if (NumTraces > 0 )
1591- outs () << format (" (%.1f%%)" , NumLongRangeTraces * 100 .0f / NumTraces);
1592- outs () << " \n " ;
1593-
1594- if (NumColdSamples > 0 ) {
1595- const float ColdSamples = NumColdSamples * 100 .0f / NumTotalSamples;
1596- outs () << " PERF2BOLT: " << NumColdSamples
1597- << format (" (%.1f%%)" , ColdSamples)
1598- << " samples recorded in cold regions of split functions.\n " ;
1599- if (ColdSamples > 5 .0f )
1600- outs ()
1601- << " WARNING: The BOLT-processed binary where samples were collected "
1602- " likely used bad data or your service observed a large shift in "
1603- " profile. You may want to audit this.\n " ;
1604- }
1625+ printBranchSamplesDiagnostics ();
16051626
16061627 return std::error_code ();
16071628}
@@ -1658,11 +1679,10 @@ void DataAggregator::processBasicEvents() {
16581679 NamedRegionTimer T (" processBasic" , " Processing basic events" , TimerGroupName,
16591680 TimerGroupDesc, opts::TimeAggregator);
16601681 uint64_t OutOfRangeSamples = 0 ;
1661- uint64_t NumSamples = 0 ;
16621682 for (auto &Sample : BasicSamples) {
16631683 const uint64_t PC = Sample.first ;
16641684 const uint64_t HitCount = Sample.second ;
1665- NumSamples += HitCount;
1685+ NumTotalSamples += HitCount;
16661686 BinaryFunction *Func = getBinaryFunctionContainingAddress (PC);
16671687 if (!Func) {
16681688 OutOfRangeSamples += HitCount;
@@ -1671,33 +1691,9 @@ void DataAggregator::processBasicEvents() {
16711691
16721692 doSample (*Func, PC, HitCount);
16731693 }
1674- outs () << " PERF2BOLT: read " << NumSamples << " samples\n " ;
1694+ outs () << " PERF2BOLT: read " << NumTotalSamples << " samples\n " ;
16751695
1676- outs () << " PERF2BOLT: out of range samples recorded in unknown regions: "
1677- << OutOfRangeSamples;
1678- float Perc = 0 .0f ;
1679- if (NumSamples > 0 ) {
1680- outs () << " (" ;
1681- Perc = OutOfRangeSamples * 100 .0f / NumSamples;
1682- if (outs ().has_colors ()) {
1683- if (Perc > 60 .0f )
1684- outs ().changeColor (raw_ostream::RED);
1685- else if (Perc > 40 .0f )
1686- outs ().changeColor (raw_ostream::YELLOW);
1687- else
1688- outs ().changeColor (raw_ostream::GREEN);
1689- }
1690- outs () << format (" %.1f%%" , Perc);
1691- if (outs ().has_colors ())
1692- outs ().resetColor ();
1693- outs () << " )" ;
1694- }
1695- outs () << " \n " ;
1696- if (Perc > 80 .0f )
1697- outs () << " \n !! WARNING !! This high mismatch ratio indicates the input "
1698- " binary is probably not the same binary used during profiling "
1699- " collection. The generated data may be ineffective for improving "
1700- " performance.\n\n " ;
1696+ printBasicSamplesDiagnostics (OutOfRangeSamples);
17011697}
17021698
17031699std::error_code DataAggregator::parseMemEvents () {
@@ -1775,13 +1771,13 @@ void DataAggregator::processPreAggregated() {
17751771 NamedRegionTimer T (" processAggregated" , " Processing aggregated branch events" ,
17761772 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
17771773
1778- uint64_t NumTraces = 0 ;
17791774 for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
17801775 switch (AggrEntry.EntryType ) {
17811776 case AggregatedLBREntry::BRANCH:
17821777 case AggregatedLBREntry::TRACE:
17831778 doBranch (AggrEntry.From .Offset , AggrEntry.To .Offset , AggrEntry.Count ,
17841779 AggrEntry.Mispreds );
1780+ NumTotalSamples += AggrEntry.Count ;
17851781 break ;
17861782 case AggregatedLBREntry::FT:
17871783 case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
@@ -1799,37 +1795,7 @@ void DataAggregator::processPreAggregated() {
17991795
18001796 outs () << " PERF2BOLT: read " << AggregatedLBRs.size ()
18011797 << " aggregated LBR entries\n " ;
1802- outs () << " PERF2BOLT: traces mismatching disassembled function contents: "
1803- << NumInvalidTraces;
1804- float Perc = 0 .0f ;
1805- if (NumTraces > 0 ) {
1806- outs () << " (" ;
1807- Perc = NumInvalidTraces * 100 .0f / NumTraces;
1808- if (outs ().has_colors ()) {
1809- if (Perc > 10 .0f )
1810- outs ().changeColor (raw_ostream::RED);
1811- else if (Perc > 5 .0f )
1812- outs ().changeColor (raw_ostream::YELLOW);
1813- else
1814- outs ().changeColor (raw_ostream::GREEN);
1815- }
1816- outs () << format (" %.1f%%" , Perc);
1817- if (outs ().has_colors ())
1818- outs ().resetColor ();
1819- outs () << " )" ;
1820- }
1821- outs () << " \n " ;
1822- if (Perc > 10 .0f )
1823- outs () << " \n !! WARNING !! This high mismatch ratio indicates the input "
1824- " binary is probably not the same binary used during profiling "
1825- " collection. The generated data may be ineffective for improving "
1826- " performance.\n\n " ;
1827-
1828- outs () << " PERF2BOLT: Out of range traces involving unknown regions: "
1829- << NumLongRangeTraces;
1830- if (NumTraces > 0 )
1831- outs () << format (" (%.1f%%)" , NumLongRangeTraces * 100 .0f / NumTraces);
1832- outs () << " \n " ;
1798+ printBranchSamplesDiagnostics ();
18331799}
18341800
18351801std::optional<int32_t > DataAggregator::parseCommExecEvent () {
0 commit comments