@@ -634,7 +634,7 @@ bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
634634 uint64_t Count) {
635635 BinaryFunction *ParentFunc = getBATParentFunction (OrigFunc);
636636 BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
637- if (ParentFunc || (BAT && !BAT-> isBATFunction (OrigFunc. getAddress ())) )
637+ if (ParentFunc)
638638 NumColdSamples += Count;
639639
640640 auto I = NamesToSamples.find (Func.getOneName ());
@@ -756,13 +756,12 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
756756 Addr = BAT->translate (Func->getAddress (), Addr, IsFrom);
757757
758758 BinaryFunction *ParentFunc = getBATParentFunction (*Func);
759- if (IsFrom &&
760- (ParentFunc || (BAT && !BAT->isBATFunction (Func->getAddress ()))))
761- NumColdSamples += Count;
762-
763759 if (!ParentFunc)
764760 return std::pair{Func, IsRetOrCallCont};
765761
762+ if (IsFrom)
763+ NumColdSamples += Count;
764+
766765 return std::pair{ParentFunc, IsRetOrCallCont};
767766 };
768767
@@ -1423,8 +1422,9 @@ std::error_code DataAggregator::printLBRHeatMap() {
14231422 return std::error_code ();
14241423}
14251424
1426- void DataAggregator::parseLBRSample (const PerfBranchSample &Sample,
1427- bool NeedsSkylakeFix) {
1425+ uint64_t DataAggregator::parseLBRSample (const PerfBranchSample &Sample,
1426+ bool NeedsSkylakeFix) {
1427+ uint64_t NumTraces{0 };
14281428 // LBRs are stored in reverse execution order. NextLBR refers to the next
14291429 // executed branch record.
14301430 const LBREntry *NextLBR = nullptr ;
@@ -1487,93 +1487,19 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
14871487 ++Info.TakenCount ;
14881488 Info.MispredCount += LBR.Mispred ;
14891489 }
1490- }
1491-
1492- void DataAggregator::printColdSamplesDiagnostic () const {
1493- if (NumColdSamples > 0 ) {
1494- const float ColdSamples = NumColdSamples * 100 .0f / NumTotalSamples;
1495- outs () << " PERF2BOLT: " << NumColdSamples
1496- << format (" (%.1f%%)" , ColdSamples)
1497- << " samples recorded in cold regions of split functions.\n " ;
1498- if (ColdSamples > 5 .0f )
1499- outs ()
1500- << " WARNING: The BOLT-processed binary where samples were collected "
1501- " likely used bad data or your service observed a large shift in "
1502- " profile. You may want to audit this\n " ;
1503- }
1504- }
1505-
1506- void DataAggregator::printLongRangeTracesDiagnostic () const {
1507- outs () << " PERF2BOLT: out of range traces involving unknown regions: "
1508- << NumLongRangeTraces;
1509- if (NumTraces > 0 )
1510- outs () << format (" (%.1f%%)" , NumLongRangeTraces * 100 .0f / NumTraces);
1511- outs () << " \n " ;
1512- }
1513-
1514- static float printColoredPct (uint64_t Numerator, uint64_t Denominator, float T1,
1515- float T2) {
1516- if (Denominator == 0 ) {
1517- outs () << " \n " ;
1518- return 0 ;
1519- }
1520- float Percent = Numerator * 100 .0f / Denominator;
1521- outs () << " (" ;
1522- if (outs ().has_colors ()) {
1523- if (Percent > T2)
1524- outs ().changeColor (raw_ostream::RED);
1525- else if (Percent > T1)
1526- outs ().changeColor (raw_ostream::YELLOW);
1527- else
1528- outs ().changeColor (raw_ostream::GREEN);
1529- }
1530- outs () << format (" %.1f%%" , Percent);
1531- if (outs ().has_colors ())
1532- outs ().resetColor ();
1533- outs () << " )\n " ;
1534- return Percent;
1535- }
1536-
1537- void DataAggregator::printBranchSamplesDiagnostics () const {
1538- outs () << " PERF2BOLT: traces mismatching disassembled function contents: "
1539- << NumInvalidTraces;
1540- if (printColoredPct (NumInvalidTraces, NumTraces, 5 , 10 ) > 10 )
1541- outs () << " \n !! WARNING !! This high mismatch ratio indicates the input "
1542- " binary is probably not the same binary used during profiling "
1543- " collection. The generated data may be ineffective for improving "
1544- " performance\n\n " ;
1545- printLongRangeTracesDiagnostic ();
1546- printColdSamplesDiagnostic ();
1547- }
1548-
1549- void DataAggregator::printBasicSamplesDiagnostics (
1550- uint64_t OutOfRangeSamples) const {
1551- outs () << " PERF2BOLT: out of range samples recorded in unknown regions: "
1552- << OutOfRangeSamples;
1553- if (printColoredPct (OutOfRangeSamples, NumTotalSamples, 40 , 60 ) > 80 )
1554- outs () << " \n !! WARNING !! This high mismatch ratio indicates the input "
1555- " binary is probably not the same binary used during profiling "
1556- " collection. The generated data may be ineffective for improving "
1557- " performance\n\n " ;
1558- printColdSamplesDiagnostic ();
1559- }
1560-
1561- void DataAggregator::printBranchStacksDiagnostics (
1562- uint64_t IgnoredSamples) const {
1563- outs () << " PERF2BOLT: ignored samples: " << IgnoredSamples;
1564- if (printColoredPct (IgnoredSamples, NumTotalSamples, 20 , 50 ) > 50 )
1565- errs () << " PERF2BOLT-WARNING: less than 50% of all recorded samples "
1566- " were attributed to the input binary\n " ;
1490+ return NumTraces;
15671491}
15681492
15691493std::error_code DataAggregator::parseBranchEvents () {
15701494 outs () << " PERF2BOLT: parse branch events...\n " ;
15711495 NamedRegionTimer T (" parseBranch" , " Parsing branch events" , TimerGroupName,
15721496 TimerGroupDesc, opts::TimeAggregator);
15731497
1498+ uint64_t NumTotalSamples = 0 ;
15741499 uint64_t NumEntries = 0 ;
15751500 uint64_t NumSamples = 0 ;
15761501 uint64_t NumSamplesNoLBR = 0 ;
1502+ uint64_t NumTraces = 0 ;
15771503 bool NeedsSkylakeFix = false ;
15781504
15791505 while (hasData () && NumTotalSamples < opts::MaxSamples) {
@@ -1600,14 +1526,30 @@ std::error_code DataAggregator::parseBranchEvents() {
16001526 NeedsSkylakeFix = true ;
16011527 }
16021528
1603- parseLBRSample (Sample, NeedsSkylakeFix);
1529+ NumTraces += parseLBRSample (Sample, NeedsSkylakeFix);
16041530 }
16051531
16061532 for (const Trace &Trace : llvm::make_first_range (BranchLBRs))
16071533 for (const uint64_t Addr : {Trace.From , Trace.To })
16081534 if (BinaryFunction *BF = getBinaryFunctionContainingAddress (Addr))
16091535 BF->setHasProfileAvailable ();
16101536
1537+ auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
1538+ OS << " (" ;
1539+ if (OS.has_colors ()) {
1540+ if (Percent > T2)
1541+ OS.changeColor (raw_ostream::RED);
1542+ else if (Percent > T1)
1543+ OS.changeColor (raw_ostream::YELLOW);
1544+ else
1545+ OS.changeColor (raw_ostream::GREEN);
1546+ }
1547+ OS << format (" %.1f%%" , Percent);
1548+ if (OS.has_colors ())
1549+ OS.resetColor ();
1550+ OS << " )" ;
1551+ };
1552+
16111553 outs () << " PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
16121554 << " LBR entries\n " ;
16131555 if (NumTotalSamples) {
@@ -1619,10 +1561,47 @@ std::error_code DataAggregator::parseBranchEvents() {
16191561 " in no-LBR mode with -nl (the performance improvement in -nl "
16201562 " mode may be limited)\n " ;
16211563 } else {
1622- printBranchStacksDiagnostics (NumTotalSamples - NumSamples);
1564+ const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
1565+ const float PercentIgnored = 100 .0f * IgnoredSamples / NumTotalSamples;
1566+ outs () << " PERF2BOLT: " << IgnoredSamples << " samples" ;
1567+ printColored (outs (), PercentIgnored, 20 , 50 );
1568+ outs () << " were ignored\n " ;
1569+ if (PercentIgnored > 50 .0f )
1570+ errs () << " PERF2BOLT-WARNING: less than 50% of all recorded samples "
1571+ " were attributed to the input binary\n " ;
16231572 }
16241573 }
1625- printBranchSamplesDiagnostics ();
1574+ outs () << " PERF2BOLT: traces mismatching disassembled function contents: "
1575+ << NumInvalidTraces;
1576+ float Perc = 0 .0f ;
1577+ if (NumTraces > 0 ) {
1578+ Perc = NumInvalidTraces * 100 .0f / NumTraces;
1579+ printColored (outs (), Perc, 5 , 10 );
1580+ }
1581+ outs () << " \n " ;
1582+ if (Perc > 10 .0f )
1583+ outs () << " \n !! WARNING !! This high mismatch ratio indicates the input "
1584+ " binary is probably not the same binary used during profiling "
1585+ " collection. The generated data may be ineffective for improving "
1586+ " performance.\n\n " ;
1587+
1588+ outs () << " PERF2BOLT: out of range traces involving unknown regions: "
1589+ << NumLongRangeTraces;
1590+ if (NumTraces > 0 )
1591+ outs () << format (" (%.1f%%)" , NumLongRangeTraces * 100 .0f / NumTraces);
1592+ outs () << " \n " ;
1593+
1594+ if (NumColdSamples > 0 ) {
1595+ const float ColdSamples = NumColdSamples * 100 .0f / NumTotalSamples;
1596+ outs () << " PERF2BOLT: " << NumColdSamples
1597+ << format (" (%.1f%%)" , ColdSamples)
1598+ << " samples recorded in cold regions of split functions.\n " ;
1599+ if (ColdSamples > 5 .0f )
1600+ outs ()
1601+ << " WARNING: The BOLT-processed binary where samples were collected "
1602+ " likely used bad data or your service observed a large shift in "
1603+ " profile. You may want to audit this.\n " ;
1604+ }
16261605
16271606 return std::error_code ();
16281607}
@@ -1679,10 +1658,11 @@ void DataAggregator::processBasicEvents() {
16791658 NamedRegionTimer T (" processBasic" , " Processing basic events" , TimerGroupName,
16801659 TimerGroupDesc, opts::TimeAggregator);
16811660 uint64_t OutOfRangeSamples = 0 ;
1661+ uint64_t NumSamples = 0 ;
16821662 for (auto &Sample : BasicSamples) {
16831663 const uint64_t PC = Sample.first ;
16841664 const uint64_t HitCount = Sample.second ;
1685- NumTotalSamples += HitCount;
1665+ NumSamples += HitCount;
16861666 BinaryFunction *Func = getBinaryFunctionContainingAddress (PC);
16871667 if (!Func) {
16881668 OutOfRangeSamples += HitCount;
@@ -1691,9 +1671,33 @@ void DataAggregator::processBasicEvents() {
16911671
16921672 doSample (*Func, PC, HitCount);
16931673 }
1694- outs () << " PERF2BOLT: read " << NumTotalSamples << " samples\n " ;
1674+ outs () << " PERF2BOLT: read " << NumSamples << " samples\n " ;
16951675
1696- printBasicSamplesDiagnostics (OutOfRangeSamples);
1676+ outs () << " PERF2BOLT: out of range samples recorded in unknown regions: "
1677+ << OutOfRangeSamples;
1678+ float Perc = 0 .0f ;
1679+ if (NumSamples > 0 ) {
1680+ outs () << " (" ;
1681+ Perc = OutOfRangeSamples * 100 .0f / NumSamples;
1682+ if (outs ().has_colors ()) {
1683+ if (Perc > 60 .0f )
1684+ outs ().changeColor (raw_ostream::RED);
1685+ else if (Perc > 40 .0f )
1686+ outs ().changeColor (raw_ostream::YELLOW);
1687+ else
1688+ outs ().changeColor (raw_ostream::GREEN);
1689+ }
1690+ outs () << format (" %.1f%%" , Perc);
1691+ if (outs ().has_colors ())
1692+ outs ().resetColor ();
1693+ outs () << " )" ;
1694+ }
1695+ outs () << " \n " ;
1696+ if (Perc > 80 .0f )
1697+ outs () << " \n !! WARNING !! This high mismatch ratio indicates the input "
1698+ " binary is probably not the same binary used during profiling "
1699+ " collection. The generated data may be ineffective for improving "
1700+ " performance.\n\n " ;
16971701}
16981702
16991703std::error_code DataAggregator::parseMemEvents () {
@@ -1771,13 +1775,13 @@ void DataAggregator::processPreAggregated() {
17711775 NamedRegionTimer T (" processAggregated" , " Processing aggregated branch events" ,
17721776 TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
17731777
1778+ uint64_t NumTraces = 0 ;
17741779 for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
17751780 switch (AggrEntry.EntryType ) {
17761781 case AggregatedLBREntry::BRANCH:
17771782 case AggregatedLBREntry::TRACE:
17781783 doBranch (AggrEntry.From .Offset , AggrEntry.To .Offset , AggrEntry.Count ,
17791784 AggrEntry.Mispreds );
1780- NumTotalSamples += AggrEntry.Count ;
17811785 break ;
17821786 case AggregatedLBREntry::FT:
17831787 case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
@@ -1795,7 +1799,37 @@ void DataAggregator::processPreAggregated() {
17951799
17961800 outs () << " PERF2BOLT: read " << AggregatedLBRs.size ()
17971801 << " aggregated LBR entries\n " ;
1798- printBranchSamplesDiagnostics ();
1802+ outs () << " PERF2BOLT: traces mismatching disassembled function contents: "
1803+ << NumInvalidTraces;
1804+ float Perc = 0 .0f ;
1805+ if (NumTraces > 0 ) {
1806+ outs () << " (" ;
1807+ Perc = NumInvalidTraces * 100 .0f / NumTraces;
1808+ if (outs ().has_colors ()) {
1809+ if (Perc > 10 .0f )
1810+ outs ().changeColor (raw_ostream::RED);
1811+ else if (Perc > 5 .0f )
1812+ outs ().changeColor (raw_ostream::YELLOW);
1813+ else
1814+ outs ().changeColor (raw_ostream::GREEN);
1815+ }
1816+ outs () << format (" %.1f%%" , Perc);
1817+ if (outs ().has_colors ())
1818+ outs ().resetColor ();
1819+ outs () << " )" ;
1820+ }
1821+ outs () << " \n " ;
1822+ if (Perc > 10 .0f )
1823+ outs () << " \n !! WARNING !! This high mismatch ratio indicates the input "
1824+ " binary is probably not the same binary used during profiling "
1825+ " collection. The generated data may be ineffective for improving "
1826+ " performance.\n\n " ;
1827+
1828+ outs () << " PERF2BOLT: Out of range traces involving unknown regions: "
1829+ << NumLongRangeTraces;
1830+ if (NumTraces > 0 )
1831+ outs () << format (" (%.1f%%)" , NumLongRangeTraces * 100 .0f / NumTraces);
1832+ outs () << " \n " ;
17991833}
18001834
18011835std::optional<int32_t > DataAggregator::parseCommExecEvent () {
0 commit comments