1919#include < unordered_map>
2020#include < unordered_set>
2121
22- #define DEBUG_TYPE " bolt-opts"
23-
2422using namespace llvm ;
2523using namespace bolt ;
2624
@@ -34,7 +32,7 @@ cl::opt<unsigned> NumFunctionsForProfileQualityCheck(
3432cl::opt<unsigned > PercentileForProfileQualityCheck (
3533 " percentile-for-profile-quality-check" ,
3634 cl::desc (" Percentile of profile quality distributions over hottest "
37- " functions to display ." ),
35+ " functions to report ." ),
3836 cl::init(95 ), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
3937} // namespace opts
4038
@@ -94,8 +92,7 @@ void printCFGContinuityStats(raw_ostream &OS,
9492 std::vector<size_t > SumECUnreachables;
9593 std::vector<double > FractionECUnreachables;
9694
97- for (auto it = Functions.begin (); it != Functions.end (); ++it) {
98- const BinaryFunction *Function = *it;
95+ for (const BinaryFunction *Function : Functions) {
9996 if (Function->size () <= 1 )
10097 continue ;
10198
@@ -104,28 +101,32 @@ void printCFGContinuityStats(raw_ostream &OS,
104101 size_t SumAllBBEC = 0 ;
105102 for (const BinaryBasicBlock &BB : *Function) {
106103 const size_t BBEC = BB.getKnownExecutionCount ();
107- NumPosECBBs += BBEC > 0 ? 1 : 0 ;
104+ NumPosECBBs += !! BBEC;
108105 SumAllBBEC += BBEC;
109106 }
110107
111108 // Perform BFS on subgraph of CFG induced by positive weight edges.
112109 // Compute the number of BBs reachable from the entry(s) of the function and
113110 // the sum of their execution counts (ECs).
114- std::unordered_map<unsigned , const BinaryBasicBlock *> IndexToBB;
115111 std::unordered_set<unsigned > Visited;
116112 std::queue<unsigned > Queue;
117- for (const BinaryBasicBlock &BB : *Function) {
118- // Make sure BB.getIndex() is not already in IndexToBB.
119- assert (IndexToBB.find (BB.getIndex ()) == IndexToBB.end ());
120- IndexToBB[BB.getIndex ()] = &BB;
121- if (BB.isEntryPoint () && BB.getKnownExecutionCount () > 0 ) {
122- Queue.push (BB.getIndex ());
123- Visited.insert (BB.getIndex ());
113+ size_t SumReachableBBEC = 0 ;
114+
115+ Function->forEachEntryPoint ([&](uint64_t Offset, const MCSymbol *Label) {
116+ const BinaryBasicBlock *EntryBB = Function->getBasicBlockAtOffset (Offset);
117+ if (EntryBB && EntryBB->getKnownExecutionCount () > 0 ) {
118+ Queue.push (EntryBB->getLayoutIndex ());
119+ Visited.insert (EntryBB->getLayoutIndex ());
120+ SumReachableBBEC += EntryBB->getKnownExecutionCount ();
124121 }
125- }
122+ return true ;
123+ });
124+
125+ const FunctionLayout &Layout = Function->getLayout ();
126+
126127 while (!Queue.empty ()) {
127128 const unsigned BBIndex = Queue.front ();
128- const BinaryBasicBlock *BB = IndexToBB[ BBIndex] ;
129+ const BinaryBasicBlock *BB = Layout. getBlock ( BBIndex) ;
129130 Queue.pop ();
130131 auto SuccBIIter = BB->branch_info_begin ();
131132 for (const BinaryBasicBlock *Succ : BB->successors ()) {
@@ -134,25 +135,18 @@ void printCFGContinuityStats(raw_ostream &OS,
134135 ++SuccBIIter;
135136 continue ;
136137 }
137- if (!Visited.insert (Succ->getIndex ()).second ) {
138+ if (!Visited.insert (Succ->getLayoutIndex ()).second ) {
138139 ++SuccBIIter;
139140 continue ;
140141 }
141- Queue.push (Succ->getIndex ());
142+ SumReachableBBEC += Succ->getKnownExecutionCount ();
143+ Queue.push (Succ->getLayoutIndex ());
142144 ++SuccBIIter;
143145 }
144146 }
145147
146148 const size_t NumReachableBBs = Visited.size ();
147149
148- // Loop through Visited, and sum the corresponding BBs' execution counts
149- // (ECs).
150- size_t SumReachableBBEC = 0 ;
151- for (const unsigned BBIndex : Visited) {
152- const BinaryBasicBlock *BB = IndexToBB[BBIndex];
153- SumReachableBBEC += BB->getKnownExecutionCount ();
154- }
155-
156150 const size_t NumPosECBBsUnreachableFromEntry =
157151 NumPosECBBs - NumReachableBBs;
158152 const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC;
@@ -162,7 +156,8 @@ void printCFGContinuityStats(raw_ostream &OS,
162156 if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05 ) {
163157 OS << " Non-trivial CFG discontinuity observed in function "
164158 << Function->getPrintName () << " \n " ;
165- LLVM_DEBUG (Function->dump ());
159+ if (opts::Verbosity >= 3 )
160+ Function->dump ();
166161 }
167162
168163 NumUnreachables.push_back (NumPosECBBsUnreachableFromEntry);
@@ -176,11 +171,10 @@ void printCFGContinuityStats(raw_ostream &OS,
176171 std::sort (FractionECUnreachables.begin (), FractionECUnreachables.end ());
177172 const int Rank = int (FractionECUnreachables.size () *
178173 opts::PercentileForProfileQualityCheck / 100 );
179- OS << format (" top %zu%% function CFG discontinuity is %.2lf%%\n " ,
180- 100 - opts::PercentileForProfileQualityCheck,
174+ OS << format (" function CFG discontinuity %.2lf%%; " ,
181175 FractionECUnreachables[Rank] * 100 );
182176 if (opts::Verbosity >= 1 ) {
183- OS << " abbreviations : EC = execution count, POS BBs = positive EC BBs\n "
177+ OS << " \n abbreviations : EC = execution count, POS BBs = positive EC BBs\n "
184178 << " distribution of NUM(unreachable POS BBs) per function\n " ;
185179 std::sort (NumUnreachables.begin (), NumUnreachables.end ());
186180 printDistribution (OS, NumUnreachables);
@@ -200,8 +194,7 @@ void printCallGraphFlowConservationStats(
200194 FlowInfo &TotalFlowMap) {
201195 std::vector<double > CallGraphGaps;
202196
203- for (auto it = Functions.begin (); it != Functions.end (); ++it) {
204- const BinaryFunction *Function = *it;
197+ for (const BinaryFunction *Function : Functions) {
205198 if (Function->size () <= 1 || !Function->isSimple ())
206199 continue ;
207200
@@ -223,20 +216,22 @@ void printCallGraphFlowConservationStats(
223216 continue ;
224217 NumConsideredEntryBlocks++;
225218
226- EntryInflow += IncomingMap[BB.getIndex ()];
227- EntryOutflow += OutgoingMap[BB.getIndex ()];
219+ EntryInflow += IncomingMap[BB.getLayoutIndex ()];
220+ EntryOutflow += OutgoingMap[BB.getLayoutIndex ()];
228221 }
229222 }
230223 uint64_t NetEntryOutflow = 0 ;
231224 if (EntryOutflow < EntryInflow) {
232- if (opts::Verbosity >= 1 ) {
225+ if (opts::Verbosity >= 2 ) {
233226 // We expect entry blocks' CFG outflow >= inflow, i.e., it has a
234227 // non-negative net outflow. If this is not the case, then raise a
235228 // warning if requested.
236- OS << " BOLT WARNING: unexpected entry block CFG outflow < inflow in "
229+ OS << " BOLT WARNING: unexpected entry block CFG outflow < inflow "
230+ " in "
237231 " function "
238232 << Function->getPrintName () << " \n " ;
239- LLVM_DEBUG (Function->dump ());
233+ if (opts::Verbosity >= 3 )
234+ Function->dump ();
240235 }
241236 } else {
242237 NetEntryOutflow = EntryOutflow - EntryInflow;
@@ -252,25 +247,26 @@ void printCallGraphFlowConservationStats(
252247 OS << " Nontrivial call graph gap of size "
253248 << format (" %.2lf%%" , 100 * CallGraphGap)
254249 << " observed in function " << Function->getPrintName () << " \n " ;
255- LLVM_DEBUG (Function->dump ());
250+ if (opts::Verbosity >= 3 )
251+ Function->dump ();
256252 }
257253
258254 CallGraphGaps.push_back (CallGraphGap);
259255 }
260256 }
261257 }
262258
263- if (! CallGraphGaps.empty ()) {
264- std::sort (CallGraphGaps. begin (), CallGraphGaps. end ()) ;
265- const int Rank = int (CallGraphGaps. size () *
266- opts::PercentileForProfileQualityCheck / 100 );
267- OS << format ( " top %zu%% call graph flow conservation gap is %.2lf%% \n " ,
268- 100 - opts::PercentileForProfileQualityCheck,
269- CallGraphGaps[Rank] * 100 );
270- if (opts::Verbosity >= 1 ) {
271- OS << " distribution of function entry flow conservation gaps \n " ;
272- printDistribution (OS, CallGraphGaps, /* Fraction= */ true ) ;
273- }
259+ if (CallGraphGaps.empty ())
260+ return ;
261+
262+ std::sort (CallGraphGaps. begin (), CallGraphGaps. end () );
263+ const int Rank =
264+ int (CallGraphGaps. size () * opts::PercentileForProfileQualityCheck / 100 );
265+ OS << format ( " call graph flow conservation gap %.2lf%%; " ,
266+ CallGraphGaps[Rank] * 100 );
267+ if (opts::Verbosity >= 1 ) {
268+ OS << " \n distribution of function entry flow conservation gaps \n " ;
269+ printDistribution (OS, CallGraphGaps, /* Fraction= */ true );
274270 }
275271}
276272
@@ -281,8 +277,7 @@ void printCFGFlowConservationStats(raw_ostream &OS,
281277 std::vector<double > CFGGapsWorst;
282278 std::vector<uint64_t > CFGGapsWorstAbs;
283279
284- for (auto it = Functions.begin (); it != Functions.end (); ++it) {
285- const BinaryFunction *Function = *it;
280+ for (const BinaryFunction *Function : Functions) {
286281 if (Function->size () <= 1 || !Function->isSimple ())
287282 continue ;
288283
@@ -301,8 +296,8 @@ void printCFGFlowConservationStats(raw_ostream &OS,
301296 if (BB.isEntryPoint () || BB.succ_size () == 0 )
302297 continue ;
303298
304- const uint64_t Max = MaxCountMaps[BB.getIndex ()];
305- const uint64_t Min = MinCountMaps[BB.getIndex ()];
299+ const uint64_t Max = MaxCountMaps[BB.getLayoutIndex ()];
300+ const uint64_t Min = MinCountMaps[BB.getLayoutIndex ()];
306301 const double Gap = 1 - (double )Min / Max;
307302 double Weight = BB.getKnownExecutionCount () * BB.getNumNonPseudos ();
308303 if (Weight == 0 )
@@ -335,7 +330,8 @@ void printCFGFlowConservationStats(raw_ostream &OS,
335330 OS << " Worst gap (absolute value): " << WorstGapAbs << " at BB with "
336331 << " input offset 0x"
337332 << Twine::utohexstr (BBWorstGapAbs->getInputOffset ()) << " \n " ;
338- LLVM_DEBUG (Function->dump ());
333+ if (opts::Verbosity >= 3 )
334+ Function->dump ();
339335 }
340336
341337 CFGGapsWeightedAvg.push_back (WeightedGap);
@@ -344,30 +340,27 @@ void printCFGFlowConservationStats(raw_ostream &OS,
344340 }
345341 }
346342
347- if (!CFGGapsWeightedAvg.empty ()) {
348- std::sort (CFGGapsWeightedAvg.begin (), CFGGapsWeightedAvg.end ());
349- const int RankWA = int (CFGGapsWeightedAvg.size () *
350- opts::PercentileForProfileQualityCheck / 100 );
351- std::sort (CFGGapsWorst.begin (), CFGGapsWorst.end ());
352- const int RankW =
353- int (CFGGapsWorst.size () * opts::PercentileForProfileQualityCheck / 100 );
354- OS << format (
355- " top %zu%% CFG flow conservation gap is %.2lf%% (weighted) and "
356- " %.2lf%% (worst)\n " ,
357- 100 - opts::PercentileForProfileQualityCheck,
358- CFGGapsWeightedAvg[RankWA] * 100 , CFGGapsWorst[RankW] * 100 );
359- if (opts::Verbosity >= 1 ) {
360- OS << " distribution of weighted CFG flow conservation gaps\n " ;
361- printDistribution (OS, CFGGapsWeightedAvg, /* Fraction=*/ true );
362- OS << " Consider only blocks with execution counts > 500:\n "
363- << " distribution of worst block flow conservation gap per "
364- " function \n " ;
365- printDistribution (OS, CFGGapsWorst, /* Fraction=*/ true );
366- OS << " distribution of worst block flow conservation gap (absolute "
367- " value) per function\n " ;
368- std::sort (CFGGapsWorstAbs.begin (), CFGGapsWorstAbs.end ());
369- printDistribution (OS, CFGGapsWorstAbs, /* Fraction=*/ false );
370- }
343+ if (CFGGapsWeightedAvg.empty ())
344+ return ;
345+ std::sort (CFGGapsWeightedAvg.begin (), CFGGapsWeightedAvg.end ());
346+ const int RankWA = int (CFGGapsWeightedAvg.size () *
347+ opts::PercentileForProfileQualityCheck / 100 );
348+ std::sort (CFGGapsWorst.begin (), CFGGapsWorst.end ());
349+ const int RankW =
350+ int (CFGGapsWorst.size () * opts::PercentileForProfileQualityCheck / 100 );
351+ OS << format (" CFG flow conservation gap %.2lf%% (weighted) %.2lf%% (worst)\n " ,
352+ CFGGapsWeightedAvg[RankWA] * 100 , CFGGapsWorst[RankW] * 100 );
353+ if (opts::Verbosity >= 1 ) {
354+ OS << " distribution of weighted CFG flow conservation gaps\n " ;
355+ printDistribution (OS, CFGGapsWeightedAvg, /* Fraction=*/ true );
356+ OS << " Consider only blocks with execution counts > 500:\n "
357+ << " distribution of worst block flow conservation gap per "
358+ " function \n " ;
359+ printDistribution (OS, CFGGapsWorst, /* Fraction=*/ true );
360+ OS << " distribution of worst block flow conservation gap (absolute "
361+ " value) per function\n " ;
362+ std::sort (CFGGapsWorstAbs.begin (), CFGGapsWorstAbs.end ());
363+ printDistribution (OS, CFGGapsWorstAbs, /* Fraction=*/ false );
371364 }
372365}
373366
@@ -391,10 +384,10 @@ void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
391384 continue ;
392385 }
393386 TotalOutgoing += Count;
394- IncomingMap[Succ->getIndex ()] += Count;
387+ IncomingMap[Succ->getLayoutIndex ()] += Count;
395388 ++SuccBIIter;
396389 }
397- OutgoingMap[BB.getIndex ()] = TotalOutgoing;
390+ OutgoingMap[BB.getLayoutIndex ()] = TotalOutgoing;
398391 }
399392 }
400393
@@ -412,7 +405,7 @@ void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
412405 FlowMapTy &MaxCountMap = TotalMaxCountMaps[FunctionNum];
413406 FlowMapTy &MinCountMap = TotalMinCountMaps[FunctionNum];
414407 for (const BinaryBasicBlock &BB : *Function) {
415- uint64_t BBNum = BB.getIndex ();
408+ uint64_t BBNum = BB.getLayoutIndex ();
416409 MaxCountMap[BBNum] = std::max (IncomingMap[BBNum], OutgoingMap[BBNum]);
417410 MinCountMap[BBNum] = std::min (IncomingMap[BBNum], OutgoingMap[BBNum]);
418411 }
@@ -429,17 +422,20 @@ void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
429422
430423 // Update MaxCountMap, MinCountMap, and CallGraphIncomingMap
431424 auto recordCall = [&](const BinaryBasicBlock *SourceBB,
432- const MCSymbol *DestSymbol, uint64_t Count) {
425+ const MCSymbol *DestSymbol, uint64_t Count,
426+ uint64_t TotalCallCount) {
433427 if (Count == BinaryBasicBlock::COUNT_NO_PROFILE)
434428 Count = 0 ;
435429 const BinaryFunction *DstFunc =
436430 DestSymbol ? BC.getFunctionForSymbol (DestSymbol) : nullptr ;
437431 if (DstFunc)
438432 CallGraphIncomingMap[DstFunc->getFunctionNumber ()] += Count;
439433 if (SourceBB) {
440- unsigned BlockIndex = SourceBB->getIndex ();
441- MaxCountMap[BlockIndex] = std::max (MaxCountMap[BlockIndex], Count);
442- MinCountMap[BlockIndex] = std::min (MinCountMap[BlockIndex], Count);
434+ unsigned BlockIndex = SourceBB->getLayoutIndex ();
435+ MaxCountMap[BlockIndex] =
436+ std::max (MaxCountMap[BlockIndex], TotalCallCount);
437+ MinCountMap[BlockIndex] =
438+ std::min (MinCountMap[BlockIndex], TotalCallCount);
443439 }
444440 };
445441
@@ -452,7 +448,6 @@ void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
452448 CallInfoTy Counts;
453449 const MCSymbol *DstSym = BC.MIB ->getTargetSymbol (Inst);
454450
455- // If this is an indirect call use perf data directly.
456451 if (!DstSym && BC.MIB ->hasAnnotation (Inst, " CallProfile" )) {
457452 const auto &ICSP = BC.MIB ->getAnnotationAs <IndirectCallSiteProfile>(
458453 Inst, " CallProfile" );
@@ -471,22 +466,25 @@ void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
471466 // directly. The call EC is only used to update CallGraphIncomingMap.
472467 if (!Function->hasValidProfile () && !Function->getAllCallSites ().empty ()) {
473468 for (const IndirectCallProfile &CSI : Function->getAllCallSites ()) {
474- if (!CSI.Symbol )
475- continue ;
476- recordCall (nullptr , CSI.Symbol , CSI.Count );
469+ if (CSI.Symbol )
470+ recordCall (nullptr , CSI.Symbol , CSI.Count , CSI.Count );
477471 }
478472 continue ;
479473 } else {
480474 // If the function has a valid profile
481- for (BinaryBasicBlock &BB : *Function) {
482- for (MCInst &Inst : BB) {
483- if (!BC.MIB ->isCall (Inst))
484- continue ;
485- // Find call instructions and extract target symbols from each
486- // one.
487- const CallInfoTy CallInfo = getCallInfo (&BB, Inst);
488- for (const TargetDesc &CI : CallInfo) {
489- recordCall (&BB, CI.first , CI.second );
475+ for (const BinaryBasicBlock &BB : *Function) {
476+ for (const MCInst &Inst : BB) {
477+ if (BC.MIB ->isCall (Inst)) {
478+ // Find call instructions and extract target symbols from each
479+ // one.
480+ const CallInfoTy CallInfo = getCallInfo (&BB, Inst);
481+ // We need the total call count to update MaxCountMap and
482+ // MinCountMap in recordCall for indirect calls
483+ uint64_t TotalCallCount = 0 ;
484+ for (const TargetDesc &CI : CallInfo)
485+ TotalCallCount += CI.second ;
486+ for (const TargetDesc &CI : CallInfo)
487+ recordCall (&BB, CI.first , CI.second , TotalCallCount);
490488 }
491489 }
492490 }
@@ -511,14 +509,12 @@ void printAll(BinaryContext &BC, FunctionListType &ValidFunctions,
511509 FlowInfo TotalFlowMap;
512510 computeFlowMappings (BC, TotalFlowMap);
513511
514- BC.outs () << format (" BOLT-INFO: among the hottest %zu functions " ,
515- RealNumTopFunctions);
512+ BC.outs () << format (" BOLT-INFO: profile quality metrics for the hottest %zu "
513+ " functions (reporting top %zu%% values): " ,
514+ RealNumTopFunctions,
515+ 100 - opts::PercentileForProfileQualityCheck);
516516 printCFGContinuityStats (BC.outs (), Functions);
517- BC.outs () << format (" BOLT-INFO: among the hottest %zu functions " ,
518- RealNumTopFunctions);
519517 printCallGraphFlowConservationStats (BC.outs (), Functions, TotalFlowMap);
520- BC.outs () << format (" BOLT-INFO: among the hottest %zu functions " ,
521- RealNumTopFunctions);
522518 printCFGFlowConservationStats (BC.outs (), Functions, TotalFlowMap);
523519
524520 // Print more detailed bucketed stats if requested.
0 commit comments