Skip to content

Commit 41a1c38

Browse files
committed
fixup! [BOLT] Flow conservation scores
1 parent b18bb59 commit 41a1c38

File tree

2 files changed: +101 −107 lines changed

2 files changed: +101 −107 lines changed

bolt/lib/Passes/ProfileQualityStats.cpp

Lines changed: 100 additions & 104 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,6 @@
1919
#include <unordered_map>
2020
#include <unordered_set>
2121

22-
#define DEBUG_TYPE "bolt-opts"
23-
2422
using namespace llvm;
2523
using namespace bolt;
2624

@@ -34,7 +32,7 @@ cl::opt<unsigned> NumFunctionsForProfileQualityCheck(
3432
cl::opt<unsigned> PercentileForProfileQualityCheck(
3533
"percentile-for-profile-quality-check",
3634
cl::desc("Percentile of profile quality distributions over hottest "
37-
"functions to display."),
35+
"functions to report."),
3836
cl::init(95), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
3937
} // namespace opts
4038

@@ -94,8 +92,7 @@ void printCFGContinuityStats(raw_ostream &OS,
9492
std::vector<size_t> SumECUnreachables;
9593
std::vector<double> FractionECUnreachables;
9694

97-
for (auto it = Functions.begin(); it != Functions.end(); ++it) {
98-
const BinaryFunction *Function = *it;
95+
for (const BinaryFunction *Function : Functions) {
9996
if (Function->size() <= 1)
10097
continue;
10198

@@ -104,28 +101,32 @@ void printCFGContinuityStats(raw_ostream &OS,
104101
size_t SumAllBBEC = 0;
105102
for (const BinaryBasicBlock &BB : *Function) {
106103
const size_t BBEC = BB.getKnownExecutionCount();
107-
NumPosECBBs += BBEC > 0 ? 1 : 0;
104+
NumPosECBBs += !!BBEC;
108105
SumAllBBEC += BBEC;
109106
}
110107

111108
// Perform BFS on subgraph of CFG induced by positive weight edges.
112109
// Compute the number of BBs reachable from the entry(s) of the function and
113110
// the sum of their execution counts (ECs).
114-
std::unordered_map<unsigned, const BinaryBasicBlock *> IndexToBB;
115111
std::unordered_set<unsigned> Visited;
116112
std::queue<unsigned> Queue;
117-
for (const BinaryBasicBlock &BB : *Function) {
118-
// Make sure BB.getIndex() is not already in IndexToBB.
119-
assert(IndexToBB.find(BB.getIndex()) == IndexToBB.end());
120-
IndexToBB[BB.getIndex()] = &BB;
121-
if (BB.isEntryPoint() && BB.getKnownExecutionCount() > 0) {
122-
Queue.push(BB.getIndex());
123-
Visited.insert(BB.getIndex());
113+
size_t SumReachableBBEC = 0;
114+
115+
Function->forEachEntryPoint([&](uint64_t Offset, const MCSymbol *Label) {
116+
const BinaryBasicBlock *EntryBB = Function->getBasicBlockAtOffset(Offset);
117+
if (EntryBB && EntryBB->getKnownExecutionCount() > 0) {
118+
Queue.push(EntryBB->getLayoutIndex());
119+
Visited.insert(EntryBB->getLayoutIndex());
120+
SumReachableBBEC += EntryBB->getKnownExecutionCount();
124121
}
125-
}
122+
return true;
123+
});
124+
125+
const FunctionLayout &Layout = Function->getLayout();
126+
126127
while (!Queue.empty()) {
127128
const unsigned BBIndex = Queue.front();
128-
const BinaryBasicBlock *BB = IndexToBB[BBIndex];
129+
const BinaryBasicBlock *BB = Layout.getBlock(BBIndex);
129130
Queue.pop();
130131
auto SuccBIIter = BB->branch_info_begin();
131132
for (const BinaryBasicBlock *Succ : BB->successors()) {
@@ -134,25 +135,18 @@ void printCFGContinuityStats(raw_ostream &OS,
134135
++SuccBIIter;
135136
continue;
136137
}
137-
if (!Visited.insert(Succ->getIndex()).second) {
138+
if (!Visited.insert(Succ->getLayoutIndex()).second) {
138139
++SuccBIIter;
139140
continue;
140141
}
141-
Queue.push(Succ->getIndex());
142+
SumReachableBBEC += Succ->getKnownExecutionCount();
143+
Queue.push(Succ->getLayoutIndex());
142144
++SuccBIIter;
143145
}
144146
}
145147

146148
const size_t NumReachableBBs = Visited.size();
147149

148-
// Loop through Visited, and sum the corresponding BBs' execution counts
149-
// (ECs).
150-
size_t SumReachableBBEC = 0;
151-
for (const unsigned BBIndex : Visited) {
152-
const BinaryBasicBlock *BB = IndexToBB[BBIndex];
153-
SumReachableBBEC += BB->getKnownExecutionCount();
154-
}
155-
156150
const size_t NumPosECBBsUnreachableFromEntry =
157151
NumPosECBBs - NumReachableBBs;
158152
const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC;
@@ -162,7 +156,8 @@ void printCFGContinuityStats(raw_ostream &OS,
162156
if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) {
163157
OS << "Non-trivial CFG discontinuity observed in function "
164158
<< Function->getPrintName() << "\n";
165-
LLVM_DEBUG(Function->dump());
159+
if (opts::Verbosity >= 3)
160+
Function->dump();
166161
}
167162

168163
NumUnreachables.push_back(NumPosECBBsUnreachableFromEntry);
@@ -176,11 +171,10 @@ void printCFGContinuityStats(raw_ostream &OS,
176171
std::sort(FractionECUnreachables.begin(), FractionECUnreachables.end());
177172
const int Rank = int(FractionECUnreachables.size() *
178173
opts::PercentileForProfileQualityCheck / 100);
179-
OS << format("top %zu%% function CFG discontinuity is %.2lf%%\n",
180-
100 - opts::PercentileForProfileQualityCheck,
174+
OS << format("function CFG discontinuity %.2lf%%; ",
181175
FractionECUnreachables[Rank] * 100);
182176
if (opts::Verbosity >= 1) {
183-
OS << "abbreviations: EC = execution count, POS BBs = positive EC BBs\n"
177+
OS << "\nabbreviations: EC = execution count, POS BBs = positive EC BBs\n"
184178
<< "distribution of NUM(unreachable POS BBs) per function\n";
185179
std::sort(NumUnreachables.begin(), NumUnreachables.end());
186180
printDistribution(OS, NumUnreachables);
@@ -200,8 +194,7 @@ void printCallGraphFlowConservationStats(
200194
FlowInfo &TotalFlowMap) {
201195
std::vector<double> CallGraphGaps;
202196

203-
for (auto it = Functions.begin(); it != Functions.end(); ++it) {
204-
const BinaryFunction *Function = *it;
197+
for (const BinaryFunction *Function : Functions) {
205198
if (Function->size() <= 1 || !Function->isSimple())
206199
continue;
207200

@@ -223,20 +216,22 @@ void printCallGraphFlowConservationStats(
223216
continue;
224217
NumConsideredEntryBlocks++;
225218

226-
EntryInflow += IncomingMap[BB.getIndex()];
227-
EntryOutflow += OutgoingMap[BB.getIndex()];
219+
EntryInflow += IncomingMap[BB.getLayoutIndex()];
220+
EntryOutflow += OutgoingMap[BB.getLayoutIndex()];
228221
}
229222
}
230223
uint64_t NetEntryOutflow = 0;
231224
if (EntryOutflow < EntryInflow) {
232-
if (opts::Verbosity >= 1) {
225+
if (opts::Verbosity >= 2) {
233226
// We expect entry blocks' CFG outflow >= inflow, i.e., it has a
234227
// non-negative net outflow. If this is not the case, then raise a
235228
// warning if requested.
236-
OS << "BOLT WARNING: unexpected entry block CFG outflow < inflow in "
229+
OS << "BOLT WARNING: unexpected entry block CFG outflow < inflow "
230+
"in "
237231
"function "
238232
<< Function->getPrintName() << "\n";
239-
LLVM_DEBUG(Function->dump());
233+
if (opts::Verbosity >= 3)
234+
Function->dump();
240235
}
241236
} else {
242237
NetEntryOutflow = EntryOutflow - EntryInflow;
@@ -252,25 +247,26 @@ void printCallGraphFlowConservationStats(
252247
OS << "Nontrivial call graph gap of size "
253248
<< format("%.2lf%%", 100 * CallGraphGap)
254249
<< " observed in function " << Function->getPrintName() << "\n";
255-
LLVM_DEBUG(Function->dump());
250+
if (opts::Verbosity >= 3)
251+
Function->dump();
256252
}
257253

258254
CallGraphGaps.push_back(CallGraphGap);
259255
}
260256
}
261257
}
262258

263-
if (!CallGraphGaps.empty()) {
264-
std::sort(CallGraphGaps.begin(), CallGraphGaps.end());
265-
const int Rank = int(CallGraphGaps.size() *
266-
opts::PercentileForProfileQualityCheck / 100);
267-
OS << format("top %zu%% call graph flow conservation gap is %.2lf%%\n",
268-
100 - opts::PercentileForProfileQualityCheck,
269-
CallGraphGaps[Rank] * 100);
270-
if (opts::Verbosity >= 1) {
271-
OS << "distribution of function entry flow conservation gaps\n";
272-
printDistribution(OS, CallGraphGaps, /*Fraction=*/true);
273-
}
259+
if (CallGraphGaps.empty())
260+
return;
261+
262+
std::sort(CallGraphGaps.begin(), CallGraphGaps.end());
263+
const int Rank =
264+
int(CallGraphGaps.size() * opts::PercentileForProfileQualityCheck / 100);
265+
OS << format("call graph flow conservation gap %.2lf%%; ",
266+
CallGraphGaps[Rank] * 100);
267+
if (opts::Verbosity >= 1) {
268+
OS << "\ndistribution of function entry flow conservation gaps\n";
269+
printDistribution(OS, CallGraphGaps, /*Fraction=*/true);
274270
}
275271
}
276272

@@ -281,8 +277,7 @@ void printCFGFlowConservationStats(raw_ostream &OS,
281277
std::vector<double> CFGGapsWorst;
282278
std::vector<uint64_t> CFGGapsWorstAbs;
283279

284-
for (auto it = Functions.begin(); it != Functions.end(); ++it) {
285-
const BinaryFunction *Function = *it;
280+
for (const BinaryFunction *Function : Functions) {
286281
if (Function->size() <= 1 || !Function->isSimple())
287282
continue;
288283

@@ -301,8 +296,8 @@ void printCFGFlowConservationStats(raw_ostream &OS,
301296
if (BB.isEntryPoint() || BB.succ_size() == 0)
302297
continue;
303298

304-
const uint64_t Max = MaxCountMaps[BB.getIndex()];
305-
const uint64_t Min = MinCountMaps[BB.getIndex()];
299+
const uint64_t Max = MaxCountMaps[BB.getLayoutIndex()];
300+
const uint64_t Min = MinCountMaps[BB.getLayoutIndex()];
306301
const double Gap = 1 - (double)Min / Max;
307302
double Weight = BB.getKnownExecutionCount() * BB.getNumNonPseudos();
308303
if (Weight == 0)
@@ -335,7 +330,8 @@ void printCFGFlowConservationStats(raw_ostream &OS,
335330
OS << "Worst gap (absolute value): " << WorstGapAbs << " at BB with "
336331
<< "input offset 0x"
337332
<< Twine::utohexstr(BBWorstGapAbs->getInputOffset()) << "\n";
338-
LLVM_DEBUG(Function->dump());
333+
if (opts::Verbosity >= 3)
334+
Function->dump();
339335
}
340336

341337
CFGGapsWeightedAvg.push_back(WeightedGap);
@@ -344,30 +340,27 @@ void printCFGFlowConservationStats(raw_ostream &OS,
344340
}
345341
}
346342

347-
if (!CFGGapsWeightedAvg.empty()) {
348-
std::sort(CFGGapsWeightedAvg.begin(), CFGGapsWeightedAvg.end());
349-
const int RankWA = int(CFGGapsWeightedAvg.size() *
350-
opts::PercentileForProfileQualityCheck / 100);
351-
std::sort(CFGGapsWorst.begin(), CFGGapsWorst.end());
352-
const int RankW =
353-
int(CFGGapsWorst.size() * opts::PercentileForProfileQualityCheck / 100);
354-
OS << format(
355-
"top %zu%% CFG flow conservation gap is %.2lf%% (weighted) and "
356-
"%.2lf%% (worst)\n",
357-
100 - opts::PercentileForProfileQualityCheck,
358-
CFGGapsWeightedAvg[RankWA] * 100, CFGGapsWorst[RankW] * 100);
359-
if (opts::Verbosity >= 1) {
360-
OS << "distribution of weighted CFG flow conservation gaps\n";
361-
printDistribution(OS, CFGGapsWeightedAvg, /*Fraction=*/true);
362-
OS << "Consider only blocks with execution counts > 500:\n"
363-
<< "distribution of worst block flow conservation gap per "
364-
"function \n";
365-
printDistribution(OS, CFGGapsWorst, /*Fraction=*/true);
366-
OS << "distribution of worst block flow conservation gap (absolute "
367-
"value) per function\n";
368-
std::sort(CFGGapsWorstAbs.begin(), CFGGapsWorstAbs.end());
369-
printDistribution(OS, CFGGapsWorstAbs, /*Fraction=*/false);
370-
}
343+
if (CFGGapsWeightedAvg.empty())
344+
return;
345+
std::sort(CFGGapsWeightedAvg.begin(), CFGGapsWeightedAvg.end());
346+
const int RankWA = int(CFGGapsWeightedAvg.size() *
347+
opts::PercentileForProfileQualityCheck / 100);
348+
std::sort(CFGGapsWorst.begin(), CFGGapsWorst.end());
349+
const int RankW =
350+
int(CFGGapsWorst.size() * opts::PercentileForProfileQualityCheck / 100);
351+
OS << format("CFG flow conservation gap %.2lf%% (weighted) %.2lf%% (worst)\n",
352+
CFGGapsWeightedAvg[RankWA] * 100, CFGGapsWorst[RankW] * 100);
353+
if (opts::Verbosity >= 1) {
354+
OS << "distribution of weighted CFG flow conservation gaps\n";
355+
printDistribution(OS, CFGGapsWeightedAvg, /*Fraction=*/true);
356+
OS << "Consider only blocks with execution counts > 500:\n"
357+
<< "distribution of worst block flow conservation gap per "
358+
"function \n";
359+
printDistribution(OS, CFGGapsWorst, /*Fraction=*/true);
360+
OS << "distribution of worst block flow conservation gap (absolute "
361+
"value) per function\n";
362+
std::sort(CFGGapsWorstAbs.begin(), CFGGapsWorstAbs.end());
363+
printDistribution(OS, CFGGapsWorstAbs, /*Fraction=*/false);
371364
}
372365
}
373366

@@ -391,10 +384,10 @@ void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
391384
continue;
392385
}
393386
TotalOutgoing += Count;
394-
IncomingMap[Succ->getIndex()] += Count;
387+
IncomingMap[Succ->getLayoutIndex()] += Count;
395388
++SuccBIIter;
396389
}
397-
OutgoingMap[BB.getIndex()] = TotalOutgoing;
390+
OutgoingMap[BB.getLayoutIndex()] = TotalOutgoing;
398391
}
399392
}
400393

@@ -412,7 +405,7 @@ void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
412405
FlowMapTy &MaxCountMap = TotalMaxCountMaps[FunctionNum];
413406
FlowMapTy &MinCountMap = TotalMinCountMaps[FunctionNum];
414407
for (const BinaryBasicBlock &BB : *Function) {
415-
uint64_t BBNum = BB.getIndex();
408+
uint64_t BBNum = BB.getLayoutIndex();
416409
MaxCountMap[BBNum] = std::max(IncomingMap[BBNum], OutgoingMap[BBNum]);
417410
MinCountMap[BBNum] = std::min(IncomingMap[BBNum], OutgoingMap[BBNum]);
418411
}
@@ -429,17 +422,20 @@ void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
429422

430423
// Update MaxCountMap, MinCountMap, and CallGraphIncomingMap
431424
auto recordCall = [&](const BinaryBasicBlock *SourceBB,
432-
const MCSymbol *DestSymbol, uint64_t Count) {
425+
const MCSymbol *DestSymbol, uint64_t Count,
426+
uint64_t TotalCallCount) {
433427
if (Count == BinaryBasicBlock::COUNT_NO_PROFILE)
434428
Count = 0;
435429
const BinaryFunction *DstFunc =
436430
DestSymbol ? BC.getFunctionForSymbol(DestSymbol) : nullptr;
437431
if (DstFunc)
438432
CallGraphIncomingMap[DstFunc->getFunctionNumber()] += Count;
439433
if (SourceBB) {
440-
unsigned BlockIndex = SourceBB->getIndex();
441-
MaxCountMap[BlockIndex] = std::max(MaxCountMap[BlockIndex], Count);
442-
MinCountMap[BlockIndex] = std::min(MinCountMap[BlockIndex], Count);
434+
unsigned BlockIndex = SourceBB->getLayoutIndex();
435+
MaxCountMap[BlockIndex] =
436+
std::max(MaxCountMap[BlockIndex], TotalCallCount);
437+
MinCountMap[BlockIndex] =
438+
std::min(MinCountMap[BlockIndex], TotalCallCount);
443439
}
444440
};
445441

@@ -452,7 +448,6 @@ void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
452448
CallInfoTy Counts;
453449
const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst);
454450

455-
// If this is an indirect call use perf data directly.
456451
if (!DstSym && BC.MIB->hasAnnotation(Inst, "CallProfile")) {
457452
const auto &ICSP = BC.MIB->getAnnotationAs<IndirectCallSiteProfile>(
458453
Inst, "CallProfile");
@@ -471,22 +466,25 @@ void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
471466
// directly. The call EC is only used to update CallGraphIncomingMap.
472467
if (!Function->hasValidProfile() && !Function->getAllCallSites().empty()) {
473468
for (const IndirectCallProfile &CSI : Function->getAllCallSites()) {
474-
if (!CSI.Symbol)
475-
continue;
476-
recordCall(nullptr, CSI.Symbol, CSI.Count);
469+
if (CSI.Symbol)
470+
recordCall(nullptr, CSI.Symbol, CSI.Count, CSI.Count);
477471
}
478472
continue;
479473
} else {
480474
// If the function has a valid profile
481-
for (BinaryBasicBlock &BB : *Function) {
482-
for (MCInst &Inst : BB) {
483-
if (!BC.MIB->isCall(Inst))
484-
continue;
485-
// Find call instructions and extract target symbols from each
486-
// one.
487-
const CallInfoTy CallInfo = getCallInfo(&BB, Inst);
488-
for (const TargetDesc &CI : CallInfo) {
489-
recordCall(&BB, CI.first, CI.second);
475+
for (const BinaryBasicBlock &BB : *Function) {
476+
for (const MCInst &Inst : BB) {
477+
if (BC.MIB->isCall(Inst)) {
478+
// Find call instructions and extract target symbols from each
479+
// one.
480+
const CallInfoTy CallInfo = getCallInfo(&BB, Inst);
481+
// We need the total call count to update MaxCountMap and
482+
// MinCountMap in recordCall for indirect calls
483+
uint64_t TotalCallCount = 0;
484+
for (const TargetDesc &CI : CallInfo)
485+
TotalCallCount += CI.second;
486+
for (const TargetDesc &CI : CallInfo)
487+
recordCall(&BB, CI.first, CI.second, TotalCallCount);
490488
}
491489
}
492490
}
@@ -511,14 +509,12 @@ void printAll(BinaryContext &BC, FunctionListType &ValidFunctions,
511509
FlowInfo TotalFlowMap;
512510
computeFlowMappings(BC, TotalFlowMap);
513511

514-
BC.outs() << format("BOLT-INFO: among the hottest %zu functions ",
515-
RealNumTopFunctions);
512+
BC.outs() << format("BOLT-INFO: profile quality metrics for the hottest %zu "
513+
"functions (reporting top %zu%% values): ",
514+
RealNumTopFunctions,
515+
100 - opts::PercentileForProfileQualityCheck);
516516
printCFGContinuityStats(BC.outs(), Functions);
517-
BC.outs() << format("BOLT-INFO: among the hottest %zu functions ",
518-
RealNumTopFunctions);
519517
printCallGraphFlowConservationStats(BC.outs(), Functions, TotalFlowMap);
520-
BC.outs() << format("BOLT-INFO: among the hottest %zu functions ",
521-
RealNumTopFunctions);
522518
printCFGFlowConservationStats(BC.outs(), Functions, TotalFlowMap);
523519

524520
// Print more detailed bucketed stats if requested.

0 commit comments

Comments
 (0)