diff --git a/bolt/lib/Passes/ProfileQualityStats.cpp b/bolt/lib/Passes/ProfileQualityStats.cpp index 332c78da8a1e3..dfd74d3dd5719 100644 --- a/bolt/lib/Passes/ProfileQualityStats.cpp +++ b/bolt/lib/Passes/ProfileQualityStats.cpp @@ -52,6 +52,16 @@ struct FlowInfo { FunctionFlowMapTy CallGraphIncomingFlows; }; +// When reporting exception handling stats, we only consider functions with +// more than MinLPECSum counts in landing pads to avoid false positives due to +// sampling noise. +const uint16_t MinLPECSum = 50; + +// When reporting CFG flow conservation stats, we only consider blocks with +// execution counts > MinBlockCount when reporting the distribution of worst +// gaps. +const uint16_t MinBlockCount = 500; + template void printDistribution(raw_ostream &OS, std::vector &values, bool Fraction = false) { @@ -91,8 +101,12 @@ void printCFGContinuityStats(raw_ostream &OS, std::vector FractionECUnreachables; for (const BinaryFunction *Function : Functions) { - if (Function->size() <= 1) + if (Function->size() <= 1) { + NumUnreachables.push_back(0); + SumECUnreachables.push_back(0); + FractionECUnreachables.push_back(0.0); continue; + } // Compute the sum of all BB execution counts (ECs). 
size_t NumPosECBBs = 0; @@ -142,8 +156,10 @@ void printCFGContinuityStats(raw_ostream &OS, const size_t NumPosECBBsUnreachableFromEntry = NumPosECBBs - NumReachableBBs; const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC; - const double FractionECUnreachable = - (double)SumUnreachableBBEC / SumAllBBEC; + + double FractionECUnreachable = 0.0; + if (SumAllBBEC > 0) + FractionECUnreachable = (double)SumUnreachableBBEC / SumAllBBEC; if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) { OS << "Non-trivial CFG discontinuity observed in function " @@ -157,9 +173,6 @@ void printCFGContinuityStats(raw_ostream &OS, FractionECUnreachables.push_back(FractionECUnreachable); } - if (FractionECUnreachables.empty()) - return; - llvm::sort(FractionECUnreachables); const int Rank = int(FractionECUnreachables.size() * opts::PercentileForProfileQualityCheck / 100); @@ -187,8 +200,10 @@ void printCallGraphFlowConservationStats( std::vector CallGraphGaps; for (const BinaryFunction *Function : Functions) { - if (Function->size() <= 1 || !Function->isSimple()) + if (Function->size() <= 1 || !Function->isSimple()) { + CallGraphGaps.push_back(0.0); continue; + } const uint64_t FunctionNum = Function->getFunctionNumber(); std::vector &IncomingFlows = @@ -199,60 +214,63 @@ void printCallGraphFlowConservationStats( TotalFlowMap.CallGraphIncomingFlows; // Only consider functions that are not a program entry. 
- if (CallGraphIncomingFlows.find(FunctionNum) != + if (CallGraphIncomingFlows.find(FunctionNum) == CallGraphIncomingFlows.end()) { - uint64_t EntryInflow = 0; - uint64_t EntryOutflow = 0; - uint32_t NumConsideredEntryBlocks = 0; - - Function->forEachEntryPoint([&](uint64_t Offset, const MCSymbol *Label) { - const BinaryBasicBlock *EntryBB = - Function->getBasicBlockAtOffset(Offset); - if (!EntryBB || EntryBB->succ_size() == 0) - return true; - NumConsideredEntryBlocks++; - EntryInflow += IncomingFlows[EntryBB->getLayoutIndex()]; - EntryOutflow += OutgoingFlows[EntryBB->getLayoutIndex()]; + CallGraphGaps.push_back(0.0); + continue; + } + + uint64_t EntryInflow = 0; + uint64_t EntryOutflow = 0; + uint32_t NumConsideredEntryBlocks = 0; + + Function->forEachEntryPoint([&](uint64_t Offset, const MCSymbol *Label) { + const BinaryBasicBlock *EntryBB = Function->getBasicBlockAtOffset(Offset); + if (!EntryBB || EntryBB->succ_size() == 0) return true; - }); - - uint64_t NetEntryOutflow = 0; - if (EntryOutflow < EntryInflow) { - if (opts::Verbosity >= 2) { - // We expect entry blocks' CFG outflow >= inflow, i.e., it has a - // non-negative net outflow. If this is not the case, then raise a - // warning if requested. 
- OS << "BOLT WARNING: unexpected entry block CFG outflow < inflow " - "in function " - << Function->getPrintName() << "\n"; - if (opts::Verbosity >= 3) - Function->dump(); - } - } else { - NetEntryOutflow = EntryOutflow - EntryInflow; - } - if (NumConsideredEntryBlocks > 0) { - const uint64_t CallGraphInflow = - TotalFlowMap.CallGraphIncomingFlows[Function->getFunctionNumber()]; - const uint64_t Min = std::min(NetEntryOutflow, CallGraphInflow); - const uint64_t Max = std::max(NetEntryOutflow, CallGraphInflow); - const double CallGraphGap = 1 - (double)Min / Max; - - if (opts::Verbosity >= 2 && CallGraphGap >= 0.5) { - OS << "Nontrivial call graph gap of size " - << formatv("{0:P}", CallGraphGap) << " observed in function " - << Function->getPrintName() << "\n"; - if (opts::Verbosity >= 3) - Function->dump(); - } + NumConsideredEntryBlocks++; + EntryInflow += IncomingFlows[EntryBB->getLayoutIndex()]; + EntryOutflow += OutgoingFlows[EntryBB->getLayoutIndex()]; + return true; + }); - CallGraphGaps.push_back(CallGraphGap); + uint64_t NetEntryOutflow = 0; + if (EntryOutflow < EntryInflow) { + if (opts::Verbosity >= 2) { + // We expect entry blocks' CFG outflow >= inflow, i.e., it has a + // non-negative net outflow. If this is not the case, then raise a + // warning if requested. 
+ OS << "BOLT WARNING: unexpected entry block CFG outflow < inflow " + "in function " + << Function->getPrintName() << "\n"; + if (opts::Verbosity >= 3) + Function->dump(); } + } else { + NetEntryOutflow = EntryOutflow - EntryInflow; } - } + if (NumConsideredEntryBlocks > 0) { + const uint64_t CallGraphInflow = + TotalFlowMap.CallGraphIncomingFlows[Function->getFunctionNumber()]; + const uint64_t Min = std::min(NetEntryOutflow, CallGraphInflow); + const uint64_t Max = std::max(NetEntryOutflow, CallGraphInflow); + double CallGraphGap = 0.0; + if (Max > 0) + CallGraphGap = 1 - (double)Min / Max; + + if (opts::Verbosity >= 2 && CallGraphGap >= 0.5) { + OS << "Non-trivial call graph gap of size " + << formatv("{0:P}", CallGraphGap) << " observed in function " + << Function->getPrintName() << "\n"; + if (opts::Verbosity >= 3) + Function->dump(); + } - if (CallGraphGaps.empty()) - return; + CallGraphGaps.push_back(CallGraphGap); + } else { + CallGraphGaps.push_back(0.0); + } + } llvm::sort(CallGraphGaps); const int Rank = @@ -265,18 +283,19 @@ void printCallGraphFlowConservationStats( } } -void printCFGFlowConservationStats(raw_ostream &OS, +void printCFGFlowConservationStats(const BinaryContext &BC, raw_ostream &OS, iterator_range &Functions, FlowInfo &TotalFlowMap) { std::vector CFGGapsWeightedAvg; std::vector CFGGapsWorst; std::vector CFGGapsWorstAbs; - // We only consider blocks with execution counts > MinBlockCount when - // reporting the distribution of worst gaps. 
- const uint16_t MinBlockCount = 500; for (const BinaryFunction *Function : Functions) { - if (Function->size() <= 1 || !Function->isSimple()) + if (Function->size() <= 1 || !Function->isSimple()) { + CFGGapsWeightedAvg.push_back(0.0); + CFGGapsWorst.push_back(0.0); + CFGGapsWorstAbs.push_back(0); continue; + } const uint64_t FunctionNum = Function->getFunctionNumber(); std::vector &MaxCountMaps = @@ -295,12 +314,34 @@ void printCFGFlowConservationStats(raw_ostream &OS, if (BB.isEntryPoint() || BB.succ_size() == 0) continue; + if (BB.getKnownExecutionCount() == 0 || BB.getNumNonPseudos() == 0) + continue; + + // We don't consider blocks that are landing pads or that have a + // positive-execution-count landing pad. + if (BB.isLandingPad()) + continue; + + if (llvm::any_of(BB.landing_pads(), + std::mem_fn(&BinaryBasicBlock::getKnownExecutionCount))) + continue; + + // We don't consider blocks that end with a recursive call instruction + const MCInst *Inst = BB.getLastNonPseudoInstr(); + if (BC.MIB->isCall(*Inst)) { + const MCSymbol *DstSym = BC.MIB->getTargetSymbol(*Inst); + const BinaryFunction *DstFunc = + DstSym ? 
BC.getFunctionForSymbol(DstSym) : nullptr; + if (DstFunc == Function) + continue; + } + const uint64_t Max = MaxCountMaps[BB.getLayoutIndex()]; const uint64_t Min = MinCountMaps[BB.getLayoutIndex()]; - const double Gap = 1 - (double)Min / Max; + double Gap = 0.0; + if (Max > 0) + Gap = 1 - (double)Min / Max; double Weight = BB.getKnownExecutionCount() * BB.getNumNonPseudos(); - if (Weight == 0) - continue; // We use log to prevent the stats from being dominated by extremely hot // blocks Weight = log(Weight); @@ -316,39 +357,36 @@ void printCFGFlowConservationStats(raw_ostream &OS, BBWorstGapAbs = &BB; } } - if (WeightSum > 0) { - const double WeightedGap = WeightedGapSum / WeightSum; - if (opts::Verbosity >= 2 && (WeightedGap >= 0.1 || WorstGap >= 0.9)) { - OS << "Nontrivial CFG gap observed in function " - << Function->getPrintName() << "\n" - << "Weighted gap: " << formatv("{0:P}", WeightedGap) << "\n"; - if (BBWorstGap) - OS << "Worst gap: " << formatv("{0:P}", WorstGap) - << " at BB with input offset: 0x" - << Twine::utohexstr(BBWorstGap->getInputOffset()) << "\n"; - if (BBWorstGapAbs) - OS << "Worst gap (absolute value): " << WorstGapAbs << " at BB with " - << "input offset 0x" - << Twine::utohexstr(BBWorstGapAbs->getInputOffset()) << "\n"; - if (opts::Verbosity >= 3) - Function->dump(); - } - - CFGGapsWeightedAvg.push_back(WeightedGap); - CFGGapsWorst.push_back(WorstGap); - CFGGapsWorstAbs.push_back(WorstGapAbs); + double WeightedGap = WeightedGapSum; + if (WeightSum > 0) + WeightedGap /= WeightSum; + if (opts::Verbosity >= 2 && WorstGap >= 0.9) { + OS << "Non-trivial CFG gap observed in function " + << Function->getPrintName() << "\n" + << "Weighted gap: " << formatv("{0:P}", WeightedGap) << "\n"; + if (BBWorstGap) + OS << "Worst gap: " << formatv("{0:P}", WorstGap) + << " at BB with input offset: 0x" + << Twine::utohexstr(BBWorstGap->getInputOffset()) << "\n"; + if (BBWorstGapAbs) + OS << "Worst gap (absolute value): " << WorstGapAbs << " at BB with " + << 
"input offset 0x" + << Twine::utohexstr(BBWorstGapAbs->getInputOffset()) << "\n"; + if (opts::Verbosity >= 3) + Function->dump(); } + CFGGapsWeightedAvg.push_back(WeightedGap); + CFGGapsWorst.push_back(WorstGap); + CFGGapsWorstAbs.push_back(WorstGapAbs); } - if (CFGGapsWeightedAvg.empty()) - return; llvm::sort(CFGGapsWeightedAvg); const int RankWA = int(CFGGapsWeightedAvg.size() * opts::PercentileForProfileQualityCheck / 100); llvm::sort(CFGGapsWorst); const int RankW = int(CFGGapsWorst.size() * opts::PercentileForProfileQualityCheck / 100); - OS << formatv("CFG flow conservation gap {0:P} (weighted) {1:P} (worst)\n", + OS << formatv("CFG flow conservation gap {0:P} (weighted) {1:P} (worst); ", CFGGapsWeightedAvg[RankWA], CFGGapsWorst[RankW]); if (opts::Verbosity >= 1) { OS << "distribution of weighted CFG flow conservation gaps\n"; @@ -365,6 +403,74 @@ void printCFGFlowConservationStats(raw_ostream &OS, } } +void printExceptionHandlingStats(const BinaryContext &BC, raw_ostream &OS, + iterator_range &Functions) { + std::vector LPCountFractionsOfTotalBBEC; + std::vector LPCountFractionsOfTotalInvokeEC; + for (const BinaryFunction *Function : Functions) { + size_t LPECSum = 0; + size_t BBECSum = 0; + size_t InvokeECSum = 0; + for (BinaryBasicBlock &BB : *Function) { + const size_t BBEC = BB.getKnownExecutionCount(); + BBECSum += BBEC; + if (BB.isLandingPad()) + LPECSum += BBEC; + for (const MCInst &Inst : BB) { + if (!BC.MIB->isInvoke(Inst)) + continue; + const std::optional EHInfo = + BC.MIB->getEHInfo(Inst); + if (EHInfo->first) + InvokeECSum += BBEC; + } + } + + if (LPECSum <= MinLPECSum) { + LPCountFractionsOfTotalBBEC.push_back(0.0); + LPCountFractionsOfTotalInvokeEC.push_back(0.0); + continue; + } + double FracTotalBBEC = 0.0; + if (BBECSum > 0) + FracTotalBBEC = (double)LPECSum / BBECSum; + double FracTotalInvokeEC = 0.0; + if (InvokeECSum > 0) + FracTotalInvokeEC = (double)LPECSum / InvokeECSum; + LPCountFractionsOfTotalBBEC.push_back(FracTotalBBEC); + 
LPCountFractionsOfTotalInvokeEC.push_back(FracTotalInvokeEC); + + if (opts::Verbosity >= 2 && FracTotalInvokeEC >= 0.05) { + OS << "Non-trivial usage of exception handling observed in function " + << Function->getPrintName() << "\n" + << formatv( + "Fraction of total InvokeEC that goes to landing pads: {0:P}\n", + FracTotalInvokeEC); + if (opts::Verbosity >= 3) + Function->dump(); + } + } + + llvm::sort(LPCountFractionsOfTotalBBEC); + const int RankBBEC = int(LPCountFractionsOfTotalBBEC.size() * + opts::PercentileForProfileQualityCheck / 100); + llvm::sort(LPCountFractionsOfTotalInvokeEC); + const int RankInvoke = int(LPCountFractionsOfTotalInvokeEC.size() * + opts::PercentileForProfileQualityCheck / 100); + OS << formatv("exception handling usage {0:P} (of total BBEC) {1:P} (of " + "total InvokeEC)\n", + LPCountFractionsOfTotalBBEC[RankBBEC], + LPCountFractionsOfTotalInvokeEC[RankInvoke]); + if (opts::Verbosity >= 1) { + OS << "distribution of exception handling usage as a fraction of total " + "BBEC of each function\n"; + printDistribution(OS, LPCountFractionsOfTotalBBEC, /*Fraction=*/true); + OS << "distribution of exception handling usage as a fraction of total " + "InvokeEC of each function\n"; + printDistribution(OS, LPCountFractionsOfTotalInvokeEC, /*Fraction=*/true); + } +} + void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) { // Increment block inflow and outflow with CFG jump counts. 
TotalFlowMapTy &TotalIncomingFlows = TotalFlowMap.TotalIncomingFlows; @@ -519,8 +625,8 @@ void printAll(BinaryContext &BC, FunctionListType &ValidFunctions, 100 - opts::PercentileForProfileQualityCheck); printCFGContinuityStats(BC.outs(), Functions); printCallGraphFlowConservationStats(BC.outs(), Functions, TotalFlowMap); - printCFGFlowConservationStats(BC.outs(), Functions, TotalFlowMap); - + printCFGFlowConservationStats(BC, BC.outs(), Functions, TotalFlowMap); + printExceptionHandlingStats(BC, BC.outs(), Functions); // Print more detailed bucketed stats if requested. if (opts::Verbosity >= 1 && RealNumTopFunctions >= 5) { const size_t PerBucketSize = RealNumTopFunctions / 5; @@ -550,7 +656,8 @@ void printAll(BinaryContext &BC, FunctionListType &ValidFunctions, MaxFunctionExecutionCount); printCFGContinuityStats(BC.outs(), Functions); printCallGraphFlowConservationStats(BC.outs(), Functions, TotalFlowMap); - printCFGFlowConservationStats(BC.outs(), Functions, TotalFlowMap); + printCFGFlowConservationStats(BC, BC.outs(), Functions, TotalFlowMap); + printExceptionHandlingStats(BC, BC.outs(), Functions); } } } diff --git a/bolt/test/X86/profile-quality-reporting-small-binary.s b/bolt/test/X86/profile-quality-reporting-small-binary.s new file mode 100644 index 0000000000000..2b147c5eca81e --- /dev/null +++ b/bolt/test/X86/profile-quality-reporting-small-binary.s @@ -0,0 +1,35 @@ +## Test that BOLT-INFO is correctly formatted after profile quality reporting for +## a small binary. 
+ +# RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %s -o %t.o +# RUN: link_fdata %s %t.o %t.fdata +# RUN: llvm-strip --strip-unneeded %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.bolt --data=%t.fdata \ +# RUN: 2>&1 | FileCheck %s + +# CHECK: BOLT-INFO: profile quality metrics for the hottest 2 functions (reporting top 5% values): function CFG discontinuity 0.00%; call graph flow conservation gap 0.00%; CFG flow conservation gap 0.00% (weighted) 0.00% (worst); exception handling usage 0.00% (of total BBEC) 0.00% (of total InvokeEC) +# CHECK-NEXT: BOLT-INFO: + + .text + .globl func + .type func, @function +func: + pushq %rbp + ret +LLfunc_end: + .size func, LLfunc_end-func + + + .globl main + .type main, @function +main: + pushq %rbp + movq %rsp, %rbp +LLmain_func: + call func +# FDATA: 1 main #LLmain_func# 1 func 0 0 500 + movl $4, %edi + retq +.Lmain_end: + .size main, .Lmain_end-main diff --git a/bolt/test/X86/profile-quality-reporting.test b/bolt/test/X86/profile-quality-reporting.test index 2e15a6b245afa..210d3e10a3890 100644 --- a/bolt/test/X86/profile-quality-reporting.test +++ b/bolt/test/X86/profile-quality-reporting.test @@ -1,4 +1,4 @@ ## Check profile quality stats reporting RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt | FileCheck %s -CHECK: profile quality metrics for the hottest 5 functions (reporting top 5% values): function CFG discontinuity 100.00%; call graph flow conservation gap 60.00%; CFG flow conservation gap 45.53% (weighted) 96.87% (worst) +CHECK: profile quality metrics for the hottest 5 functions (reporting top 5% values): function CFG discontinuity 100.00%; call graph flow conservation gap 60.00%; CFG flow conservation gap 45.53% (weighted) 96.87% (worst); exception handling usage 0.00% (of total BBEC) 0.00% (of total InvokeEC)