diff --git a/.ci/compute_projects.py b/.ci/compute_projects.py index 17a2136a270d5..ff43547c9bbe5 100644 --- a/.ci/compute_projects.py +++ b/.ci/compute_projects.py @@ -52,9 +52,6 @@ "clang": {"clang-tools-extra", "compiler-rt", "cross-project-tests"}, "clang-tools-extra": {"libc"}, "mlir": {"flang"}, - # Test everything if ci scripts are changed. - # FIXME: Figure out what is missing and add here. - ".ci": {"llvm", "clang", "lld", "lldb"}, } DEPENDENT_RUNTIMES_TO_TEST = {"clang": {"libcxx", "libcxxabi", "libunwind"}} @@ -133,11 +130,12 @@ def _add_dependencies(projects: Set[str]) -> Set[str]: def _compute_projects_to_test(modified_projects: Set[str], platform: str) -> Set[str]: projects_to_test = set() for modified_project in modified_projects: + # Skip all projects where we cannot run tests. + if modified_project not in PROJECT_CHECK_TARGETS: + continue if modified_project in RUNTIMES: continue - # Skip all projects where we cannot run tests. - if modified_project in PROJECT_CHECK_TARGETS: - projects_to_test.add(modified_project) + projects_to_test.add(modified_project) if modified_project not in DEPENDENTS_TO_TEST: continue for dependent_project in DEPENDENTS_TO_TEST[modified_project]: diff --git a/.ci/compute_projects_test.py b/.ci/compute_projects_test.py index f71b56d2525ba..e787fd8133c86 100644 --- a/.ci/compute_projects_test.py +++ b/.ci/compute_projects_test.py @@ -188,23 +188,6 @@ def test_exclude_gn(self): self.assertEqual(env_variables["runtimes_to_build"], "") self.assertEqual(env_variables["runtimes_check_targets"], "") - def test_ci(self): - env_variables = compute_projects.get_env_variables( - [".ci/compute_projects.py"], "Linux" - ) - self.assertEqual(env_variables["projects_to_build"], "clang;lld;llvm;lldb") - self.assertEqual( - env_variables["project_check_targets"], - "check-clang check-lld check-llvm check-lldb", - ) - self.assertEqual( - env_variables["runtimes_to_build"], "libcxx;libcxxabi;libunwind" - ) - self.assertEqual( - env_variables["runtimes_check_targets"], - "check-cxx check-cxxabi check-unwind", - ) - if __name__ == "__main__": unittest.main() diff --git a/.ci/monolithic-linux.sh b/.ci/monolithic-linux.sh index a56a8bcbffecb..6461c9d40ad59 100755 --- a/.ci/monolithic-linux.sh +++ b/.ci/monolithic-linux.sh @@ -28,14 +28,10 @@ if [[ -n "${CLEAR_CACHE:-}" ]]; then ccache --clear fi -mkdir -p artifacts/reproducers - -# Make sure any clang reproducers will end up as artifacts. -export CLANG_CRASH_DIAGNOSTICS_DIR=`realpath artifacts/reproducers` - function at-exit { retcode=$? + mkdir -p artifacts ccache --print-stats > artifacts/ccache_stats.txt cp "${BUILD_DIR}"/.ninja_log artifacts/.ninja_log @@ -62,18 +58,9 @@ export PIP_BREAK_SYSTEM_PACKAGES=1 pip install -q -r "${MONOREPO_ROOT}"/mlir/python/requirements.txt pip install -q -r "${MONOREPO_ROOT}"/lldb/test/requirements.txt pip install -q -r "${MONOREPO_ROOT}"/.ci/requirements.txt - -# Set the system llvm-symbolizer as preferred. -export LLVM_SYMBOLIZER_PATH=`which llvm-symbolizer` -[[ ! -f "${LLVM_SYMBOLIZER_PATH}" ]] && echo "llvm-symbolizer not found!" - -# Set up all runtimes either way. libcxx is a dependency of LLDB. -# It will not be built unless it is used. cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \ -D LLVM_ENABLE_PROJECTS="${projects}" \ - -D LLVM_ENABLE_RUNTIMES="libcxx;libcxxabi;libunwind" \ -G Ninja \ - -D CMAKE_PREFIX_PATH="${HOME}/.local" \ -D CMAKE_BUILD_TYPE=Release \ -D LLVM_ENABLE_ASSERTIONS=ON \ -D LLVM_BUILD_EXAMPLES=ON \ @@ -82,10 +69,7 @@ cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \ -D LLVM_ENABLE_LLD=ON \ -D CMAKE_CXX_FLAGS=-gmlt \ -D LLVM_CCACHE_BUILD=ON \ - -D LIBCXX_CXX_ABI=libcxxabi \ -D MLIR_ENABLE_BINDINGS_PYTHON=ON \ - -D LLDB_ENABLE_PYTHON=ON \ - -D LLDB_ENFORCE_STRICT_TEST_REQUIREMENTS=ON \ -D CMAKE_INSTALL_PREFIX="${INSTALL_DIR}" echo "--- ninja" diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 25ac0596d78b5..dd4116fa16bc5 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -128,7 +128,7 @@ /mlir/**/Transforms/SROA.* @moxinilian # BOLT -/bolt/ @aaupov @maksfb @rafaelauler @ayermolo @yota9 +/bolt/ @aaupov @maksfb @rafaelauler @ayermolo @dcci @yota9 # Bazel build system. /utils/bazel/ @rupprecht @keith @aaronmondal diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml index 1ec159bfaa090..c0c61748010d0 100644 --- a/.github/new-prs-labeler.yml +++ b/.github/new-prs-labeler.yml @@ -7,12 +7,6 @@ ClangIR: - clang/tools/cir-*/**/* - clang/test/CIR/**/* -clang:bytecode: - - clang/docs/ConstantInterpreter.rst - - clang/lib/AST/ByteCode/**/* - - clang/test/AST/ByteCode/**/* - - clang/unittests/AST/ByteCode/**/* - clang:dataflow: - clang/include/clang/Analysis/FlowSensitive/**/* - clang/lib/Analysis/FlowSensitive/**/* diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index 79a91861554df..56eb463fc98fc 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -223,8 +223,7 @@ class DataAggregator : public DataReader { bool recordExit(BinaryFunction &BF, uint64_t From, bool Mispred, uint64_t Count = 1) const; - /// Branch stacks aggregation statistics - uint64_t NumTraces{0}; + /// Aggregation statistics uint64_t NumInvalidTraces{0}; uint64_t NumLongRangeTraces{0}; /// Specifies how many samples were recorded in cold areas if we are dealing @@ -232,7 +231,6 @@ class DataAggregator : public DataReader { /// for the source of the branch to avoid counting cold activity twice (one /// for source and another for destination). uint64_t NumColdSamples{0}; - uint64_t NumTotalSamples{0}; /// Looks into system PATH for Linux Perf and set up the aggregator to use it void findPerfExecutable(); @@ -329,8 +327,8 @@ class DataAggregator : public DataReader { /// Parse a single LBR entry as output by perf script -Fbrstack ErrorOr parseLBREntry(); - /// Parse LBR sample. - void parseLBRSample(const PerfBranchSample &Sample, bool NeedsSkylakeFix); + /// Parse LBR sample, returns the number of traces. + uint64_t parseLBRSample(const PerfBranchSample &Sample, bool NeedsSkylakeFix); /// Parse and pre-aggregate branch events. std::error_code parseBranchEvents(); @@ -489,13 +487,6 @@ class DataAggregator : public DataReader { void dump(const PerfBranchSample &Sample) const; void dump(const PerfMemSample &Sample) const; - /// Profile diagnostics print methods - void printColdSamplesDiagnostic() const; - void printLongRangeTracesDiagnostic() const; - void printBranchSamplesDiagnostics() const; - void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) const; - void printBranchStacksDiagnostics(uint64_t IgnoredSamples) const; - public: /// If perf.data was collected without build ids, the buildid-list may contain /// incomplete entries. Return true if the buffer containing diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp index 311d5c15b8dca..2a2192b79bb4b 100644 --- a/bolt/lib/Core/BinaryBasicBlock.cpp +++ b/bolt/lib/Core/BinaryBasicBlock.cpp @@ -372,7 +372,8 @@ void BinaryBasicBlock::updateJumpTableSuccessors() { [](const BinaryBasicBlock *BB1, const BinaryBasicBlock *BB2) { return BB1->getInputOffset() < BB2->getInputOffset(); }); - SuccessorBBs.erase(llvm::unique(SuccessorBBs), SuccessorBBs.end()); + SuccessorBBs.erase(std::unique(SuccessorBBs.begin(), SuccessorBBs.end()), + SuccessorBBs.end()); for (BinaryBasicBlock *BB : SuccessorBBs) addSuccessor(BB); diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp index 7b5cd276fee89..d3ed9e8088bfa 100644 --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -373,10 +373,8 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function, Streamer.emitLabel(StartSymbol); } - const bool NeedsFDE = - Function.hasCFI() && !(Function.isPatch() && Function.isAnonymous()); // Emit CFI start - if (NeedsFDE) { + if (Function.hasCFI()) { Streamer.emitCFIStartProc(/*IsSimple=*/false); if (Function.getPersonalityFunction() != nullptr) Streamer.emitCFIPersonality(Function.getPersonalityFunction(), @@ -423,7 +421,7 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function, Streamer.emitBytes(BC.MIB->getTrapFillValue()); // Emit CFI end - if (NeedsFDE) + if (Function.hasCFI()) Streamer.emitCFIEndProc(); MCSymbol *EndSymbol = Function.getFunctionEndLabel(FF.getFragmentNum()); diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 4624abadc701a..184a4462b356a 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -2376,7 +2376,7 @@ Error BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) { // Without doing jump table value profiling we don't have a use for extra // (duplicate) branches. llvm::sort(TakenBranches); - auto NewEnd = llvm::unique(TakenBranches); + auto NewEnd = std::unique(TakenBranches.begin(), TakenBranches.end()); TakenBranches.erase(NewEnd, TakenBranches.end()); for (std::pair &Branch : TakenBranches) { diff --git a/bolt/lib/Core/DebugNames.cpp b/bolt/lib/Core/DebugNames.cpp index 82cabe689eb14..366c22c38e616 100644 --- a/bolt/lib/Core/DebugNames.cpp +++ b/bolt/lib/Core/DebugNames.cpp @@ -440,7 +440,8 @@ void DWARF5AcceleratorTable::computeBucketCount() { for (const auto &E : Entries) Uniques.push_back(E.second.HashValue); array_pod_sort(Uniques.begin(), Uniques.end()); - std::vector::iterator P = llvm::unique(Uniques); + std::vector::iterator P = + std::unique(Uniques.begin(), Uniques.end()); UniqueHashCount = std::distance(Uniques.begin(), P); diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp index 7752079b61538..a3be147a09066 100644 --- a/bolt/lib/Core/MCPlusBuilder.cpp +++ b/bolt/lib/Core/MCPlusBuilder.cpp @@ -442,10 +442,10 @@ void MCPlusBuilder::getUsedRegs(const MCInst &Inst, BitVector &Regs) const { for (MCPhysReg ImplicitUse : InstInfo.implicit_uses()) Regs |= getAliases(ImplicitUse, /*OnlySmaller=*/true); - for (const MCOperand &Operand : useOperands(Inst)) { - if (!Operand.isReg()) + for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { + if (!Inst.getOperand(I).isReg()) continue; - Regs |= getAliases(Operand.getReg(), /*OnlySmaller=*/true); + Regs |= getAliases(Inst.getOperand(I).getReg(), /*OnlySmaller=*/true); } } diff --git a/bolt/lib/Passes/ProfileQualityStats.cpp b/bolt/lib/Passes/ProfileQualityStats.cpp index dfd74d3dd5719..332c78da8a1e3 100644 --- a/bolt/lib/Passes/ProfileQualityStats.cpp +++ b/bolt/lib/Passes/ProfileQualityStats.cpp @@ -52,16 +52,6 @@ struct FlowInfo { FunctionFlowMapTy CallGraphIncomingFlows; }; -// When reporting exception handling stats, we only consider functions with at -// least MinLPECSum counts in landing pads to avoid false positives due to -// sampling noise -const uint16_t MinLPECSum = 50; - -// When reporting CFG flow conservation stats, we only consider blocks with -// execution counts > MinBlockCount when reporting the distribution of worst -// gaps. -const uint16_t MinBlockCount = 500; - template void printDistribution(raw_ostream &OS, std::vector &values, bool Fraction = false) { @@ -101,12 +91,8 @@ void printCFGContinuityStats(raw_ostream &OS, std::vector FractionECUnreachables; for (const BinaryFunction *Function : Functions) { - if (Function->size() <= 1) { - NumUnreachables.push_back(0); - SumECUnreachables.push_back(0); - FractionECUnreachables.push_back(0.0); + if (Function->size() <= 1) continue; - } // Compute the sum of all BB execution counts (ECs). size_t NumPosECBBs = 0; @@ -156,10 +142,8 @@ void printCFGContinuityStats(raw_ostream &OS, const size_t NumPosECBBsUnreachableFromEntry = NumPosECBBs - NumReachableBBs; const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC; - - double FractionECUnreachable = 0.0; - if (SumAllBBEC > 0) - FractionECUnreachable = (double)SumUnreachableBBEC / SumAllBBEC; + const double FractionECUnreachable = + (double)SumUnreachableBBEC / SumAllBBEC; if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) { OS << "Non-trivial CFG discontinuity observed in function " @@ -173,6 +157,9 @@ void printCFGContinuityStats(raw_ostream &OS, FractionECUnreachables.push_back(FractionECUnreachable); } + if (FractionECUnreachables.empty()) + return; + llvm::sort(FractionECUnreachables); const int Rank = int(FractionECUnreachables.size() * opts::PercentileForProfileQualityCheck / 100); @@ -200,10 +187,8 @@ void printCallGraphFlowConservationStats( std::vector CallGraphGaps; for (const BinaryFunction *Function : Functions) { - if (Function->size() <= 1 || !Function->isSimple()) { - CallGraphGaps.push_back(0.0); + if (Function->size() <= 1 || !Function->isSimple()) continue; - } const uint64_t FunctionNum = Function->getFunctionNumber(); std::vector &IncomingFlows = @@ -214,64 +199,61 @@ void printCallGraphFlowConservationStats( TotalFlowMap.CallGraphIncomingFlows; // Only consider functions that are not a program entry. - if (CallGraphIncomingFlows.find(FunctionNum) == + if (CallGraphIncomingFlows.find(FunctionNum) != CallGraphIncomingFlows.end()) { - CallGraphGaps.push_back(0.0); - continue; - } - - uint64_t EntryInflow = 0; - uint64_t EntryOutflow = 0; - uint32_t NumConsideredEntryBlocks = 0; - - Function->forEachEntryPoint([&](uint64_t Offset, const MCSymbol *Label) { - const BinaryBasicBlock *EntryBB = Function->getBasicBlockAtOffset(Offset); - if (!EntryBB || EntryBB->succ_size() == 0) + uint64_t EntryInflow = 0; + uint64_t EntryOutflow = 0; + uint32_t NumConsideredEntryBlocks = 0; + + Function->forEachEntryPoint([&](uint64_t Offset, const MCSymbol *Label) { + const BinaryBasicBlock *EntryBB = + Function->getBasicBlockAtOffset(Offset); + if (!EntryBB || EntryBB->succ_size() == 0) + return true; + NumConsideredEntryBlocks++; + EntryInflow += IncomingFlows[EntryBB->getLayoutIndex()]; + EntryOutflow += OutgoingFlows[EntryBB->getLayoutIndex()]; return true; - NumConsideredEntryBlocks++; - EntryInflow += IncomingFlows[EntryBB->getLayoutIndex()]; - EntryOutflow += OutgoingFlows[EntryBB->getLayoutIndex()]; - return true; - }); - - uint64_t NetEntryOutflow = 0; - if (EntryOutflow < EntryInflow) { - if (opts::Verbosity >= 2) { - // We expect entry blocks' CFG outflow >= inflow, i.e., it has a - // non-negative net outflow. If this is not the case, then raise a - // warning if requested. - OS << "BOLT WARNING: unexpected entry block CFG outflow < inflow " - "in function " - << Function->getPrintName() << "\n"; - if (opts::Verbosity >= 3) - Function->dump(); - } - } else { - NetEntryOutflow = EntryOutflow - EntryInflow; - } - if (NumConsideredEntryBlocks > 0) { - const uint64_t CallGraphInflow = - TotalFlowMap.CallGraphIncomingFlows[Function->getFunctionNumber()]; - const uint64_t Min = std::min(NetEntryOutflow, CallGraphInflow); - const uint64_t Max = std::max(NetEntryOutflow, CallGraphInflow); - double CallGraphGap = 0.0; - if (Max > 0) - CallGraphGap = 1 - (double)Min / Max; - - if (opts::Verbosity >= 2 && CallGraphGap >= 0.5) { - OS << "Non-trivial call graph gap of size " - << formatv("{0:P}", CallGraphGap) << " observed in function " - << Function->getPrintName() << "\n"; - if (opts::Verbosity >= 3) - Function->dump(); + }); + + uint64_t NetEntryOutflow = 0; + if (EntryOutflow < EntryInflow) { + if (opts::Verbosity >= 2) { + // We expect entry blocks' CFG outflow >= inflow, i.e., it has a + // non-negative net outflow. If this is not the case, then raise a + // warning if requested. + OS << "BOLT WARNING: unexpected entry block CFG outflow < inflow " + "in function " + << Function->getPrintName() << "\n"; + if (opts::Verbosity >= 3) + Function->dump(); + } + } else { + NetEntryOutflow = EntryOutflow - EntryInflow; } + if (NumConsideredEntryBlocks > 0) { + const uint64_t CallGraphInflow = + TotalFlowMap.CallGraphIncomingFlows[Function->getFunctionNumber()]; + const uint64_t Min = std::min(NetEntryOutflow, CallGraphInflow); + const uint64_t Max = std::max(NetEntryOutflow, CallGraphInflow); + const double CallGraphGap = 1 - (double)Min / Max; + + if (opts::Verbosity >= 2 && CallGraphGap >= 0.5) { + OS << "Nontrivial call graph gap of size " + << formatv("{0:P}", CallGraphGap) << " observed in function " + << Function->getPrintName() << "\n"; + if (opts::Verbosity >= 3) + Function->dump(); + } - CallGraphGaps.push_back(CallGraphGap); - } else { - CallGraphGaps.push_back(0.0); + CallGraphGaps.push_back(CallGraphGap); + } } } + if (CallGraphGaps.empty()) + return; + llvm::sort(CallGraphGaps); const int Rank = int(CallGraphGaps.size() * opts::PercentileForProfileQualityCheck / 100); @@ -283,19 +265,18 @@ void printCallGraphFlowConservationStats( } } -void printCFGFlowConservationStats(const BinaryContext &BC, raw_ostream &OS, +void printCFGFlowConservationStats(raw_ostream &OS, iterator_range &Functions, FlowInfo &TotalFlowMap) { std::vector CFGGapsWeightedAvg; std::vector CFGGapsWorst; std::vector CFGGapsWorstAbs; + // We only consider blocks with execution counts > MinBlockCount when + // reporting the distribution of worst gaps. + const uint16_t MinBlockCount = 500; for (const BinaryFunction *Function : Functions) { - if (Function->size() <= 1 || !Function->isSimple()) { - CFGGapsWeightedAvg.push_back(0.0); - CFGGapsWorst.push_back(0.0); - CFGGapsWorstAbs.push_back(0); + if (Function->size() <= 1 || !Function->isSimple()) continue; - } const uint64_t FunctionNum = Function->getFunctionNumber(); std::vector &MaxCountMaps = @@ -314,34 +295,12 @@ void printCFGFlowConservationStats(const BinaryContext &BC, raw_ostream &OS, if (BB.isEntryPoint() || BB.succ_size() == 0) continue; - if (BB.getKnownExecutionCount() == 0 || BB.getNumNonPseudos() == 0) - continue; - - // We don't consider blocks that is a landing pad or has a - // positive-execution-count landing pad - if (BB.isLandingPad()) - continue; - - if (llvm::any_of(BB.landing_pads(), - std::mem_fn(&BinaryBasicBlock::getKnownExecutionCount))) - continue; - - // We don't consider blocks that end with a recursive call instruction - const MCInst *Inst = BB.getLastNonPseudoInstr(); - if (BC.MIB->isCall(*Inst)) { - const MCSymbol *DstSym = BC.MIB->getTargetSymbol(*Inst); - const BinaryFunction *DstFunc = - DstSym ? BC.getFunctionForSymbol(DstSym) : nullptr; - if (DstFunc == Function) - continue; - } - const uint64_t Max = MaxCountMaps[BB.getLayoutIndex()]; const uint64_t Min = MinCountMaps[BB.getLayoutIndex()]; - double Gap = 0.0; - if (Max > 0) - Gap = 1 - (double)Min / Max; + const double Gap = 1 - (double)Min / Max; double Weight = BB.getKnownExecutionCount() * BB.getNumNonPseudos(); + if (Weight == 0) + continue; // We use log to prevent the stats from being dominated by extremely hot // blocks Weight = log(Weight); @@ -357,36 +316,39 @@ void printCFGFlowConservationStats(const BinaryContext &BC, raw_ostream &OS, BBWorstGapAbs = &BB; } } - double WeightedGap = WeightedGapSum; - if (WeightSum > 0) - WeightedGap /= WeightSum; - if (opts::Verbosity >= 2 && WorstGap >= 0.9) { - OS << "Non-trivial CFG gap observed in function " - << Function->getPrintName() << "\n" - << "Weighted gap: " << formatv("{0:P}", WeightedGap) << "\n"; - if (BBWorstGap) - OS << "Worst gap: " << formatv("{0:P}", WorstGap) - << " at BB with input offset: 0x" - << Twine::utohexstr(BBWorstGap->getInputOffset()) << "\n"; - if (BBWorstGapAbs) - OS << "Worst gap (absolute value): " << WorstGapAbs << " at BB with " - << "input offset 0x" - << Twine::utohexstr(BBWorstGapAbs->getInputOffset()) << "\n"; - if (opts::Verbosity >= 3) - Function->dump(); + if (WeightSum > 0) { + const double WeightedGap = WeightedGapSum / WeightSum; + if (opts::Verbosity >= 2 && (WeightedGap >= 0.1 || WorstGap >= 0.9)) { + OS << "Nontrivial CFG gap observed in function " + << Function->getPrintName() << "\n" + << "Weighted gap: " << formatv("{0:P}", WeightedGap) << "\n"; + if (BBWorstGap) + OS << "Worst gap: " << formatv("{0:P}", WorstGap) + << " at BB with input offset: 0x" + << Twine::utohexstr(BBWorstGap->getInputOffset()) << "\n"; + if (BBWorstGapAbs) + OS << "Worst gap (absolute value): " << WorstGapAbs << " at BB with " + << "input offset 0x" + << Twine::utohexstr(BBWorstGapAbs->getInputOffset()) << "\n"; + if (opts::Verbosity >= 3) + Function->dump(); + } + + CFGGapsWeightedAvg.push_back(WeightedGap); + CFGGapsWorst.push_back(WorstGap); + CFGGapsWorstAbs.push_back(WorstGapAbs); } - CFGGapsWeightedAvg.push_back(WeightedGap); - CFGGapsWorst.push_back(WorstGap); - CFGGapsWorstAbs.push_back(WorstGapAbs); } + if (CFGGapsWeightedAvg.empty()) + return; llvm::sort(CFGGapsWeightedAvg); const int RankWA = int(CFGGapsWeightedAvg.size() * opts::PercentileForProfileQualityCheck / 100); llvm::sort(CFGGapsWorst); const int RankW = int(CFGGapsWorst.size() * opts::PercentileForProfileQualityCheck / 100); - OS << formatv("CFG flow conservation gap {0:P} (weighted) {1:P} (worst); ", + OS << formatv("CFG flow conservation gap {0:P} (weighted) {1:P} (worst)\n", CFGGapsWeightedAvg[RankWA], CFGGapsWorst[RankW]); if (opts::Verbosity >= 1) { OS << "distribution of weighted CFG flow conservation gaps\n"; @@ -403,74 +365,6 @@ void printCFGFlowConservationStats(const BinaryContext &BC, raw_ostream &OS, } } -void printExceptionHandlingStats(const BinaryContext &BC, raw_ostream &OS, - iterator_range &Functions) { - std::vector LPCountFractionsOfTotalBBEC; - std::vector LPCountFractionsOfTotalInvokeEC; - for (const BinaryFunction *Function : Functions) { - size_t LPECSum = 0; - size_t BBECSum = 0; - size_t InvokeECSum = 0; - for (BinaryBasicBlock &BB : *Function) { - const size_t BBEC = BB.getKnownExecutionCount(); - BBECSum += BBEC; - if (BB.isLandingPad()) - LPECSum += BBEC; - for (const MCInst &Inst : BB) { - if (!BC.MIB->isInvoke(Inst)) - continue; - const std::optional EHInfo = - BC.MIB->getEHInfo(Inst); - if (EHInfo->first) - InvokeECSum += BBEC; - } - } - - if (LPECSum <= MinLPECSum) { - LPCountFractionsOfTotalBBEC.push_back(0.0); - LPCountFractionsOfTotalInvokeEC.push_back(0.0); - continue; - } - double FracTotalBBEC = 0.0; - if (BBECSum > 0) - FracTotalBBEC = (double)LPECSum / BBECSum; - double FracTotalInvokeEC = 0.0; - if (InvokeECSum > 0) - FracTotalInvokeEC = (double)LPECSum / InvokeECSum; - LPCountFractionsOfTotalBBEC.push_back(FracTotalBBEC); - LPCountFractionsOfTotalInvokeEC.push_back(FracTotalInvokeEC); - - if (opts::Verbosity >= 2 && FracTotalInvokeEC >= 0.05) { - OS << "Non-trivial usage of exception handling observed in function " - << Function->getPrintName() << "\n" - << formatv( - "Fraction of total InvokeEC that goes to landing pads: {0:P}\n", - FracTotalInvokeEC); - if (opts::Verbosity >= 3) - Function->dump(); - } - } - - llvm::sort(LPCountFractionsOfTotalBBEC); - const int RankBBEC = int(LPCountFractionsOfTotalBBEC.size() * - opts::PercentileForProfileQualityCheck / 100); - llvm::sort(LPCountFractionsOfTotalInvokeEC); - const int RankInvoke = int(LPCountFractionsOfTotalInvokeEC.size() * - opts::PercentileForProfileQualityCheck / 100); - OS << formatv("exception handling usage {0:P} (of total BBEC) {1:P} (of " - "total InvokeEC)\n", - LPCountFractionsOfTotalBBEC[RankBBEC], - LPCountFractionsOfTotalInvokeEC[RankInvoke]); - if (opts::Verbosity >= 1) { - OS << "distribution of exception handling usage as a fraction of total " - "BBEC of each function\n"; - printDistribution(OS, LPCountFractionsOfTotalBBEC, /*Fraction=*/true); - OS << "distribution of exception handling usage as a fraction of total " - "InvokeEC of each function\n"; - printDistribution(OS, LPCountFractionsOfTotalInvokeEC, /*Fraction=*/true); - } -} - void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) { // Increment block inflow and outflow with CFG jump counts. TotalFlowMapTy &TotalIncomingFlows = TotalFlowMap.TotalIncomingFlows; @@ -625,8 +519,8 @@ void printAll(BinaryContext &BC, FunctionListType &ValidFunctions, 100 - opts::PercentileForProfileQualityCheck); printCFGContinuityStats(BC.outs(), Functions); printCallGraphFlowConservationStats(BC.outs(), Functions, TotalFlowMap); - printCFGFlowConservationStats(BC, BC.outs(), Functions, TotalFlowMap); - printExceptionHandlingStats(BC, BC.outs(), Functions); + printCFGFlowConservationStats(BC.outs(), Functions, TotalFlowMap); + // Print more detailed bucketed stats if requested. if (opts::Verbosity >= 1 && RealNumTopFunctions >= 5) { const size_t PerBucketSize = RealNumTopFunctions / 5; @@ -656,8 +550,7 @@ void printAll(BinaryContext &BC, FunctionListType &ValidFunctions, MaxFunctionExecutionCount); printCFGContinuityStats(BC.outs(), Functions); printCallGraphFlowConservationStats(BC.outs(), Functions, TotalFlowMap); - printCFGFlowConservationStats(BC, BC.outs(), Functions, TotalFlowMap); - printExceptionHandlingStats(BC, BC.outs(), Functions); + printCFGFlowConservationStats(BC.outs(), Functions, TotalFlowMap); } } } diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index a8a187974418d..f016576ff61af 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -634,7 +634,7 @@ bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address, uint64_t Count) { BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc); BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc; - if (ParentFunc || (BAT && !BAT->isBATFunction(OrigFunc.getAddress()))) + if (ParentFunc) NumColdSamples += Count; auto I = NamesToSamples.find(Func.getOneName()); @@ -756,13 +756,12 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, Addr = BAT->translate(Func->getAddress(), Addr, IsFrom); BinaryFunction *ParentFunc = getBATParentFunction(*Func); - if (IsFrom && - (ParentFunc || (BAT && !BAT->isBATFunction(Func->getAddress())))) - NumColdSamples += Count; - if (!ParentFunc) return std::pair{Func, IsRetOrCallCont}; + if (IsFrom) + NumColdSamples += Count; + return std::pair{ParentFunc, IsRetOrCallCont}; }; @@ -1423,8 +1422,9 @@ std::error_code DataAggregator::printLBRHeatMap() { return std::error_code(); } -void DataAggregator::parseLBRSample(const PerfBranchSample &Sample, - bool NeedsSkylakeFix) { +uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample, + bool NeedsSkylakeFix) { + uint64_t NumTraces{0}; // LBRs are stored in reverse execution order. NextLBR refers to the next // executed branch record. const LBREntry *NextLBR = nullptr; @@ -1487,83 +1487,7 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample, ++Info.TakenCount; Info.MispredCount += LBR.Mispred; } -} - -void DataAggregator::printColdSamplesDiagnostic() const { - if (NumColdSamples > 0) { - const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples; - outs() << "PERF2BOLT: " << NumColdSamples - << format(" (%.1f%%)", ColdSamples) - << " samples recorded in cold regions of split functions.\n"; - if (ColdSamples > 5.0f) - outs() - << "WARNING: The BOLT-processed binary where samples were collected " - "likely used bad data or your service observed a large shift in " - "profile. You may want to audit this\n"; - } -} - -void DataAggregator::printLongRangeTracesDiagnostic() const { - outs() << "PERF2BOLT: out of range traces involving unknown regions: " - << NumLongRangeTraces; - if (NumTraces > 0) - outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces); - outs() << "\n"; -} - -static float printColoredPct(uint64_t Numerator, uint64_t Denominator, float T1, - float T2) { - if (Denominator == 0) { - outs() << "\n"; - return 0; - } - float Percent = Numerator * 100.0f / Denominator; - outs() << " ("; - if (outs().has_colors()) { - if (Percent > T2) - outs().changeColor(raw_ostream::RED); - else if (Percent > T1) - outs().changeColor(raw_ostream::YELLOW); - else - outs().changeColor(raw_ostream::GREEN); - } - outs() << format("%.1f%%", Percent); - if (outs().has_colors()) - outs().resetColor(); - outs() << ")\n"; - return Percent; -} - -void DataAggregator::printBranchSamplesDiagnostics() const { - outs() << "PERF2BOLT: traces mismatching disassembled function contents: " - << NumInvalidTraces; - if (printColoredPct(NumInvalidTraces, NumTraces, 5, 10) > 10) - outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " - "binary is probably not the same binary used during profiling " - "collection. The generated data may be ineffective for improving " - "performance\n\n"; - printLongRangeTracesDiagnostic(); - printColdSamplesDiagnostic(); -} - -void DataAggregator::printBasicSamplesDiagnostics( - uint64_t OutOfRangeSamples) const { - outs() << "PERF2BOLT: out of range samples recorded in unknown regions: " - << OutOfRangeSamples; - if (printColoredPct(OutOfRangeSamples, NumTotalSamples, 40, 60) > 80) - outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " - "binary is probably not the same binary used during profiling " - "collection. The generated data may be ineffective for improving " - "performance\n\n"; - printColdSamplesDiagnostic(); -} - -void DataAggregator::printBranchStacksDiagnostics( - uint64_t IgnoredSamples) const { - outs() << "PERF2BOLT: ignored samples: " << IgnoredSamples; - if (printColoredPct(IgnoredSamples, NumTotalSamples, 20, 50) > 50) - errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples " - "were attributed to the input binary\n"; + return NumTraces; } std::error_code DataAggregator::parseBranchEvents() { @@ -1571,9 +1495,11 @@ std::error_code DataAggregator::parseBranchEvents() { NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); + uint64_t NumTotalSamples = 0; uint64_t NumEntries = 0; uint64_t NumSamples = 0; uint64_t NumSamplesNoLBR = 0; + uint64_t NumTraces = 0; bool NeedsSkylakeFix = false; while (hasData() && NumTotalSamples < opts::MaxSamples) { @@ -1600,7 +1526,7 @@ std::error_code DataAggregator::parseBranchEvents() { NeedsSkylakeFix = true; } - parseLBRSample(Sample, NeedsSkylakeFix); + NumTraces += parseLBRSample(Sample, NeedsSkylakeFix); } for (const Trace &Trace : llvm::make_first_range(BranchLBRs)) @@ -1608,6 +1534,22 @@ std::error_code DataAggregator::parseBranchEvents() { if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr)) BF->setHasProfileAvailable(); + auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) { + OS << " ("; + if (OS.has_colors()) { + if (Percent > T2) + OS.changeColor(raw_ostream::RED); + else if (Percent > T1) + OS.changeColor(raw_ostream::YELLOW); + else + OS.changeColor(raw_ostream::GREEN); + } + OS << format("%.1f%%", Percent); + if (OS.has_colors()) + OS.resetColor(); + OS << ")"; + }; + outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries << " LBR entries\n"; if (NumTotalSamples) { @@ -1619,10 +1561,47 @@ std::error_code DataAggregator::parseBranchEvents() { "in no-LBR mode with -nl (the performance improvement in -nl " "mode may be limited)\n"; } else { - printBranchStacksDiagnostics(NumTotalSamples - NumSamples); + const uint64_t IgnoredSamples = NumTotalSamples - NumSamples; + const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples; + outs() << "PERF2BOLT: " << IgnoredSamples << " samples"; + printColored(outs(), PercentIgnored, 20, 50); + outs() << " were ignored\n"; + if (PercentIgnored > 50.0f) + errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples " + "were attributed to the input binary\n"; } } - printBranchSamplesDiagnostics(); + outs() << "PERF2BOLT: traces mismatching disassembled function contents: " + << NumInvalidTraces; + float Perc = 0.0f; + if (NumTraces > 0) { + Perc = NumInvalidTraces * 100.0f / NumTraces; + printColored(outs(), Perc, 5, 10); + } + outs() << "\n"; + if (Perc > 10.0f) + outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " + "binary is probably not the same binary used during profiling " + "collection. The generated data may be ineffective for improving " + "performance.\n\n"; + + outs() << "PERF2BOLT: out of range traces involving unknown regions: " + << NumLongRangeTraces; + if (NumTraces > 0) + outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces); + outs() << "\n"; + + if (NumColdSamples > 0) { + const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples; + outs() << "PERF2BOLT: " << NumColdSamples + << format(" (%.1f%%)", ColdSamples) + << " samples recorded in cold regions of split functions.\n"; + if (ColdSamples > 5.0f) + outs() + << "WARNING: The BOLT-processed binary where samples were collected " + "likely used bad data or your service observed a large shift in " + "profile. You may want to audit this.\n"; + } return std::error_code(); } @@ -1679,10 +1658,11 @@ void DataAggregator::processBasicEvents() { NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); uint64_t OutOfRangeSamples = 0; + uint64_t NumSamples = 0; for (auto &Sample : BasicSamples) { const uint64_t PC = Sample.first; const uint64_t HitCount = Sample.second; - NumTotalSamples += HitCount; + NumSamples += HitCount; BinaryFunction *Func = getBinaryFunctionContainingAddress(PC); if (!Func) { OutOfRangeSamples += HitCount; @@ -1691,9 +1671,33 @@ void DataAggregator::processBasicEvents() { doSample(*Func, PC, HitCount); } - outs() << "PERF2BOLT: read " << NumTotalSamples << " samples\n"; + outs() << "PERF2BOLT: read " << NumSamples << " samples\n"; - printBasicSamplesDiagnostics(OutOfRangeSamples); + outs() << "PERF2BOLT: out of range samples recorded in unknown regions: " + << OutOfRangeSamples; + float Perc = 0.0f; + if (NumSamples > 0) { + outs() << " ("; + Perc = OutOfRangeSamples * 100.0f / NumSamples; + if (outs().has_colors()) { + if (Perc > 60.0f) + outs().changeColor(raw_ostream::RED); + else if (Perc > 40.0f) + outs().changeColor(raw_ostream::YELLOW); + else + outs().changeColor(raw_ostream::GREEN); + } + outs() << format("%.1f%%", Perc); + if (outs().has_colors()) + outs().resetColor(); + outs() << ")"; + } + outs() << "\n"; + if (Perc > 80.0f) + outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " + "binary is probably not the same binary used during profiling " + "collection. The generated data may be ineffective for improving " + "performance.\n\n"; } std::error_code DataAggregator::parseMemEvents() { @@ -1771,13 +1775,13 @@ void DataAggregator::processPreAggregated() { NamedRegionTimer T("processAggregated", "Processing aggregated branch events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); + uint64_t NumTraces = 0; for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) { switch (AggrEntry.EntryType) { case AggregatedLBREntry::BRANCH: case AggregatedLBREntry::TRACE: doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count, AggrEntry.Mispreds); - NumTotalSamples += AggrEntry.Count; break; case AggregatedLBREntry::FT: case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: { @@ -1795,7 +1799,37 @@ void DataAggregator::processPreAggregated() { outs() << "PERF2BOLT: read " << AggregatedLBRs.size() << " aggregated LBR entries\n"; - printBranchSamplesDiagnostics(); + outs() << "PERF2BOLT: traces mismatching disassembled function contents: " + << NumInvalidTraces; + float Perc = 0.0f; + if (NumTraces > 0) { + outs() << " ("; + Perc = NumInvalidTraces * 100.0f / NumTraces; + if (outs().has_colors()) { + if (Perc > 10.0f) + outs().changeColor(raw_ostream::RED); + else if (Perc > 5.0f) + outs().changeColor(raw_ostream::YELLOW); + else + outs().changeColor(raw_ostream::GREEN); + } + outs() << format("%.1f%%", Perc); + if (outs().has_colors()) + outs().resetColor(); + outs() << ")"; + } + outs() << "\n"; + if (Perc > 10.0f) + outs() << "\n !! WARNING !! This high mismatch ratio indicates the input " + "binary is probably not the same binary used during profiling " + "collection. The generated data may be ineffective for improving " + "performance.\n\n"; + + outs() << "PERF2BOLT: Out of range traces involving unknown regions: " + << NumLongRangeTraces; + if (NumTraces > 0) + outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces); + outs() << "\n"; } std::optional DataAggregator::parseCommExecEvent() { diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp index 2bdc7b65b4804..e394858163560 100644 --- a/bolt/lib/Profile/YAMLProfileWriter.cpp +++ b/bolt/lib/Profile/YAMLProfileWriter.cpp @@ -133,9 +133,12 @@ std::vector YAMLProfileWriter::convertNodeProbes(NodeIdToProbes &NodeProbes) { struct BlockProbeInfoHasher { size_t operator()(const yaml::bolt::PseudoProbeInfo &BPI) const { - return llvm::hash_combine(llvm::hash_combine_range(BPI.BlockProbes), - llvm::hash_combine_range(BPI.CallProbes), - llvm::hash_combine_range(BPI.IndCallProbes)); + auto HashCombine = [](auto &Range) { + return llvm::hash_combine_range(Range.begin(), Range.end()); + }; + return llvm::hash_combine(HashCombine(BPI.BlockProbes), + HashCombine(BPI.CallProbes), + HashCombine(BPI.IndCallProbes)); } }; diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 69fb736d7bde0..37dcfa868c211 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -237,12 +237,6 @@ UseGnuStack("use-gnu-stack", cl::ZeroOrMore, cl::cat(BoltCategory)); -static cl::opt CustomAllocationVMA( - "custom-allocation-vma", - cl::desc("use a custom address at which new code will be put, " - "bypassing BOLT's logic to detect where to put code"), - cl::Hidden, cl::cat(BoltCategory)); - static cl::opt SequentialDisassembly("sequential-disassembly", cl::desc("performs disassembly sequentially"), @@ -598,25 +592,6 @@ Error RewriteInstance::discoverStorage() { FirstNonAllocatableOffset = NextAvailableOffset; - if (opts::CustomAllocationVMA) { - // If user specified a custom address where we should start writing new - // data, honor that. - NextAvailableAddress = opts::CustomAllocationVMA; - // Sanity check the user-supplied address and emit warnings if something - // seems off. - for (const ELF64LE::Phdr &Phdr : PHs) { - switch (Phdr.p_type) { - case ELF::PT_LOAD: - if (NextAvailableAddress >= Phdr.p_vaddr && - NextAvailableAddress < Phdr.p_vaddr + Phdr.p_memsz) { - BC->errs() << "BOLT-WARNING: user-supplied allocation vma 0x" - << Twine::utohexstr(NextAvailableAddress) - << " conflicts with ELF segment at 0x" - << Twine::utohexstr(Phdr.p_vaddr) << "\n"; - } - } - } - } NextAvailableAddress = alignTo(NextAvailableAddress, BC->PageAlign); NextAvailableOffset = alignTo(NextAvailableOffset, BC->PageAlign); @@ -1080,11 +1055,10 @@ void RewriteInstance::discoverFileObjects() { continue; } - if (!Section->isText() || Section->isVirtual()) { + if (!Section->isText()) { assert(SymbolType != SymbolRef::ST_Function && "unexpected function inside non-code section"); - LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejecting as symbol is not in code or " - "is in nobits section\n"); + LLVM_DEBUG(dbgs() << "BOLT-DEBUG: rejecting as symbol is not in code\n"); registerName(SymbolSize); continue; } diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index e00d6a18b0f6c..106f0a880d780 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -2336,7 +2336,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { std::optional createRelocation(const MCFixup &Fixup, const MCAsmBackend &MAB) const override { - MCFixupKindInfo FKI = MAB.getFixupKindInfo(Fixup.getKind()); + const MCFixupKindInfo &FKI = MAB.getFixupKindInfo(Fixup.getKind()); assert(FKI.TargetOffset == 0 && "0-bit relocation offset expected"); const uint64_t RelOffset = Fixup.getOffset(); diff --git a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp index 391c1866c810a..0e27d29019e95 100644 --- a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp +++ b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp @@ -555,9 +555,9 @@ class RISCVMCPlusBuilder : public MCPlusBuilder { .addReg(RegCnt); } - InstructionListType createRegCmpJE(MCPhysReg RegNo, MCPhysReg RegTmp, - const MCSymbol *Target, - MCContext *Ctx) const { + InstructionListType createCmpJE(MCPhysReg RegNo, MCPhysReg RegTmp, + const MCSymbol *Target, + MCContext *Ctx) const { InstructionListType Insts; Insts.emplace_back( MCInstBuilder(RISCV::SUB).addReg(RegTmp).addReg(RegNo).addReg(RegNo)); @@ -718,7 +718,7 @@ class RISCVMCPlusBuilder : public MCPlusBuilder { Insts.emplace_back(); loadReg(Insts.back(), RISCV::X10, RISCV::X10, 0); InstructionListType cmpJmp = - createRegCmpJE(RISCV::X10, RISCV::X11, IndCallHandler, Ctx); + createCmpJE(RISCV::X10, RISCV::X11, IndCallHandler, Ctx); Insts.insert(Insts.end(), cmpJmp.begin(), cmpJmp.end()); Insts.emplace_back(); createStackPointerIncrement(Insts.back(), 16); @@ -777,13 +777,14 @@ class RISCVMCPlusBuilder : public MCPlusBuilder { return createGetter(Ctx, "__bolt_instr_num_funcs"); } - void convertIndirectCallToLoad(MCInst &Inst, MCPhysReg Reg) override { + void convertIndirectCallToLoad(MCInst &Inst, MCPhysReg Reg, + MCPhysReg ZeroReg) const { bool IsTailCall = isTailCall(Inst); if (IsTailCall) removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall); Inst.setOpcode(RISCV::ADD); Inst.insert(Inst.begin(), MCOperand::createReg(Reg)); - Inst.insert(Inst.begin() + 1, MCOperand::createReg(RISCV::X0)); + Inst.insert(Inst.begin() + 1, MCOperand::createReg(ZeroReg)); return; } @@ -844,7 +845,7 @@ class RISCVMCPlusBuilder : public MCPlusBuilder { InstructionListType Insts; spillRegs(Insts, {RISCV::X10, RISCV::X11}); Insts.emplace_back(CallInst); - convertIndirectCallToLoad(Insts.back(), RISCV::X10); + convertIndirectCallToLoad(Insts.back(), RISCV::X10, RISCV::X0); InstructionListType LoadImm = createLoadImmediate(RISCV::X11, CallSiteID); Insts.insert(Insts.end(), LoadImm.begin(), LoadImm.end()); spillRegs(Insts, {RISCV::X10, RISCV::X11}); diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp index b909d7fb6bf28..8e459e10244fd 100644 --- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp @@ -2441,7 +2441,7 @@ class X86MCPlusBuilder : public MCPlusBuilder { std::optional createRelocation(const MCFixup &Fixup, const MCAsmBackend &MAB) const override { - MCFixupKindInfo FKI = MAB.getFixupKindInfo(Fixup.getKind()); + const MCFixupKindInfo &FKI = MAB.getFixupKindInfo(Fixup.getKind()); assert(FKI.TargetOffset == 0 && "0-bit relocation offset expected"); const uint64_t RelOffset = Fixup.getOffset(); diff --git a/bolt/test/AArch64/lite-mode.s b/bolt/test/AArch64/lite-mode.s index a71edbe034669..d81206089aaef 100644 --- a/bolt/test/AArch64/lite-mode.s +++ b/bolt/test/AArch64/lite-mode.s @@ -11,13 +11,6 @@ # RUN: llvm-objdump -d --disassemble-symbols=cold_function %t.bolt \ # RUN: | FileCheck %s - -## Verify that the number of FDEs matches the number of functions in the output -## binary. There are three original functions and two optimized. -# RUN: llvm-readelf -u %t.bolt | grep -wc FDE \ -# RUN: | FileCheck --check-prefix=CHECK-FDE %s -# CHECK-FDE: 5 - ## In lite mode, optimized code will be separated from the original .text by ## over 128MB, making it impossible for call/bl instructions in cold functions ## to reach optimized functions directly. diff --git a/bolt/test/X86/high-segments.s b/bolt/test/X86/high-segments.s deleted file mode 100644 index dfddf18004c2a..0000000000000 --- a/bolt/test/X86/high-segments.s +++ /dev/null @@ -1,46 +0,0 @@ -// Check that we are able to rewrite binaries when we fail to identify a -// suitable location to put new code and user supplies a custom one via -// --custom-allocation-vma. This happens more obviously if the binary has -// segments mapped to very high addresses. - -// In this example, my.reserved.section is mapped to a segment to be loaded -// at address 0x10000000000, while regular text should be at 0x200000. We -// pick a vma in the middle at 0x700000 to carve space for BOLT to put data, -// since BOLT's usual route of allocating after the last segment will put -// code far away and that will blow up relocations from main. - -// RUN: split-file %s %t -// RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o -// RUN: %clang %cflags -no-pie %t.o -o %t.exe -Wl,-T %t/main.ls -// RUN: llvm-bolt %t.exe -o %t.bolt --custom-allocation-vma=0x700000 - -//--- main.s - .type reserved_space,@object - .section .my.reserved.section,"awx",@nobits - .globl reserved_space - .p2align 4, 0x0 -reserved_space: - .zero 0x80000000 - .size reserved_space, 0x80000000 - - .text - .globl main - .globl _start - .type main, %function -_start: -main: - .cfi_startproc - nop - nop - nop - retq - .cfi_endproc -.size main, .-main - -//--- main.ls -SECTIONS -{ - .my.reserved.section 1<<40 : { - *(.my.reserved.section); - } -} INSERT BEFORE .comment; diff --git a/bolt/test/X86/nobits-symbol.s b/bolt/test/X86/nobits-symbol.s deleted file mode 100644 index 4110c79930b29..0000000000000 --- a/bolt/test/X86/nobits-symbol.s +++ /dev/null @@ -1,24 +0,0 @@ -## Check that llvm-bolt doesn't choke on symbols defined in nobits sections. - -# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o -# RUN: %clang %cflags %t.o -o %t.exe -# RUN: llvm-bolt %t.exe -o %t -# - - .type symbol_in_nobits,@object - .section .my.nobits.section,"awx",@nobits - .globl symbol_in_nobits - .p2align 4, 0x0 -symbol_in_nobits: - .zero 0x100000 - .size symbol_in_nobits, 0x100000 - - .text - .globl main - .type main, %function -main: - .cfi_startproc -.LBB06: - retq - .cfi_endproc -.size main, .-main diff --git a/bolt/test/X86/profile-quality-reporting-small-binary.s b/bolt/test/X86/profile-quality-reporting-small-binary.s deleted file mode 100644 index 2b147c5eca81e..0000000000000 --- a/bolt/test/X86/profile-quality-reporting-small-binary.s +++ /dev/null @@ -1,35 +0,0 @@ -## Test that BOLT-INFO is correctly formatted after profile quality reporting for -## a small binary. - -# RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %s -o %t.o -# RUN: link_fdata %s %t.o %t.fdata -# RUN: llvm-strip --strip-unneeded %t.o -# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -# RUN: llvm-bolt %t.exe -o %t.bolt --data=%t.fdata \ -# RUN: 2>&1 | FileCheck %s - -# CHECK: BOLT-INFO: profile quality metrics for the hottest 2 functions (reporting top 5% values): function CFG discontinuity 0.00%; call graph flow conservation gap 0.00%; CFG flow conservation gap 0.00% (weighted) 0.00% (worst); exception handling usage 0.00% (of total BBEC) 0.00% (of total InvokeEC) -# CHECK-NEXT: BOLT-INFO: - - .text - .globl func - .type func, @function -func: - pushq %rbp - ret -LLfunc_end: - .size func, LLfunc_end-func - - - .globl main - .type main, @function -main: - pushq %rbp - movq %rsp, %rbp -LLmain_func: - call func -# FDATA: 1 main #LLmain_func# 1 func 0 0 500 - movl $4, %edi - retq -.Lmain_end: - .size main, .Lmain_end-main diff --git a/bolt/test/X86/profile-quality-reporting.test b/bolt/test/X86/profile-quality-reporting.test index 210d3e10a3890..2e15a6b245afa 100644 --- a/bolt/test/X86/profile-quality-reporting.test +++ b/bolt/test/X86/profile-quality-reporting.test @@ -1,4 +1,4 @@ ## Check profile quality stats reporting RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt | FileCheck %s -CHECK: profile quality metrics for the hottest 5 functions (reporting top 5% values): function CFG discontinuity 100.00%; call graph flow conservation gap 60.00%; CFG flow conservation gap 45.53% (weighted) 96.87% (worst); exception handling usage 0.00% (of total BBEC) 0.00% (of total InvokeEC) +CHECK: profile quality metrics for the hottest 5 functions (reporting top 5% values): function CFG discontinuity 100.00%; call graph flow conservation gap 60.00%; CFG flow conservation gap 45.53% (weighted) 96.87% (worst) diff --git a/bolt/test/link_fdata.py b/bolt/test/link_fdata.py index bcf9a777922d5..f1d83458346d6 100755 --- a/bolt/test/link_fdata.py +++ b/bolt/test/link_fdata.py @@ -9,7 +9,6 @@ import argparse import os -import shutil import subprocess import sys import re @@ -86,7 +85,7 @@ exit("ERROR: unexpected input:\n%s" % line) # Read nm output: -is_llvm_nm = os.path.basename(os.path.realpath(shutil.which(args.nmtool))) == "llvm-nm" +is_llvm_nm = os.path.basename(args.nmtool) == "llvm-nm" nm_output = subprocess.run( [ args.nmtool, diff --git a/bolt/unittests/Core/MCPlusBuilder.cpp b/bolt/unittests/Core/MCPlusBuilder.cpp index 7016dec0e3574..a3113cab3d334 100644 --- a/bolt/unittests/Core/MCPlusBuilder.cpp +++ b/bolt/unittests/Core/MCPlusBuilder.cpp @@ -8,7 +8,6 @@ #ifdef AARCH64_AVAILABLE #include "AArch64Subtarget.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" #endif // AARCH64_AVAILABLE #ifdef X86_AVAILABLE @@ -20,7 +19,6 @@ #include "bolt/Rewrite/RewriteInstance.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" -#include "llvm/MC/MCInstBuilder.h" #include "llvm/Support/TargetSelect.h" #include "gtest/gtest.h" @@ -72,28 +70,16 @@ struct MCPlusBuilderTester : public testing::TestWithParam { BC->MRI.get(), BC->STI.get()))); } - void assertRegMask(const BitVector &RegMask, - std::initializer_list ExpectedRegs) { - ASSERT_EQ(RegMask.count(), ExpectedRegs.size()); - for (MCPhysReg Reg : ExpectedRegs) - ASSERT_TRUE(RegMask[Reg]) << "Expected " << BC->MRI->getName(Reg) << "."; - } - - void assertRegMask(std::function FillRegMask, - std::initializer_list ExpectedRegs) { - BitVector RegMask(BC->MRI->getNumRegs()); - FillRegMask(RegMask); - assertRegMask(RegMask, ExpectedRegs); - } - void testRegAliases(Triple::ArchType Arch, uint64_t Register, - std::initializer_list ExpectedAliases, + uint64_t *Aliases, size_t Count, bool OnlySmaller = false) { if (GetParam() != Arch) GTEST_SKIP(); const BitVector &BV = BC->MIB->getAliases(Register, OnlySmaller); - assertRegMask(BV, ExpectedAliases); + ASSERT_EQ(BV.count(), Count); + for (size_t I = 0; I < Count; ++I) + ASSERT_TRUE(BV[Aliases[I]]); } char ElfBuf[sizeof(typename ELF64LE::Ehdr)] = {}; @@ -108,15 +94,17 @@ INSTANTIATE_TEST_SUITE_P(AArch64, MCPlusBuilderTester, ::testing::Values(Triple::aarch64)); TEST_P(MCPlusBuilderTester, AliasX0) { - testRegAliases(Triple::aarch64, AArch64::X0, - {AArch64::W0, AArch64::W0_HI, AArch64::X0, AArch64::W0_W1, - AArch64::X0_X1, AArch64::X0_X1_X2_X3_X4_X5_X6_X7}); + uint64_t AliasesX0[] = {AArch64::W0, AArch64::W0_HI, + AArch64::X0, AArch64::W0_W1, + AArch64::X0_X1, AArch64::X0_X1_X2_X3_X4_X5_X6_X7}; + size_t AliasesX0Count = sizeof(AliasesX0) / sizeof(*AliasesX0); + testRegAliases(Triple::aarch64, AArch64::X0, AliasesX0, AliasesX0Count); } TEST_P(MCPlusBuilderTester, AliasSmallerX0) { - testRegAliases(Triple::aarch64, AArch64::X0, - {AArch64::W0, AArch64::W0_HI, AArch64::X0}, - /*OnlySmaller=*/true); + uint64_t AliasesX0[] = {AArch64::W0, AArch64::W0_HI, AArch64::X0}; + size_t AliasesX0Count = sizeof(AliasesX0) / sizeof(*AliasesX0); + testRegAliases(Triple::aarch64, AArch64::X0, AliasesX0, AliasesX0Count, true); } TEST_P(MCPlusBuilderTester, AArch64_CmpJE) { @@ -167,100 +155,6 @@ TEST_P(MCPlusBuilderTester, AArch64_CmpJNE) { ASSERT_EQ(Label, BB->getLabel()); } -TEST_P(MCPlusBuilderTester, testAccessedRegsImplicitDef) { - if (GetParam() != Triple::aarch64) - GTEST_SKIP(); - - // adds x0, x5, #42 - MCInst Inst = MCInstBuilder(AArch64::ADDSXri) - .addReg(AArch64::X0) - .addReg(AArch64::X5) - .addImm(42) - .addImm(0); - - assertRegMask([&](BitVector &BV) { BC->MIB->getClobberedRegs(Inst, BV); }, - {AArch64::NZCV, AArch64::W0, AArch64::X0, AArch64::W0_HI, - AArch64::X0_X1_X2_X3_X4_X5_X6_X7, AArch64::W0_W1, - AArch64::X0_X1}); - - assertRegMask( - [&](BitVector &BV) { BC->MIB->getTouchedRegs(Inst, BV); }, - {AArch64::NZCV, AArch64::W0, AArch64::W5, AArch64::X0, AArch64::X5, - AArch64::W0_HI, AArch64::W5_HI, AArch64::X0_X1_X2_X3_X4_X5_X6_X7, - AArch64::X2_X3_X4_X5_X6_X7_X8_X9, AArch64::X4_X5_X6_X7_X8_X9_X10_X11, - AArch64::W0_W1, AArch64::W4_W5, AArch64::X0_X1, AArch64::X4_X5}); - - assertRegMask([&](BitVector &BV) { BC->MIB->getWrittenRegs(Inst, BV); }, - {AArch64::NZCV, AArch64::W0, AArch64::X0, AArch64::W0_HI}); - - assertRegMask([&](BitVector &BV) { BC->MIB->getUsedRegs(Inst, BV); }, - {AArch64::W5, AArch64::X5, AArch64::W5_HI}); - - assertRegMask([&](BitVector &BV) { BC->MIB->getSrcRegs(Inst, BV); }, - {AArch64::W5, AArch64::X5, AArch64::W5_HI}); -} - -TEST_P(MCPlusBuilderTester, testAccessedRegsImplicitUse) { - if (GetParam() != Triple::aarch64) - GTEST_SKIP(); - - // b.eq