From 58548480e97d6c177ce3ef735bed188308699eb5 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 28 Apr 2025 07:56:03 -0700 Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?= =?UTF-8?q?l=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.4 --- bolt/include/bolt/Core/BinaryFunction.h | 15 +++++++-------- bolt/include/bolt/Profile/DataReader.h | 3 +++ bolt/lib/Core/BinaryFunction.cpp | 2 +- bolt/lib/Passes/BinaryPasses.cpp | 2 +- bolt/lib/Profile/DataAggregator.cpp | 23 +++++++++++++++-------- bolt/lib/Profile/DataReader.cpp | 11 +++++++++-- bolt/lib/Profile/YAMLProfileReader.cpp | 6 +++--- bolt/test/perf2bolt/perf_test.test | 1 + 8 files changed, 40 insertions(+), 23 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index a52998564ee1b..e82b857446ce2 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -386,8 +386,8 @@ class BinaryFunction { /// Profile match ratio. float ProfileMatchRatio{0.0f}; - /// Raw branch count for this function in the profile. - uint64_t RawBranchCount{0}; + /// Raw sample/branch count for this function in the profile. + uint64_t RawSampleCount{0}; /// Dynamically executed function bytes, used for density computation. uint64_t SampleCountInBytes{0}; @@ -1880,13 +1880,12 @@ class BinaryFunction { /// Return COUNT_NO_PROFILE if there's no profile info. uint64_t getExecutionCount() const { return ExecutionCount; } - /// Return the raw profile information about the number of branch - /// executions corresponding to this function. - uint64_t getRawBranchCount() const { return RawBranchCount; } + /// Return the raw profile information about the number of samples (basic + /// profile) or branch executions (branch profile) recorded in this function. + uint64_t getRawSampleCount() const { return RawSampleCount; } - /// Set the profile data about the number of branch executions corresponding - /// to this function. - void setRawBranchCount(uint64_t Count) { RawBranchCount = Count; } + /// Set raw count of samples or branches recorded in this function. + void setRawSampleCount(uint64_t Count) { RawSampleCount = Count; } /// Return the number of dynamically executed bytes, from raw perf data. uint64_t getSampleCountInBytes() const { return SampleCountInBytes; } diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h index 314dcc9115586..a7a0933bd4f03 100644 --- a/bolt/include/bolt/Profile/DataReader.h +++ b/bolt/include/bolt/Profile/DataReader.h @@ -252,6 +252,9 @@ struct FuncSampleData { /// Get the number of samples recorded in [Start, End) uint64_t getSamples(uint64_t Start, uint64_t End) const; + /// Returns the total number of samples recorded in this function. + uint64_t getSamples() const; + /// Aggregation helper DenseMap Index; diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 4624abadc701a..fea5101b16cd7 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -471,7 +471,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) { OS << "\n Image : 0x" << Twine::utohexstr(getImageAddress()); if (ExecutionCount != COUNT_NO_PROFILE) { OS << "\n Exec Count : " << ExecutionCount; - OS << "\n Branch Count: " << RawBranchCount; + OS << "\n Branch Count: " << RawSampleCount; OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f); } diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index d8628c62d8654..420ffc8e01c5c 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -1445,7 +1445,7 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { if (!Function.hasProfile()) continue; - uint64_t SampleCount = Function.getRawBranchCount(); + uint64_t SampleCount = Function.getRawSampleCount(); TotalSampleCount += SampleCount; if (Function.hasValidProfile()) { diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index a8a187974418d..a622abd450cf6 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -567,15 +567,14 @@ void DataAggregator::processProfile(BinaryContext &BC) { processMemEvents(); // Mark all functions with registered events as having a valid profile. - const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE - : BinaryFunction::PF_LBR; for (auto &BFI : BC.getBinaryFunctions()) { BinaryFunction &BF = BFI.second; - FuncBranchData *FBD = getBranchData(BF); - if (FBD || getFuncSampleData(BF.getNames())) { - BF.markProfiled(Flags); - if (FBD) - BF.RawBranchCount = FBD->getNumExecutedBranches(); + if (FuncBranchData *FBD = getBranchData(BF)) { + BF.markProfiled(BinaryFunction::PF_LBR); + BF.RawSampleCount = FBD->getNumExecutedBranches(); + } else if (FuncSampleData *FSD = getFuncSampleData(BF.getNames())) { + BF.markProfiled(BinaryFunction::PF_SAMPLE); + BF.RawSampleCount = FSD->getSamples(); } } @@ -632,10 +631,18 @@ StringRef DataAggregator::getLocationName(const BinaryFunction &Func, bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address, uint64_t Count) { + // To record executed bytes, use basic block size as is regardless of BAT. + uint64_t BlockSize = 0; + if (BinaryBasicBlock *BB = OrigFunc.getBasicBlockContainingOffset( + Address - OrigFunc.getAddress())) + BlockSize = BB->getOriginalSize(); + BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc); BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc; - if (ParentFunc || (BAT && !BAT->isBATFunction(OrigFunc.getAddress()))) + if (ParentFunc || (BAT && !BAT->isBATFunction(Func.getAddress()))) NumColdSamples += Count; + // Attach executed bytes to parent function in case of cold fragment. + Func.SampleCountInBytes += Count * BlockSize; auto I = NamesToSamples.find(Func.getOneName()); if (I == NamesToSamples.end()) { diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp index f2e999bbfdc6d..4a92c9eb0a912 100644 --- a/bolt/lib/Profile/DataReader.cpp +++ b/bolt/lib/Profile/DataReader.cpp @@ -128,6 +128,13 @@ uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const { return Result; } +uint64_t FuncSampleData::getSamples() const { + uint64_t Result = 0; + for (const SampleInfo &I : Data) + Result += I.Hits; + return Result; +} + void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) { auto Iter = Index.find(Offset); if (Iter == Index.end()) { @@ -407,12 +414,12 @@ void DataReader::matchProfileData(BinaryFunction &BF) { FuncBranchData *FBD = getBranchData(BF); if (FBD) { BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD); - BF.RawBranchCount = FBD->getNumExecutedBranches(); + BF.RawSampleCount = FBD->getNumExecutedBranches(); if (BF.ProfileMatchRatio == 1.0f) { if (fetchProfileForOtherEntryPoints(BF)) { BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD); BF.ExecutionCount = FBD->ExecutionCount; - BF.RawBranchCount = FBD->getNumExecutedBranches(); + BF.RawSampleCount = FBD->getNumExecutedBranches(); } return; } diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index f5636bfe3e1f1..88b806c7a9ca2 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -177,11 +177,11 @@ bool YAMLProfileReader::parseFunctionProfile( BF.setExecutionCount(YamlBF.ExecCount); - uint64_t FuncRawBranchCount = 0; + uint64_t FuncRawSampleCount = 0; for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) - FuncRawBranchCount += YamlSI.Count; - BF.setRawBranchCount(FuncRawBranchCount); + FuncRawSampleCount += YamlSI.Count; + BF.setRawSampleCount(FuncRawSampleCount); if (BF.empty()) return true; diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test index 7bec4420214d6..44111de89a4ea 100644 --- a/bolt/test/perf2bolt/perf_test.test +++ b/bolt/test/perf2bolt/perf_test.test @@ -8,6 +8,7 @@ RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id 2>&1 | FileCheck %s CHECK-NOT: PERF2BOLT-ERROR CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection. +CHECK: BOLT-INFO: Functions with density >= {{.*}} account for 99.00% total sample counts. RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4 RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4