// Patch summary (four files touched):
//
//  * bolt/include/bolt/Profile/DataReader.h, bolt/lib/Profile/DataReader.cpp:
//    add a zero-argument FuncSampleData::getSamples() overload that sums the
//    Hits field of every SampleInfo entry in Data.
//
//  * DataAggregator::processProfile: the single Flags value that used to be
//    picked from opts::BasicAggregation is removed. A function for which
//    getBranchData() returns data is marked PF_LBR and takes RawBranchCount
//    from getNumExecutedBranches(); otherwise a function for which
//    getFuncSampleData() returns data is marked PF_SAMPLE and takes
//    RawBranchCount from FuncSampleData::getSamples(). Before this change
//    RawBranchCount was only assigned when branch data existed.
//
//  * DataAggregator::doSample: BlockSize is the original size of the basic
//    block of OrigFunc that contains offset (Address - OrigFunc.getAddress()),
//    or 0 when no such block is returned. Count * BlockSize is added to
//    SampleCountInBytes of Func, which is the function returned by
//    getBATParentFunction() when there is one and OrigFunc otherwise.
//    isBATFunction() now receives Func.getAddress(); that operand is only
//    evaluated when ParentFunc is null, in which case Func is OrigFunc, so the
//    NumColdSamples condition is unchanged.
//
//  * bolt/test/perf2bolt/perf_test.test: add a CHECK line for the
//    "Functions with density >= ..." BOLT-INFO message.
//
// NOTE(review): this patch text was re-flowed from a copy whose newlines and
// indentation had been collapsed. Line breaks follow the hunk line counts;
// indentation and blank context lines follow LLVM formatting conventions and
// should be confirmed against the tree before applying.
// NOTE(review): the context line "DenseMap Index;" in the DataReader.h hunk
// presumably lost its template arguments during extraction -- confirm against
// the header, since context lines must match the target file.
diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h
index 4765eb28d53ca..1b61173408031 100644
--- a/bolt/include/bolt/Profile/DataReader.h
+++ b/bolt/include/bolt/Profile/DataReader.h
@@ -246,6 +246,9 @@ struct FuncSampleData {
   /// Get the number of samples recorded in [Start, End)
   uint64_t getSamples(uint64_t Start, uint64_t End) const;
 
+  /// Returns the total number of samples recorded in this function.
+  uint64_t getSamples() const;
+
   /// Aggregation helper
   DenseMap Index;
 
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index cccb8aa98e071..420e8c48443da 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -565,15 +565,14 @@ void DataAggregator::processProfile(BinaryContext &BC) {
   processMemEvents();
 
   // Mark all functions with registered events as having a valid profile.
-  const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
-                                            : BinaryFunction::PF_LBR;
   for (auto &BFI : BC.getBinaryFunctions()) {
     BinaryFunction &BF = BFI.second;
-    FuncBranchData *FBD = getBranchData(BF);
-    if (FBD || getFuncSampleData(BF.getNames())) {
-      BF.markProfiled(Flags);
-      if (FBD)
-        BF.RawBranchCount = FBD->getNumExecutedBranches();
+    if (FuncBranchData *FBD = getBranchData(BF)) {
+      BF.markProfiled(BinaryFunction::PF_LBR);
+      BF.RawBranchCount = FBD->getNumExecutedBranches();
+    } else if (FuncSampleData *FSD = getFuncSampleData(BF.getNames())) {
+      BF.markProfiled(BinaryFunction::PF_SAMPLE);
+      BF.RawBranchCount = FSD->getSamples();
     }
   }
 
@@ -630,10 +629,18 @@ StringRef DataAggregator::getLocationName(const BinaryFunction &Func,
 
 bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
                               uint64_t Count) {
+  // To record executed bytes, use basic block size as is regardless of BAT.
+  uint64_t BlockSize = 0;
+  if (BinaryBasicBlock *BB = OrigFunc.getBasicBlockContainingOffset(
+          Address - OrigFunc.getAddress()))
+    BlockSize = BB->getOriginalSize();
+
   BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc);
   BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
-  if (ParentFunc || (BAT && !BAT->isBATFunction(OrigFunc.getAddress())))
+  if (ParentFunc || (BAT && !BAT->isBATFunction(Func.getAddress())))
     NumColdSamples += Count;
+  // Attach executed bytes to parent function in case of cold fragment.
+  Func.SampleCountInBytes += Count * BlockSize;
 
   auto I = NamesToSamples.find(Func.getOneName());
   if (I == NamesToSamples.end()) {
diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp
index f2e999bbfdc6d..42b1557db3f02 100644
--- a/bolt/lib/Profile/DataReader.cpp
+++ b/bolt/lib/Profile/DataReader.cpp
@@ -128,6 +128,13 @@ uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const {
   return Result;
 }
 
+uint64_t FuncSampleData::getSamples() const {
+  uint64_t Result = 0;
+  for (const SampleInfo &I : Data)
+    Result += I.Hits;
+  return Result;
+}
+
 void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
   auto Iter = Index.find(Offset);
   if (Iter == Index.end()) {
diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test
index 7bec4420214d6..44111de89a4ea 100644
--- a/bolt/test/perf2bolt/perf_test.test
+++ b/bolt/test/perf2bolt/perf_test.test
@@ -8,6 +8,7 @@ RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id 2>&1 | FileCheck %s
 
 CHECK-NOT: PERF2BOLT-ERROR
 CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.
+CHECK: BOLT-INFO: Functions with density >= {{.*}} account for 99.00% total sample counts.
 
 RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4
 RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4