From 7f4eaaaa64e80570c4a651d93f6cf64d58622214 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 29 Jul 2024 15:25:03 -0700 Subject: [PATCH 1/8] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?= =?UTF-8?q?anges=20to=20main=20this=20commit=20is=20based=20on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.4 [skip ci] --- bolt/include/bolt/Core/BinaryFunction.h | 6 ++++++ bolt/lib/Core/BinaryFunction.cpp | 2 ++ bolt/lib/Profile/DataAggregator.cpp | 6 +++++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 24c7db2f5d69c..2c8e23999b60c 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -242,6 +242,9 @@ class BinaryFunction { /// Original size of the function. uint64_t Size; + /// Original instruction count of the function, if disassembly succeeded. + uint64_t InputInstructionCount{0}; + /// Address of the function in output. uint64_t OutputAddress{0}; @@ -2173,6 +2176,9 @@ class BinaryFunction { /// Get the number of instructions within this function. uint64_t getInstructionCount() const; + /// Get the original number of instructions. + uint64_t getInputInstructionCount() const { return InputInstructionCount; } + const CFIInstrMapType &getFDEProgram() const { return FrameInstructions; } void moveRememberRestorePair(BinaryBasicBlock *BB); diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index ea09371b57e8a..1f5869cd0ab73 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1499,6 +1499,8 @@ Error BinaryFunction::disassemble() { clearList(Relocations); + InputInstructionCount = Instructions.size(); + if (!IsSimple) { clearList(Instructions); return createNonFatalBOLTError(""); diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index a300e5b2b1dab..83f773a5f1a29 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -638,8 +638,12 @@ void DataAggregator::processProfile(BinaryContext &BC) { : BinaryFunction::PF_LBR; for (auto &BFI : BC.getBinaryFunctions()) { BinaryFunction &BF = BFI.second; - if (getBranchData(BF) || getFuncSampleData(BF.getNames())) + FuncBranchData *FBD = getBranchData(BF); + if (FBD || getFuncSampleData(BF.getNames())) { BF.markProfiled(Flags); + if (FBD) + BF.RawBranchCount = FBD->getNumExecutedBranches(); + } } for (auto &FuncBranches : NamesToBranches) From f598510001859a29f6f1ff6362fb9950ab6340cd Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 29 Jul 2024 16:14:08 -0700 Subject: [PATCH 2/8] Update test to check the option with llvm-bolt with fdata, YAML, and pre-aggregated profile Created using spr 1.3.4 --- bolt/test/X86/pre-aggregated-perf.test | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test index fc6f332d53dfb..0f5137309e85d 100644 --- a/bolt/test/X86/pre-aggregated-perf.test +++ b/bolt/test/X86/pre-aggregated-perf.test @@ -15,9 +15,15 @@ RUN: --show-density --profile-density-threshold=9 \ RUN: --profile-density-cutoff-hot=970000 \ RUN: --profile-use-dfs | FileCheck %s -RUN: llvm-bolt %t.exe -data %t -o %t.null | FileCheck %s -RUN: llvm-bolt %t.exe -data %t.new -o %t.null | FileCheck %s -RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null | FileCheck %s +RUN: llvm-bolt %t.exe -data %t -o %t.null \ +RUN: --show-density --profile-density-threshold=9 \ +RUN: --profile-density-cutoff-hot=970000 | FileCheck %s +RUN: llvm-bolt %t.exe -data %t.new -o %t.null \ +RUN: --show-density --profile-density-threshold=9 \ +RUN: --profile-density-cutoff-hot=970000 | FileCheck %s +RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null \ +RUN: --show-density --profile-density-threshold=9 \ +RUN: --profile-density-cutoff-hot=970000 | FileCheck %s CHECK: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile CHECK: BOLT-INFO: Functions with density >= 9.4 account for 97.00% total sample counts. From e91907e57b39c8c79eb58b4d28d78fa253b130cb Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 29 Jul 2024 20:09:08 -0700 Subject: [PATCH 3/8] show-density init(true) Created using spr 1.3.4 --- bolt/lib/Passes/BinaryPasses.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index 23009bf74e077..83fd6b2562eca 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -224,7 +224,7 @@ static cl::opt TopCalledLimit( cl::init(100), cl::Hidden, cl::cat(BoltCategory)); // Profile density options, synced with llvm-profgen/ProfileGenerator.cpp -static cl::opt ShowDensity("show-density", cl::init(false), +static cl::opt ShowDensity("show-density", cl::init(true), cl::desc("show profile density details"), cl::Optional); From 0d5291b01264a5387f8afd9fb69baf55fdc409a7 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Fri, 9 Aug 2024 11:17:57 -0700 Subject: [PATCH 4/8] show-density off by default Created using spr 1.3.4 --- bolt/lib/Passes/BinaryPasses.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index e0ad2af63a384..0dc4a37e0ba94 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -224,7 +224,7 @@ static cl::opt TopCalledLimit( cl::init(100), cl::Hidden, cl::cat(BoltCategory)); // Profile density options, synced with llvm-profgen/ProfileGenerator.cpp -static cl::opt ShowDensity("show-density", cl::init(true), +static cl::opt ShowDensity("show-density", cl::init(false), cl::desc("show profile density details"), cl::Optional); From f20e9618d0ab9602ebde9ec518ae194fbc323382 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 12 Aug 2024 14:46:21 -0700 Subject: [PATCH 5/8] s/ExecutedBytes/SampleCountInBytes Created using spr 1.3.4 --- bolt/include/bolt/Core/BinaryFunction.h | 4 ++-- bolt/lib/Passes/BinaryPasses.cpp | 4 ++-- bolt/lib/Profile/DataAggregator.cpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 73d0d48c907e3..54ee4748b54c2 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -388,7 +388,7 @@ class BinaryFunction { uint64_t RawBranchCount{0}; /// Dynamically executed function bytes, used for density computation. - uint64_t ExecutedBytes{0}; + uint64_t SampleCountInBytes{0}; /// Indicates the type of profile the function is using. uint16_t ProfileFlags{PF_NONE}; @@ -1847,7 +1847,7 @@ class BinaryFunction { void setRawBranchCount(uint64_t Count) { RawBranchCount = Count; } /// Return the number of dynamically executed bytes, from raw perf data. - uint64_t getExecutedBytes() const { return ExecutedBytes; } + uint64_t getSampleCountInBytes() const { return SampleCountInBytes; } /// Return the execution count for functions with known profile. /// Return 0 if the function has no profile. diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index 0dc4a37e0ba94..a9935f02862f8 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -1467,10 +1467,10 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { if (IsHotParentOfBOLTSplitFunction) for (const BinaryFunction *Fragment : Function.getFragments()) Size += Fragment->getSize(); - double Density = (double)1.0 * Function.getExecutedBytes() / Size; + double Density = (double)1.0 * Function.getSampleCountInBytes() / Size; FuncDensityList.emplace_back(Density, SampleCount); LLVM_DEBUG(BC.outs() << Function << ": executed bytes " - << Function.getExecutedBytes() << ", size (b) " + << Function.getSampleCountInBytes() << ", size (b) " << Size << ", density " << Density << ", sample count " << SampleCount << '\n'); } diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index c26705e49774c..a9c55c98f54a9 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -853,7 +853,7 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second, BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc); if (!ParentFunc) ParentFunc = FromFunc; - ParentFunc->ExecutedBytes += Count * (Second.From - First.To); + ParentFunc->SampleCountInBytes += Count * (Second.From - First.To); std::optional FTs = BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), First.To, From f0b70141d43fc49c158467040c2b01f32a6a5e0e Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Thu, 24 Oct 2024 13:37:10 -0700 Subject: [PATCH 6/8] Only enable show-density for perf2bolt Created using spr 1.3.4 --- bolt/include/bolt/Utils/CommandLineOpts.h | 1 + bolt/lib/Passes/BinaryPasses.cpp | 5 +---- bolt/lib/Utils/CommandLineOpts.cpp | 4 ++++ bolt/tools/driver/llvm-bolt.cpp | 1 + 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h index baabeab577fb5..04bf7db5de952 100644 --- a/bolt/include/bolt/Utils/CommandLineOpts.h +++ b/bolt/include/bolt/Utils/CommandLineOpts.h @@ -55,6 +55,7 @@ extern llvm::cl::opt PrintSections; enum ProfileFormatKind { PF_Fdata, PF_YAML }; extern llvm::cl::opt ProfileFormat; +extern llvm::cl::opt ShowDensity; extern llvm::cl::opt SplitEH; extern llvm::cl::opt StrictMode; extern llvm::cl::opt TimeOpts; diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index c2c248abf8e26..179fe67caf524 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -15,6 +15,7 @@ #include "bolt/Core/ParallelUtilities.h" #include "bolt/Passes/ReorderAlgorithm.h" #include "bolt/Passes/ReorderFunctions.h" +#include "bolt/Utils/CommandLineOpts.h" #include "llvm/Support/CommandLine.h" #include #include @@ -224,10 +225,6 @@ static cl::opt TopCalledLimit( cl::init(100), cl::Hidden, cl::cat(BoltCategory)); // Profile density options, synced with llvm-profgen/ProfileGenerator.cpp -static cl::opt ShowDensity("show-density", cl::init(true), - cl::desc("show profile density details"), - cl::Optional); - static cl::opt ProfileDensityCutOffHot( "profile-density-cutoff-hot", cl::init(990000), cl::desc("Total samples cutoff for functions used to calculate " diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp index 435a8fa9cafca..de82420a16713 100644 --- a/bolt/lib/Utils/CommandLineOpts.cpp +++ b/bolt/lib/Utils/CommandLineOpts.cpp @@ -175,6 +175,10 @@ cl::opt SaveProfile("w", cl::desc("save recorded profile to a file"), cl::cat(BoltOutputCategory)); +cl::opt ShowDensity("show-density", + cl::desc("show profile density details"), + cl::Optional, cl::cat(AggregatorCategory)); + cl::opt SplitEH("split-eh", cl::desc("split C++ exception handling code"), cl::Hidden, cl::cat(BoltOptCategory)); diff --git a/bolt/tools/driver/llvm-bolt.cpp b/bolt/tools/driver/llvm-bolt.cpp index a8d1ac6480893..efa06cd68cb99 100644 --- a/bolt/tools/driver/llvm-bolt.cpp +++ b/bolt/tools/driver/llvm-bolt.cpp @@ -129,6 +129,7 @@ void perf2boltMode(int argc, char **argv) { exit(1); } opts::AggregateOnly = true; + opts::ShowDensity = true; } void boltDiffMode(int argc, char **argv) { From 34f91248e09e1586bf87e7abb8a59e8ad34ec2ef Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Thu, 24 Oct 2024 15:50:39 -0700 Subject: [PATCH 7/8] default threshold=60 Created using spr 1.3.4 --- bolt/lib/Passes/BinaryPasses.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index 179fe67caf524..5a676185227ec 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -231,7 +231,7 @@ static cl::opt ProfileDensityCutOffHot( "profile density.")); static cl::opt ProfileDensityThreshold( - "profile-density-threshold", cl::init(50), + "profile-density-threshold", cl::init(60), cl::desc("If the profile density is below the given threshold, it " "will be suggested to increase the sampling rate."), cl::Optional); From a06c4bbae2ba4c588297e1df540a6b53578f7aa3 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Thu, 24 Oct 2024 16:09:02 -0700 Subject: [PATCH 8/8] Add test with warning Created using spr 1.3.4 --- bolt/test/X86/pre-aggregated-perf.test | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test index ecdc613a8d76b..3242ba22f5916 100644 --- a/bolt/test/X86/pre-aggregated-perf.test +++ b/bolt/test/X86/pre-aggregated-perf.test @@ -11,13 +11,20 @@ REQUIRES: system-linux RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \ -RUN: --show-density --profile-density-threshold=9 \ -RUN: --profile-density-cutoff-hot=970000 \ +RUN: --profile-density-threshold=9 --profile-density-cutoff-hot=970000 \ RUN: --profile-use-dfs | FileCheck %s --check-prefix=CHECK-P2B CHECK-P2B: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile CHECK-P2B: BOLT-INFO: Functions with density >= 21.7 account for 97.00% total sample counts. +RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \ +RUN: --profile-density-cutoff-hot=970000 \ +RUN: --profile-use-dfs 2>&1 | FileCheck %s --check-prefix=CHECK-WARNING + +CHECK-WARNING: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile +CHECK-WARNING: BOLT-WARNING: BOLT is estimated to optimize better with 2.8x more samples. +CHECK-WARNING: BOLT-INFO: Functions with density >= 21.7 account for 97.00% total sample counts. + RUN: llvm-bolt %t.exe -data %t -o %t.null | FileCheck %s RUN: llvm-bolt %t.exe -data %t.new -o %t.null | FileCheck %s RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null | FileCheck %s