Skip to content

Commit 672bfad

Browse files
committed
Merge branch 'main' into amd-trunk-dev
2 parents b29b413 + a393c92 commit 672bfad

File tree

4,949 files changed

+426989
-59394
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

4,949 files changed

+426989
-59394
lines changed

.github/workflows/containers/github-action-ci/stage1.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@ FROM docker.io/library/ubuntu:22.04 as base
22
ENV LLVM_SYSROOT=/opt/llvm
33

44
FROM base as stage1-toolchain
5-
ENV LLVM_VERSION=18.1.8
5+
ENV LLVM_VERSION=19.1.2
66

77
RUN apt-get update && \
88
apt-get install -y \

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,9 @@ class BinaryFunction {
386386
/// Raw branch count for this function in the profile.
387387
uint64_t RawBranchCount{0};
388388

389+
/// Dynamically executed function bytes, used for density computation.
390+
uint64_t SampleCountInBytes{0};
391+
389392
/// Indicates the type of profile the function is using.
390393
uint16_t ProfileFlags{PF_NONE};
391394

@@ -1844,6 +1847,9 @@ class BinaryFunction {
18441847
/// to this function.
18451848
void setRawBranchCount(uint64_t Count) { RawBranchCount = Count; }
18461849

1850+
/// Return the number of dynamically executed bytes, from raw perf data.
1851+
uint64_t getSampleCountInBytes() const { return SampleCountInBytes; }
1852+
18471853
/// Return the execution count for functions with known profile.
18481854
/// Return 0 if the function has no profile.
18491855
uint64_t getKnownExecutionCount() const {

bolt/include/bolt/Core/DIEBuilder.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ class DIEBuilder {
314314

315315
BC.errs()
316316
<< "BOLT-ERROR: unable to find TypeUnit for Type Unit at offset 0x"
317-
<< DU.getOffset() << "\n";
317+
<< Twine::utohexstr(DU.getOffset()) << "\n";
318318
return nullptr;
319319
}
320320

bolt/include/bolt/Utils/CommandLineOpts.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ extern llvm::cl::opt<bool> PrintSections;
5555
enum ProfileFormatKind { PF_Fdata, PF_YAML };
5656

5757
extern llvm::cl::opt<ProfileFormatKind> ProfileFormat;
58+
extern llvm::cl::opt<bool> ShowDensity;
5859
extern llvm::cl::opt<bool> SplitEH;
5960
extern llvm::cl::opt<bool> StrictMode;
6061
extern llvm::cl::opt<bool> TimeOpts;

bolt/lib/Core/BinaryContext.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1294,8 +1294,8 @@ bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
12941294
Veneer->getOrCreateLocalLabel(Address);
12951295
Veneer->setMaxSize(TotalSize);
12961296
Veneer->updateState(BinaryFunction::State::Disassembled);
1297-
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address
1298-
<< "\n");
1297+
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x"
1298+
<< Twine::utohexstr(Address) << "\n");
12991299
return true;
13001300
};
13011301

bolt/lib/Core/DIEBuilder.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -281,8 +281,7 @@ void DIEBuilder::buildTypeUnits(DebugStrOffsetsWriter *StrOffsetWriter,
281281
for (auto &Row : TUIndex.getRows()) {
282282
uint64_t Signature = Row.getSignature();
283283
// manually populate TypeUnit to UnitVector
284-
DwarfContext->getTypeUnitForHash(DwarfContext->getMaxVersion(), Signature,
285-
true);
284+
DwarfContext->getTypeUnitForHash(Signature, true);
286285
}
287286
}
288287
const unsigned int CUNum = getCUNum(DwarfContext, isDWO());

bolt/lib/Passes/BinaryPasses.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "bolt/Core/ParallelUtilities.h"
1616
#include "bolt/Passes/ReorderAlgorithm.h"
1717
#include "bolt/Passes/ReorderFunctions.h"
18+
#include "bolt/Utils/CommandLineOpts.h"
1819
#include "llvm/Support/CommandLine.h"
1920
#include <atomic>
2021
#include <mutex>
@@ -223,6 +224,18 @@ static cl::opt<unsigned> TopCalledLimit(
223224
"functions section"),
224225
cl::init(100), cl::Hidden, cl::cat(BoltCategory));
225226

227+
// Profile density options, synced with llvm-profgen/ProfileGenerator.cpp
228+
static cl::opt<int> ProfileDensityCutOffHot(
229+
"profile-density-cutoff-hot", cl::init(990000),
230+
cl::desc("Total samples cutoff for functions used to calculate "
231+
"profile density."));
232+
233+
static cl::opt<double> ProfileDensityThreshold(
234+
"profile-density-threshold", cl::init(60),
235+
cl::desc("If the profile density is below the given threshold, it "
236+
"will be suggested to increase the sampling rate."),
237+
cl::Optional);
238+
226239
} // namespace opts
227240

228241
namespace llvm {
@@ -1383,6 +1396,7 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
13831396
uint64_t StaleSampleCount = 0;
13841397
uint64_t InferredSampleCount = 0;
13851398
std::vector<const BinaryFunction *> ProfiledFunctions;
1399+
std::vector<std::pair<double, uint64_t>> FuncDensityList;
13861400
const char *StaleFuncsHeader = "BOLT-INFO: Functions with stale profile:\n";
13871401
for (auto &BFI : BC.getBinaryFunctions()) {
13881402
const BinaryFunction &Function = BFI.second;
@@ -1441,6 +1455,22 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
14411455
StaleSampleCount += SampleCount;
14421456
++NumAllStaleFunctions;
14431457
}
1458+
1459+
if (opts::ShowDensity) {
1460+
uint64_t Size = Function.getSize();
1461+
// In case of BOLT split functions registered in BAT, executed traces are
1462+
// automatically attributed to the main fragment. Add up function sizes
1463+
// for all fragments.
1464+
if (IsHotParentOfBOLTSplitFunction)
1465+
for (const BinaryFunction *Fragment : Function.getFragments())
1466+
Size += Fragment->getSize();
1467+
double Density = (double)1.0 * Function.getSampleCountInBytes() / Size;
1468+
FuncDensityList.emplace_back(Density, SampleCount);
1469+
LLVM_DEBUG(BC.outs() << Function << ": executed bytes "
1470+
<< Function.getSampleCountInBytes() << ", size (b) "
1471+
<< Size << ", density " << Density
1472+
<< ", sample count " << SampleCount << '\n');
1473+
}
14441474
}
14451475
BC.NumProfiledFuncs = ProfiledFunctions.size();
14461476
BC.NumStaleProfileFuncs = NumStaleProfileFunctions;
@@ -1684,6 +1714,50 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
16841714
BC.outs() << ". Use -print-unknown to see the list.";
16851715
BC.outs() << '\n';
16861716
}
1717+
1718+
if (opts::ShowDensity) {
1719+
double Density = 0.0;
1720+
// Sorted by the density in descending order.
1721+
llvm::stable_sort(FuncDensityList,
1722+
[&](const std::pair<double, uint64_t> &A,
1723+
const std::pair<double, uint64_t> &B) {
1724+
if (A.first != B.first)
1725+
return A.first > B.first;
1726+
return A.second < B.second;
1727+
});
1728+
1729+
uint64_t AccumulatedSamples = 0;
1730+
uint32_t I = 0;
1731+
assert(opts::ProfileDensityCutOffHot <= 1000000 &&
1732+
"The cutoff value is greater than 1000000(100%)");
1733+
while (AccumulatedSamples <
1734+
TotalSampleCount *
1735+
static_cast<float>(opts::ProfileDensityCutOffHot) /
1736+
1000000 &&
1737+
I < FuncDensityList.size()) {
1738+
AccumulatedSamples += FuncDensityList[I].second;
1739+
Density = FuncDensityList[I].first;
1740+
I++;
1741+
}
1742+
if (Density == 0.0) {
1743+
BC.errs() << "BOLT-WARNING: the output profile is empty or the "
1744+
"--profile-density-cutoff-hot option is "
1745+
"set too low. Please check your command.\n";
1746+
} else if (Density < opts::ProfileDensityThreshold) {
1747+
BC.errs()
1748+
<< "BOLT-WARNING: BOLT is estimated to optimize better with "
1749+
<< format("%.1f", opts::ProfileDensityThreshold / Density)
1750+
<< "x more samples. Please consider increasing sampling rate or "
1751+
"profiling for longer duration to get more samples.\n";
1752+
}
1753+
1754+
BC.outs() << "BOLT-INFO: Functions with density >= "
1755+
<< format("%.1f", Density) << " account for "
1756+
<< format("%.2f",
1757+
static_cast<double>(opts::ProfileDensityCutOffHot) /
1758+
10000)
1759+
<< "% total sample counts.\n";
1760+
}
16871761
return Error::success();
16881762
}
16891763

bolt/lib/Passes/VeneerElimination.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,12 +73,12 @@ Error VeneerElimination::runOnFunctions(BinaryContext &BC) {
7373
continue;
7474

7575
const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Instr, 0);
76-
if (VeneerDestinations.find(TargetSymbol) == VeneerDestinations.end())
76+
auto It = VeneerDestinations.find(TargetSymbol);
77+
if (It == VeneerDestinations.end())
7778
continue;
7879

7980
VeneerCallers++;
80-
BC.MIB->replaceBranchTarget(Instr, VeneerDestinations[TargetSymbol],
81-
BC.Ctx.get());
81+
BC.MIB->replaceBranchTarget(Instr, It->second, BC.Ctx.get());
8282
}
8383
}
8484
}

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -638,8 +638,12 @@ void DataAggregator::processProfile(BinaryContext &BC) {
638638
: BinaryFunction::PF_LBR;
639639
for (auto &BFI : BC.getBinaryFunctions()) {
640640
BinaryFunction &BF = BFI.second;
641-
if (getBranchData(BF) || getFuncSampleData(BF.getNames()))
641+
FuncBranchData *FBD = getBranchData(BF);
642+
if (FBD || getFuncSampleData(BF.getNames())) {
642643
BF.markProfiled(Flags);
644+
if (FBD)
645+
BF.RawBranchCount = FBD->getNumExecutedBranches();
646+
}
643647
}
644648

645649
for (auto &FuncBranches : NamesToBranches)
@@ -845,6 +849,12 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
845849
return false;
846850
}
847851

852+
// Set ParentFunc to BAT parent function or FromFunc itself.
853+
BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
854+
if (!ParentFunc)
855+
ParentFunc = FromFunc;
856+
ParentFunc->SampleCountInBytes += Count * (Second.From - First.To);
857+
848858
std::optional<BoltAddressTranslation::FallthroughListTy> FTs =
849859
BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), First.To,
850860
Second.From)
@@ -864,13 +874,12 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
864874
<< FromFunc->getPrintName() << ":"
865875
<< Twine::utohexstr(First.To) << " to "
866876
<< Twine::utohexstr(Second.From) << ".\n");
867-
BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
868877
for (auto [From, To] : *FTs) {
869878
if (BAT) {
870879
From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true);
871880
To = BAT->translate(FromFunc->getAddress(), To, /*IsBranchSrc=*/false);
872881
}
873-
doIntraBranch(ParentFunc ? *ParentFunc : *FromFunc, From, To, Count, false);
882+
doIntraBranch(*ParentFunc, From, To, Count, false);
874883
}
875884

876885
return true;

bolt/lib/Rewrite/DWARFRewriter.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1362,7 +1362,7 @@ void DWARFRewriter::updateDWARFObjectAddressRanges(
13621362
Die.getTag() == dwarf::DW_TAG_compile_unit)) {
13631363
if (opts::Verbosity >= 1)
13641364
errs() << "BOLT-WARNING: cannot update ranges for DIE in Unit offset 0x"
1365-
<< Unit.getOffset() << '\n';
1365+
<< Twine::utohexstr(Unit.getOffset()) << '\n';
13661366
}
13671367
}
13681368

0 commit comments

Comments
 (0)