Skip to content

Commit baf5aac

Browse files
committed
Merge remote-tracking branch 'origin/main' into vplan-resumephi-vpinst-for-reductions
2 parents 8337aa6 + 2d26ef0 commit baf5aac

File tree

5,828 files changed

+408165
-95339
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

5,828 files changed

+408165
-95339
lines changed

.github/workflows/containers/github-action-ci/stage1.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ FROM docker.io/library/ubuntu:22.04 as base
22
ENV LLVM_SYSROOT=/opt/llvm
33

44
FROM base as stage1-toolchain
5-
ENV LLVM_VERSION=18.1.8
5+
ENV LLVM_VERSION=19.1.2
66

77
RUN apt-get update && \
88
apt-get install -y \

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,9 @@ class BinaryFunction {
386386
/// Raw branch count for this function in the profile.
387387
uint64_t RawBranchCount{0};
388388

389+
/// Dynamically executed function bytes, used for density computation.
390+
uint64_t SampleCountInBytes{0};
391+
389392
/// Indicates the type of profile the function is using.
390393
uint16_t ProfileFlags{PF_NONE};
391394

@@ -1844,6 +1847,9 @@ class BinaryFunction {
18441847
/// to this function.
18451848
void setRawBranchCount(uint64_t Count) { RawBranchCount = Count; }
18461849

1850+
/// Return the number of dynamically executed bytes, from raw perf data.
1851+
uint64_t getSampleCountInBytes() const { return SampleCountInBytes; }
1852+
18471853
/// Return the execution count for functions with known profile.
18481854
/// Return 0 if the function has no profile.
18491855
uint64_t getKnownExecutionCount() const {

bolt/include/bolt/Core/DIEBuilder.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ class DIEBuilder {
314314

315315
BC.errs()
316316
<< "BOLT-ERROR: unable to find TypeUnit for Type Unit at offset 0x"
317-
<< DU.getOffset() << "\n";
317+
<< Twine::utohexstr(DU.getOffset()) << "\n";
318318
return nullptr;
319319
}
320320

bolt/include/bolt/Utils/CommandLineOpts.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ extern llvm::cl::opt<bool> PrintSections;
5555
enum ProfileFormatKind { PF_Fdata, PF_YAML };
5656

5757
extern llvm::cl::opt<ProfileFormatKind> ProfileFormat;
58+
extern llvm::cl::opt<bool> ShowDensity;
5859
extern llvm::cl::opt<bool> SplitEH;
5960
extern llvm::cl::opt<bool> StrictMode;
6061
extern llvm::cl::opt<bool> TimeOpts;

bolt/lib/Core/BinaryContext.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1294,8 +1294,8 @@ bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
12941294
Veneer->getOrCreateLocalLabel(Address);
12951295
Veneer->setMaxSize(TotalSize);
12961296
Veneer->updateState(BinaryFunction::State::Disassembled);
1297-
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address
1298-
<< "\n");
1297+
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x"
1298+
<< Twine::utohexstr(Address) << "\n");
12991299
return true;
13001300
};
13011301

bolt/lib/Core/DIEBuilder.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -281,8 +281,7 @@ void DIEBuilder::buildTypeUnits(DebugStrOffsetsWriter *StrOffsetWriter,
281281
for (auto &Row : TUIndex.getRows()) {
282282
uint64_t Signature = Row.getSignature();
283283
// manually populate TypeUnit to UnitVector
284-
DwarfContext->getTypeUnitForHash(DwarfContext->getMaxVersion(), Signature,
285-
true);
284+
DwarfContext->getTypeUnitForHash(Signature, true);
286285
}
287286
}
288287
const unsigned int CUNum = getCUNum(DwarfContext, isDWO());

bolt/lib/Passes/BinaryPasses.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "bolt/Core/ParallelUtilities.h"
1616
#include "bolt/Passes/ReorderAlgorithm.h"
1717
#include "bolt/Passes/ReorderFunctions.h"
18+
#include "bolt/Utils/CommandLineOpts.h"
1819
#include "llvm/Support/CommandLine.h"
1920
#include <atomic>
2021
#include <mutex>
@@ -223,6 +224,18 @@ static cl::opt<unsigned> TopCalledLimit(
223224
"functions section"),
224225
cl::init(100), cl::Hidden, cl::cat(BoltCategory));
225226

227+
// Profile density options, synced with llvm-profgen/ProfileGenerator.cpp
228+
static cl::opt<int> ProfileDensityCutOffHot(
229+
"profile-density-cutoff-hot", cl::init(990000),
230+
cl::desc("Total samples cutoff for functions used to calculate "
231+
"profile density."));
232+
233+
static cl::opt<double> ProfileDensityThreshold(
234+
"profile-density-threshold", cl::init(60),
235+
cl::desc("If the profile density is below the given threshold, it "
236+
"will be suggested to increase the sampling rate."),
237+
cl::Optional);
238+
226239
} // namespace opts
227240

228241
namespace llvm {
@@ -1383,6 +1396,7 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
13831396
uint64_t StaleSampleCount = 0;
13841397
uint64_t InferredSampleCount = 0;
13851398
std::vector<const BinaryFunction *> ProfiledFunctions;
1399+
std::vector<std::pair<double, uint64_t>> FuncDensityList;
13861400
const char *StaleFuncsHeader = "BOLT-INFO: Functions with stale profile:\n";
13871401
for (auto &BFI : BC.getBinaryFunctions()) {
13881402
const BinaryFunction &Function = BFI.second;
@@ -1441,6 +1455,22 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
14411455
StaleSampleCount += SampleCount;
14421456
++NumAllStaleFunctions;
14431457
}
1458+
1459+
if (opts::ShowDensity) {
  // Record this function's profile density: dynamically executed bytes
  // divided by the function's code size.
  uint64_t Size = Function.getSize();
  // In case of BOLT split functions registered in BAT, executed traces are
  // automatically attributed to the main fragment. Add up function sizes
  // for all fragments.
  if (IsHotParentOfBOLTSplitFunction)
    for (const BinaryFunction *Fragment : Function.getFragments())
      Size += Fragment->getSize();
  // NOTE(review): a Size of 0 would make Density inf or NaN; confirm that
  // zero-sized functions cannot reach this point.
  const uint64_t SampledBytes = Function.getSampleCountInBytes();
  const double Density = static_cast<double>(SampledBytes) / Size;
  FuncDensityList.emplace_back(Density, SampleCount);
  LLVM_DEBUG(BC.outs() << Function << ": executed bytes "
                       << SampledBytes << ", size (b) "
                       << Size << ", density " << Density
                       << ", sample count " << SampleCount << '\n');
}
14441474
}
14451475
BC.NumProfiledFuncs = ProfiledFunctions.size();
14461476
BC.NumStaleProfileFuncs = NumStaleProfileFunctions;
@@ -1684,6 +1714,50 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
16841714
BC.outs() << ". Use -print-unknown to see the list.";
16851715
BC.outs() << '\n';
16861716
}
1717+
1718+
if (opts::ShowDensity) {
  // Profile density report. Walk the (density, sample count) pairs collected
  // in FuncDensityList from the densest entry down until the accumulated
  // samples reach the --profile-density-cutoff-hot share of TotalSampleCount.
  // The density of the last entry visited is reported and compared against
  // --profile-density-threshold.
  double Density = 0.0;
  // Sort by density in descending order; entries with equal density are
  // ordered by ascending sample count.
  llvm::stable_sort(FuncDensityList,
                    [](const std::pair<double, uint64_t> &A,
                       const std::pair<double, uint64_t> &B) {
                      if (A.first != B.first)
                        return A.first > B.first;
                      return A.second < B.second;
                    });

  uint64_t AccumulatedSamples = 0;
  uint32_t I = 0;
  assert(opts::ProfileDensityCutOffHot <= 1000000 &&
         "The cutoff value is greater than 1000000(100%)");
  // The cutoff option is expressed in parts per million. Compute the sample
  // cutoff once, in double precision: single-precision float cannot
  // represent sample counts above 2^24 exactly, which skews the comparison
  // for large profiles.
  const double SampleCutOff =
      static_cast<double>(TotalSampleCount) *
      static_cast<double>(opts::ProfileDensityCutOffHot) / 1000000;
  while (AccumulatedSamples < SampleCutOff && I < FuncDensityList.size()) {
    AccumulatedSamples += FuncDensityList[I].second;
    Density = FuncDensityList[I].first;
    I++;
  }
  if (Density == 0.0) {
    // Density keeps its initial value when the loop never ran or when the
    // last visited entry has zero density.
    BC.errs() << "BOLT-WARNING: the output profile is empty or the "
                 "--profile-density-cutoff-hot option is "
                 "set too low. Please check your command.\n";
  } else if (Density < opts::ProfileDensityThreshold) {
    BC.errs()
        << "BOLT-WARNING: BOLT is estimated to optimize better with "
        << format("%.1f", opts::ProfileDensityThreshold / Density)
        << "x more samples. Please consider increasing sampling rate or "
           "profiling for longer duration to get more samples.\n";
  }

  // The cutoff is in parts per million, so dividing by 10000 yields percent.
  BC.outs() << "BOLT-INFO: Functions with density >= "
            << format("%.1f", Density) << " account for "
            << format("%.2f",
                      static_cast<double>(opts::ProfileDensityCutOffHot) /
                          10000)
            << "% total sample counts.\n";
}
16871761
return Error::success();
16881762
}
16891763

bolt/lib/Passes/Instrumentation.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,8 @@ static bool hasAArch64ExclusiveMemop(
109109
BinaryBasicBlock *BB = BBQueue.front().first;
110110
bool IsLoad = BBQueue.front().second;
111111
BBQueue.pop();
112-
if (Visited.find(BB) != Visited.end())
112+
if (!Visited.insert(BB).second)
113113
continue;
114-
Visited.insert(BB);
115114

116115
for (const MCInst &Inst : *BB) {
117116
// Two loads one after another - skip whole function
@@ -126,8 +125,7 @@ static bool hasAArch64ExclusiveMemop(
126125
if (BC.MIB->isAArch64ExclusiveLoad(Inst))
127126
IsLoad = true;
128127

129-
if (IsLoad && BBToSkip.find(BB) == BBToSkip.end()) {
130-
BBToSkip.insert(BB);
128+
if (IsLoad && BBToSkip.insert(BB).second) {
131129
if (opts::Verbosity >= 2) {
132130
outs() << "BOLT-INSTRUMENTER: skip BB " << BB->getName()
133131
<< " due to exclusive instruction in function "

bolt/lib/Passes/LongJmp.cpp

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -324,9 +324,8 @@ uint64_t LongJmpPass::tentativeLayoutRelocColdPart(
324324
uint64_t LongJmpPass::tentativeLayoutRelocMode(
325325
const BinaryContext &BC, std::vector<BinaryFunction *> &SortedFunctions,
326326
uint64_t DotAddress) {
327-
328327
// Compute hot cold frontier
329-
uint32_t LastHotIndex = -1u;
328+
int64_t LastHotIndex = -1u;
330329
uint32_t CurrentIndex = 0;
331330
if (opts::HotFunctionsAtEnd) {
332331
for (BinaryFunction *BF : SortedFunctions) {
@@ -351,19 +350,20 @@ uint64_t LongJmpPass::tentativeLayoutRelocMode(
351350
// Hot
352351
CurrentIndex = 0;
353352
bool ColdLayoutDone = false;
353+
auto runColdLayout = [&]() {
354+
DotAddress = tentativeLayoutRelocColdPart(BC, SortedFunctions, DotAddress);
355+
ColdLayoutDone = true;
356+
if (opts::HotFunctionsAtEnd)
357+
DotAddress = alignTo(DotAddress, opts::AlignText);
358+
};
354359
for (BinaryFunction *Func : SortedFunctions) {
355360
if (!BC.shouldEmit(*Func)) {
356361
HotAddresses[Func] = Func->getAddress();
357362
continue;
358363
}
359364

360-
if (!ColdLayoutDone && CurrentIndex >= LastHotIndex) {
361-
DotAddress =
362-
tentativeLayoutRelocColdPart(BC, SortedFunctions, DotAddress);
363-
ColdLayoutDone = true;
364-
if (opts::HotFunctionsAtEnd)
365-
DotAddress = alignTo(DotAddress, opts::AlignText);
366-
}
365+
if (!ColdLayoutDone && CurrentIndex >= LastHotIndex)
366+
runColdLayout();
367367

368368
DotAddress = alignTo(DotAddress, Func->getMinAlignment());
369369
uint64_t Pad =
@@ -382,6 +382,11 @@ uint64_t LongJmpPass::tentativeLayoutRelocMode(
382382
DotAddress += Func->estimateConstantIslandSize();
383383
++CurrentIndex;
384384
}
385+
386+
// Ensure that tentative code layout always runs for cold blocks.
387+
if (!ColdLayoutDone)
388+
runColdLayout();
389+
385390
// BBs
386391
for (BinaryFunction *Func : SortedFunctions)
387392
tentativeBBLayout(*Func);

bolt/lib/Passes/VeneerElimination.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,12 +73,12 @@ Error VeneerElimination::runOnFunctions(BinaryContext &BC) {
7373
continue;
7474

7575
const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Instr, 0);
76-
if (VeneerDestinations.find(TargetSymbol) == VeneerDestinations.end())
76+
auto It = VeneerDestinations.find(TargetSymbol);
77+
if (It == VeneerDestinations.end())
7778
continue;
7879

7980
VeneerCallers++;
80-
BC.MIB->replaceBranchTarget(Instr, VeneerDestinations[TargetSymbol],
81-
BC.Ctx.get());
81+
BC.MIB->replaceBranchTarget(Instr, It->second, BC.Ctx.get());
8282
}
8383
}
8484
}

0 commit comments

Comments
 (0)