Skip to content

Commit 0d2677d

Browse files
committed
Reduce CHECKs
Created using spr 1.3.6-beta.1
2 parents f5ff8e0 + d3daa3c commit 0d2677d

File tree

8,751 files changed

+616834
-167385
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

8,751 files changed

+616834
-167385
lines changed

.github/workflows/containers/github-action-ci/stage1.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ FROM docker.io/library/ubuntu:22.04 as base
22
ENV LLVM_SYSROOT=/opt/llvm
33

44
FROM base as stage1-toolchain
5-
ENV LLVM_VERSION=18.1.8
5+
ENV LLVM_VERSION=19.1.2
66

77
RUN apt-get update && \
88
apt-get install -y \

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,9 @@ class BinaryFunction {
386386
/// Raw branch count for this function in the profile.
387387
uint64_t RawBranchCount{0};
388388

389+
/// Dynamically executed function bytes, used for density computation.
390+
uint64_t SampleCountInBytes{0};
391+
389392
/// Indicates the type of profile the function is using.
390393
uint16_t ProfileFlags{PF_NONE};
391394

@@ -1844,6 +1847,9 @@ class BinaryFunction {
18441847
/// to this function.
18451848
void setRawBranchCount(uint64_t Count) { RawBranchCount = Count; }
18461849

1850+
/// Return the number of dynamically executed bytes, from raw perf data.
1851+
uint64_t getSampleCountInBytes() const { return SampleCountInBytes; }
1852+
18471853
/// Return the execution count for functions with known profile.
18481854
/// Return 0 if the function has no profile.
18491855
uint64_t getKnownExecutionCount() const {

bolt/include/bolt/Core/DIEBuilder.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ class DIEBuilder {
314314

315315
BC.errs()
316316
<< "BOLT-ERROR: unable to find TypeUnit for Type Unit at offset 0x"
317-
<< DU.getOffset() << "\n";
317+
<< Twine::utohexstr(DU.getOffset()) << "\n";
318318
return nullptr;
319319
}
320320

bolt/include/bolt/Profile/BoltAddressTranslation.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -141,15 +141,13 @@ class BoltAddressTranslation {
141141
uint64_t FuncOutputAddress) const;
142142

143143
/// Write the serialized address translation table for a function.
144-
template <bool Cold>
145-
void writeMaps(std::map<uint64_t, MapTy> &Maps, uint64_t &PrevAddress,
146-
raw_ostream &OS);
144+
template <bool Cold> void writeMaps(uint64_t &PrevAddress, raw_ostream &OS);
147145

148146
/// Read the serialized address translation table for a function.
149147
/// Return a parse error if failed.
150148
template <bool Cold>
151-
void parseMaps(std::vector<uint64_t> &HotFuncs, uint64_t &PrevAddress,
152-
DataExtractor &DE, uint64_t &Offset, Error &Err);
149+
void parseMaps(uint64_t &PrevAddress, DataExtractor &DE, uint64_t &Offset,
150+
Error &Err);
153151

154152
/// Returns the bitmask with set bits corresponding to indices of BRANCHENTRY
155153
/// entries in function address translation map.
@@ -161,6 +159,9 @@ class BoltAddressTranslation {
161159

162160
std::map<uint64_t, MapTy> Maps;
163161

162+
/// Ordered vector with addresses of hot functions.
163+
std::vector<uint64_t> HotFuncs;
164+
164165
/// Map a function to its basic blocks count
165166
std::unordered_map<uint64_t, size_t> NumBasicBlocksMap;
166167

bolt/include/bolt/Utils/CommandLineOpts.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ extern llvm::cl::opt<bool> PrintSections;
5555
enum ProfileFormatKind { PF_Fdata, PF_YAML };
5656

5757
extern llvm::cl::opt<ProfileFormatKind> ProfileFormat;
58+
extern llvm::cl::opt<bool> ShowDensity;
5859
extern llvm::cl::opt<bool> SplitEH;
5960
extern llvm::cl::opt<bool> StrictMode;
6061
extern llvm::cl::opt<bool> TimeOpts;

bolt/lib/Core/BinaryContext.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1294,8 +1294,8 @@ bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
12941294
Veneer->getOrCreateLocalLabel(Address);
12951295
Veneer->setMaxSize(TotalSize);
12961296
Veneer->updateState(BinaryFunction::State::Disassembled);
1297-
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address
1298-
<< "\n");
1297+
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x"
1298+
<< Twine::utohexstr(Address) << "\n");
12991299
return true;
13001300
};
13011301

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2577,6 +2577,7 @@ struct CFISnapshot {
25772577
case MCCFIInstruction::OpAdjustCfaOffset:
25782578
case MCCFIInstruction::OpWindowSave:
25792579
case MCCFIInstruction::OpNegateRAState:
2580+
case MCCFIInstruction::OpNegateRAStateWithPC:
25802581
case MCCFIInstruction::OpLLVMDefAspaceCfa:
25812582
case MCCFIInstruction::OpLabel:
25822583
llvm_unreachable("unsupported CFI opcode");
@@ -2715,6 +2716,7 @@ struct CFISnapshotDiff : public CFISnapshot {
27152716
case MCCFIInstruction::OpAdjustCfaOffset:
27162717
case MCCFIInstruction::OpWindowSave:
27172718
case MCCFIInstruction::OpNegateRAState:
2719+
case MCCFIInstruction::OpNegateRAStateWithPC:
27182720
case MCCFIInstruction::OpLLVMDefAspaceCfa:
27192721
case MCCFIInstruction::OpLabel:
27202722
llvm_unreachable("unsupported CFI opcode");
@@ -2864,6 +2866,7 @@ BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState,
28642866
case MCCFIInstruction::OpAdjustCfaOffset:
28652867
case MCCFIInstruction::OpWindowSave:
28662868
case MCCFIInstruction::OpNegateRAState:
2869+
case MCCFIInstruction::OpNegateRAStateWithPC:
28672870
case MCCFIInstruction::OpLLVMDefAspaceCfa:
28682871
case MCCFIInstruction::OpLabel:
28692872
llvm_unreachable("unsupported CFI opcode");
@@ -3684,9 +3687,8 @@ BinaryFunction::BasicBlockListType BinaryFunction::dfs() const {
36843687
BinaryBasicBlock *BB = Stack.top();
36853688
Stack.pop();
36863689

3687-
if (Visited.find(BB) != Visited.end())
3690+
if (!Visited.insert(BB).second)
36883691
continue;
3689-
Visited.insert(BB);
36903692
DFS.push_back(BB);
36913693

36923694
for (BinaryBasicBlock *SuccBB : BB->landing_pads()) {
@@ -3879,11 +3881,8 @@ void BinaryFunction::disambiguateJumpTables(
38793881
JumpTable *JT = getJumpTable(Inst);
38803882
if (!JT)
38813883
continue;
3882-
auto Iter = JumpTables.find(JT);
3883-
if (Iter == JumpTables.end()) {
3884-
JumpTables.insert(JT);
3884+
if (JumpTables.insert(JT).second)
38853885
continue;
3886-
}
38873886
// This instruction is an indirect jump using a jump table, but it is
38883887
// using the same jump table of another jump. Try all our tricks to
38893888
// extract the jump table symbol and make it point to a new, duplicated JT

bolt/lib/Core/DIEBuilder.cpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,9 @@ getDWOName(llvm::DWARFUnit &CU,
5757
"DW_AT_dwo_name/DW_AT_GNU_dwo_name does not exist.");
5858
if (DwarfOutputPath) {
5959
DWOName = std::string(sys::path::filename(DWOName));
60-
auto Iter = NameToIndexMap.find(DWOName);
61-
if (Iter == NameToIndexMap.end())
62-
Iter = NameToIndexMap.insert({DWOName, 0}).first;
63-
DWOName.append(std::to_string(Iter->second));
64-
++Iter->second;
60+
uint32_t &Index = NameToIndexMap[DWOName];
61+
DWOName.append(std::to_string(Index));
62+
++Index;
6563
}
6664
DWOName.append(".dwo");
6765
return DWOName;
@@ -283,8 +281,7 @@ void DIEBuilder::buildTypeUnits(DebugStrOffsetsWriter *StrOffsetWriter,
283281
for (auto &Row : TUIndex.getRows()) {
284282
uint64_t Signature = Row.getSignature();
285283
// manually populate TypeUnit to UnitVector
286-
DwarfContext->getTypeUnitForHash(DwarfContext->getMaxVersion(), Signature,
287-
true);
284+
DwarfContext->getTypeUnitForHash(Signature, true);
288285
}
289286
}
290287
const unsigned int CUNum = getCUNum(DwarfContext, isDWO());

bolt/lib/Passes/BinaryPasses.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "bolt/Core/ParallelUtilities.h"
1616
#include "bolt/Passes/ReorderAlgorithm.h"
1717
#include "bolt/Passes/ReorderFunctions.h"
18+
#include "bolt/Utils/CommandLineOpts.h"
1819
#include "llvm/Support/CommandLine.h"
1920
#include <atomic>
2021
#include <mutex>
@@ -223,6 +224,18 @@ static cl::opt<unsigned> TopCalledLimit(
223224
"functions section"),
224225
cl::init(100), cl::Hidden, cl::cat(BoltCategory));
225226

227+
// Profile density options, synced with llvm-profgen/ProfileGenerator.cpp
228+
static cl::opt<int> ProfileDensityCutOffHot(
229+
"profile-density-cutoff-hot", cl::init(990000),
230+
cl::desc("Total samples cutoff for functions used to calculate "
231+
"profile density."));
232+
233+
static cl::opt<double> ProfileDensityThreshold(
234+
"profile-density-threshold", cl::init(60),
235+
cl::desc("If the profile density is below the given threshold, it "
236+
"will be suggested to increase the sampling rate."),
237+
cl::Optional);
238+
226239
} // namespace opts
227240

228241
namespace llvm {
@@ -1383,6 +1396,7 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
13831396
uint64_t StaleSampleCount = 0;
13841397
uint64_t InferredSampleCount = 0;
13851398
std::vector<const BinaryFunction *> ProfiledFunctions;
1399+
std::vector<std::pair<double, uint64_t>> FuncDensityList;
13861400
const char *StaleFuncsHeader = "BOLT-INFO: Functions with stale profile:\n";
13871401
for (auto &BFI : BC.getBinaryFunctions()) {
13881402
const BinaryFunction &Function = BFI.second;
@@ -1441,6 +1455,22 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
14411455
StaleSampleCount += SampleCount;
14421456
++NumAllStaleFunctions;
14431457
}
1458+
1459+
if (opts::ShowDensity) {
1460+
uint64_t Size = Function.getSize();
1461+
// In case of BOLT split functions registered in BAT, executed traces are
1462+
// automatically attributed to the main fragment. Add up function sizes
1463+
// for all fragments.
1464+
if (IsHotParentOfBOLTSplitFunction)
1465+
for (const BinaryFunction *Fragment : Function.getFragments())
1466+
Size += Fragment->getSize();
1467+
double Density = (double)1.0 * Function.getSampleCountInBytes() / Size;
1468+
FuncDensityList.emplace_back(Density, SampleCount);
1469+
LLVM_DEBUG(BC.outs() << Function << ": executed bytes "
1470+
<< Function.getSampleCountInBytes() << ", size (b) "
1471+
<< Size << ", density " << Density
1472+
<< ", sample count " << SampleCount << '\n');
1473+
}
14441474
}
14451475
BC.NumProfiledFuncs = ProfiledFunctions.size();
14461476
BC.NumStaleProfileFuncs = NumStaleProfileFunctions;
@@ -1684,6 +1714,50 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
16841714
BC.outs() << ". Use -print-unknown to see the list.";
16851715
BC.outs() << '\n';
16861716
}
1717+
1718+
if (opts::ShowDensity) {
1719+
double Density = 0.0;
1720+
// Sorted by the density in descending order.
1721+
llvm::stable_sort(FuncDensityList,
1722+
[&](const std::pair<double, uint64_t> &A,
1723+
const std::pair<double, uint64_t> &B) {
1724+
if (A.first != B.first)
1725+
return A.first > B.first;
1726+
return A.second < B.second;
1727+
});
1728+
1729+
uint64_t AccumulatedSamples = 0;
1730+
uint32_t I = 0;
1731+
assert(opts::ProfileDensityCutOffHot <= 1000000 &&
1732+
"The cutoff value is greater than 1000000(100%)");
1733+
while (AccumulatedSamples <
1734+
TotalSampleCount *
1735+
static_cast<float>(opts::ProfileDensityCutOffHot) /
1736+
1000000 &&
1737+
I < FuncDensityList.size()) {
1738+
AccumulatedSamples += FuncDensityList[I].second;
1739+
Density = FuncDensityList[I].first;
1740+
I++;
1741+
}
1742+
if (Density == 0.0) {
1743+
BC.errs() << "BOLT-WARNING: the output profile is empty or the "
1744+
"--profile-density-cutoff-hot option is "
1745+
"set too low. Please check your command.\n";
1746+
} else if (Density < opts::ProfileDensityThreshold) {
1747+
BC.errs()
1748+
<< "BOLT-WARNING: BOLT is estimated to optimize better with "
1749+
<< format("%.1f", opts::ProfileDensityThreshold / Density)
1750+
<< "x more samples. Please consider increasing sampling rate or "
1751+
"profiling for longer duration to get more samples.\n";
1752+
}
1753+
1754+
BC.outs() << "BOLT-INFO: Functions with density >= "
1755+
<< format("%.1f", Density) << " account for "
1756+
<< format("%.2f",
1757+
static_cast<double>(opts::ProfileDensityCutOffHot) /
1758+
10000)
1759+
<< "% total sample counts.\n";
1760+
}
16871761
return Error::success();
16881762
}
16891763

bolt/lib/Passes/Instrumentation.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,8 @@ static bool hasAArch64ExclusiveMemop(
109109
BinaryBasicBlock *BB = BBQueue.front().first;
110110
bool IsLoad = BBQueue.front().second;
111111
BBQueue.pop();
112-
if (Visited.find(BB) != Visited.end())
112+
if (!Visited.insert(BB).second)
113113
continue;
114-
Visited.insert(BB);
115114

116115
for (const MCInst &Inst : *BB) {
117116
// Two loads one after another - skip whole function
@@ -126,8 +125,7 @@ static bool hasAArch64ExclusiveMemop(
126125
if (BC.MIB->isAArch64ExclusiveLoad(Inst))
127126
IsLoad = true;
128127

129-
if (IsLoad && BBToSkip.find(BB) == BBToSkip.end()) {
130-
BBToSkip.insert(BB);
128+
if (IsLoad && BBToSkip.insert(BB).second) {
131129
if (opts::Verbosity >= 2) {
132130
outs() << "BOLT-INSTRUMENTER: skip BB " << BB->getName()
133131
<< " due to exclusive instruction in function "

0 commit comments

Comments
 (0)