Skip to content

Commit d1584fc

Browse files
committed
Merge branch 'main' into xegpu_blocking_pass
2 parents 6cffa44 + 3807eea commit d1584fc

File tree

3,328 files changed

+816797
-273310
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

3,328 files changed

+816797
-273310
lines changed

.github/new-prs-labeler.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -702,6 +702,7 @@ mlgo:
702702
- llvm/unittests/CodeGen/ML*
703703
- llvm/test/CodeGen/MLRegAlloc/**
704704
- llvm/utils/mlgo-utils/**
705+
- llvm/docs/MLGO.rst
705706

706707
tools:llvm-exegesis:
707708
- llvm/tools/llvm-exegesis/**

.github/workflows/containers/github-action-ci-windows/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ RUN choco install -y handle
108108
109109
RUN pip3 install pywin32 buildbot-worker==2.8.4
110110
111-
ARG RUNNER_VERSION=2.323.0
111+
ARG RUNNER_VERSION=2.324.0
112112
ENV RUNNER_VERSION=$RUNNER_VERSION
113113
114114
RUN powershell -Command \

.github/workflows/containers/github-action-ci/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ FROM docker.io/library/ubuntu:24.04 as base
22
ENV LLVM_SYSROOT=/opt/llvm
33

44
FROM base as stage1-toolchain
5-
ENV LLVM_VERSION=20.1.1
5+
ENV LLVM_VERSION=20.1.4
66

77
RUN apt-get update && \
88
apt-get install -y \
@@ -86,7 +86,7 @@ WORKDIR /home/gha
8686

8787
FROM ci-container as ci-container-agent
8888

89-
ENV GITHUB_RUNNER_VERSION=2.323.0
89+
ENV GITHUB_RUNNER_VERSION=2.324.0
9090

9191
RUN mkdir actions-runner && \
9292
cd actions-runner && \

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,8 @@ class BinaryFunction {
142142
/// Types of profile the function can use. Could be a combination.
143143
enum {
144144
PF_NONE = 0, /// No profile.
145-
PF_LBR = 1, /// Profile is based on last branch records.
146-
PF_SAMPLE = 2, /// Non-LBR sample-based profile.
145+
PF_BRANCH = 1, /// Profile is based on branches or branch stacks.
146+
PF_BASIC = 2, /// Non-branch IP sample-based profile.
147147
PF_MEMEVENT = 4, /// Profile has mem events.
148148
};
149149

@@ -392,7 +392,7 @@ class BinaryFunction {
392392
float ProfileMatchRatio{0.0f};
393393

394394
/// Raw sample count for this function in the profile.
395-
uint64_t RawBranchCount{0};
395+
uint64_t RawSampleCount{0};
396396

397397
/// Dynamically executed function bytes, used for density computation.
398398
uint64_t SampleCountInBytes{0};
@@ -1893,11 +1893,11 @@ class BinaryFunction {
18931893

18941894
/// Return the raw profile information about the number of samples
18951895
/// corresponding to this function.
1896-
uint64_t getRawBranchCount() const { return RawBranchCount; }
1896+
uint64_t getRawSampleCount() const { return RawSampleCount; }
18971897

18981898
/// Set the profile data about the number of samples corresponding
18991899
/// to this function.
1900-
void setRawBranchCount(uint64_t Count) { RawBranchCount = Count; }
1900+
void setRawSampleCount(uint64_t Count) { RawSampleCount = Count; }
19011901

19021902
/// Return the number of dynamically executed bytes, from raw perf data.
19031903
uint64_t getSampleCountInBytes() const { return SampleCountInBytes; }

bolt/include/bolt/Profile/DataAggregator.h

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -92,16 +92,6 @@ class DataAggregator : public DataReader {
9292
uint64_t Addr;
9393
};
9494

95-
/// Used for parsing specific pre-aggregated input files.
96-
struct AggregatedLBREntry {
97-
enum Type : char { BRANCH = 0, FT, FT_EXTERNAL_ORIGIN, TRACE };
98-
Location From;
99-
Location To;
100-
uint64_t Count;
101-
uint64_t Mispreds;
102-
Type EntryType;
103-
};
104-
10595
struct Trace {
10696
uint64_t From;
10797
uint64_t To;
@@ -131,7 +121,6 @@ class DataAggregator : public DataReader {
131121
/// and use them later for processing and assigning profile.
132122
std::unordered_map<Trace, TakenBranchInfo, TraceHash> BranchLBRs;
133123
std::unordered_map<Trace, FTInfo, TraceHash> FallthroughLBRs;
134-
std::vector<AggregatedLBREntry> AggregatedLBRs;
135124
std::unordered_map<uint64_t, uint64_t> BasicSamples;
136125
std::vector<PerfMemSample> MemSamples;
137126

@@ -223,11 +212,6 @@ class DataAggregator : public DataReader {
223212
uint64_t NumTraces{0};
224213
uint64_t NumInvalidTraces{0};
225214
uint64_t NumLongRangeTraces{0};
226-
/// Specifies how many samples were recorded in cold areas if we are dealing
227-
/// with profiling data collected in a bolted binary. For LBRs, incremented
228-
/// for the source of the branch to avoid counting cold activity twice (one
229-
/// for source and another for destination).
230-
uint64_t NumColdSamples{0};
231215
uint64_t NumTotalSamples{0};
232216

233217
/// Looks into system PATH for Linux Perf and set up the aggregator to use it
@@ -257,7 +241,8 @@ class DataAggregator : public DataReader {
257241

258242
/// Semantic actions - parser hooks to interpret parsed perf samples
259243
/// Register a sample (non-LBR mode), i.e. a new hit at \p Address
260-
bool doSample(BinaryFunction &Func, const uint64_t Address, uint64_t Count);
244+
bool doBasicSample(BinaryFunction &Func, const uint64_t Address,
245+
uint64_t Count);
261246

262247
/// Register an intraprocedural branch \p Branch.
263248
bool doIntraBranch(BinaryFunction &Func, uint64_t From, uint64_t To,
@@ -422,9 +407,6 @@ class DataAggregator : public DataReader {
422407
/// an external tool.
423408
std::error_code parsePreAggregatedLBRSamples();
424409

425-
/// Process parsed pre-aggregated data.
426-
void processPreAggregated();
427-
428410
/// If \p Address falls into the binary address space based on memory
429411
/// mapping info \p MMI, then adjust it for further processing by subtracting
430412
/// the base load address. External addresses, i.e. addresses that do not
@@ -486,7 +468,6 @@ class DataAggregator : public DataReader {
486468
void dump(const PerfMemSample &Sample) const;
487469

488470
/// Profile diagnostics print methods
489-
void printColdSamplesDiagnostic() const;
490471
void printLongRangeTracesDiagnostic() const;
491472
void printBranchSamplesDiagnostics() const;
492473
void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) const;

bolt/include/bolt/Profile/DataReader.h

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,8 @@ struct FuncBranchData {
114114

115115
FuncBranchData() {}
116116

117-
FuncBranchData(StringRef Name, ContainerTy Data)
118-
: Name(Name), Data(std::move(Data)) {}
119-
120-
FuncBranchData(StringRef Name, ContainerTy Data, ContainerTy EntryData)
117+
FuncBranchData(StringRef Name, ContainerTy Data = ContainerTy(),
118+
ContainerTy EntryData = ContainerTy())
121119
: Name(Name), Data(std::move(Data)), EntryData(std::move(EntryData)) {}
122120

123121
ErrorOr<const BranchInfo &> getBranch(uint64_t From, uint64_t To) const;
@@ -205,22 +203,23 @@ struct FuncMemData {
205203

206204
FuncMemData() {}
207205

208-
FuncMemData(StringRef Name, ContainerTy Data)
206+
FuncMemData(StringRef Name, ContainerTy Data = ContainerTy())
209207
: Name(Name), Data(std::move(Data)) {}
210208
};
211209

212210
/// Similar to BranchInfo, but instead of recording from-to address (an edge),
213211
/// it records the address of a perf event and the number of times samples hit
214212
/// this address.
215-
struct SampleInfo {
213+
struct BasicSampleInfo {
216214
Location Loc;
217215
int64_t Hits;
218216

219-
SampleInfo(Location Loc, int64_t Hits) : Loc(std::move(Loc)), Hits(Hits) {}
217+
BasicSampleInfo(Location Loc, int64_t Hits)
218+
: Loc(std::move(Loc)), Hits(Hits) {}
220219

221-
bool operator==(const SampleInfo &RHS) const { return Loc == RHS.Loc; }
220+
bool operator==(const BasicSampleInfo &RHS) const { return Loc == RHS.Loc; }
222221

223-
bool operator<(const SampleInfo &RHS) const {
222+
bool operator<(const BasicSampleInfo &RHS) const {
224223
if (Loc < RHS.Loc)
225224
return true;
226225

@@ -229,18 +228,18 @@ struct SampleInfo {
229228

230229
void print(raw_ostream &OS) const;
231230

232-
void mergeWith(const SampleInfo &SI);
231+
void mergeWith(const BasicSampleInfo &SI);
233232
};
234233

235234
/// Helper class to store samples recorded in the address space of a given
236235
/// function, analogous to FuncBranchData but for samples instead of branches.
237-
struct FuncSampleData {
238-
typedef std::vector<SampleInfo> ContainerTy;
236+
struct FuncBasicSampleData {
237+
typedef std::vector<BasicSampleInfo> ContainerTy;
239238

240239
StringRef Name;
241240
ContainerTy Data;
242241

243-
FuncSampleData(StringRef Name, ContainerTy Data)
242+
FuncBasicSampleData(StringRef Name, ContainerTy Data = ContainerTy())
244243
: Name(Name), Data(std::move(Data)) {}
245244

246245
/// Get the number of samples recorded in [Start, End)
@@ -308,7 +307,7 @@ class DataReader : public ProfileReaderBase {
308307
/// The last step is to infer edge counts based on BB execution count. Note
309308
/// this is the opposite of the LBR way, where we infer BB execution count
310309
/// based on edge counts.
311-
void readSampleData(BinaryFunction &BF);
310+
void readBasicSampleData(BinaryFunction &BF);
312311

313312
/// Convert function-level branch data into instruction annotations.
314313
void convertBranchData(BinaryFunction &BF) const;
@@ -382,7 +381,8 @@ class DataReader : public ProfileReaderBase {
382381
/// Return mem data matching one of the names in \p FuncNames.
383382
FuncMemData *getMemDataForNames(const std::vector<StringRef> &FuncNames);
384383

385-
FuncSampleData *getFuncSampleData(const std::vector<StringRef> &FuncNames);
384+
FuncBasicSampleData *
385+
getFuncBasicSampleData(const std::vector<StringRef> &FuncNames);
386386

387387
/// Return a vector of all FuncBranchData matching the list of names.
388388
/// Internally use fuzzy matching to match special names like LTO-generated
@@ -425,7 +425,7 @@ class DataReader : public ProfileReaderBase {
425425
}
426426

427427
using NamesToBranchesMapTy = std::map<StringRef, FuncBranchData>;
428-
using NamesToSamplesMapTy = std::map<StringRef, FuncSampleData>;
428+
using NamesToBasicSamplesMapTy = std::map<StringRef, FuncBasicSampleData>;
429429
using NamesToMemEventsMapTy = std::map<StringRef, FuncMemData>;
430430
using FuncsToBranchesMapTy =
431431
std::unordered_map<const BinaryFunction *, FuncBranchData *>;
@@ -474,7 +474,7 @@ class DataReader : public ProfileReaderBase {
474474
return parseLocation(EndChar, EndNl, true);
475475
}
476476
ErrorOr<BranchInfo> parseBranchInfo();
477-
ErrorOr<SampleInfo> parseSampleInfo();
477+
ErrorOr<BasicSampleInfo> parseSampleInfo();
478478
ErrorOr<MemInfo> parseMemInfo();
479479
ErrorOr<bool> maybeParseNoLBRFlag();
480480
ErrorOr<bool> maybeParseBATFlag();
@@ -488,7 +488,7 @@ class DataReader : public ProfileReaderBase {
488488
unsigned Line{0};
489489
unsigned Col{0};
490490
NamesToBranchesMapTy NamesToBranches;
491-
NamesToSamplesMapTy NamesToSamples;
491+
NamesToBasicSamplesMapTy NamesToBasicSamples;
492492
NamesToMemEventsMapTy NamesToMemEvents;
493493
FuncsToBranchesMapTy FuncsToBranches;
494494
FuncsToMemDataMapTy FuncsToMemData;

bolt/include/bolt/Profile/Heatmap.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,17 @@ class Heatmap {
5252
: BucketSize(BucketSize), MinAddress(MinAddress), MaxAddress(MaxAddress),
5353
TextSections(TextSections) {}
5454

55+
uint64_t HotStart{0};
56+
uint64_t HotEnd{0};
57+
5558
inline bool ignoreAddress(uint64_t Address) const {
5659
return (Address > MaxAddress) || (Address < MinAddress);
5760
}
5861

5962
/// Register \p Count samples at \p Address.
60-
void registerAddress(uint64_t Address) {
63+
void registerAddress(uint64_t Address, uint64_t Count) {
6164
if (!ignoreAddress(Address))
62-
++Map[Address / BucketSize];
65+
Map[Address / BucketSize] += Count;
6366
}
6467

6568
/// Register \p Count samples at [\p StartAddress, \p EndAddress ].

bolt/include/bolt/Profile/ProfileYAMLMapping.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,8 +230,8 @@ LLVM_YAML_STRONG_TYPEDEF(uint16_t, PROFILE_PF)
230230

231231
template <> struct ScalarBitSetTraits<PROFILE_PF> {
232232
static void bitset(IO &io, PROFILE_PF &value) {
233-
io.bitSetCase(value, "lbr", BinaryFunction::PF_LBR);
234-
io.bitSetCase(value, "sample", BinaryFunction::PF_SAMPLE);
233+
io.bitSetCase(value, "lbr", BinaryFunction::PF_BRANCH);
234+
io.bitSetCase(value, "sample", BinaryFunction::PF_BASIC);
235235
io.bitSetCase(value, "memevent", BinaryFunction::PF_MEMEVENT);
236236
}
237237
};

bolt/include/bolt/Utils/CommandLineOpts.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,13 @@
1717

1818
namespace opts {
1919

20-
extern bool HeatmapMode;
20+
enum HeatmapModeKind {
21+
HM_None = 0,
22+
HM_Exclusive, // llvm-bolt-heatmap
23+
HM_Optional // perf2bolt --heatmap
24+
};
25+
26+
extern HeatmapModeKind HeatmapMode;
2127
extern bool BinaryAnalysisMode;
2228

2329
extern llvm::cl::OptionCategory BoltCategory;
@@ -45,6 +51,7 @@ extern llvm::cl::opt<unsigned> HeatmapBlock;
4551
extern llvm::cl::opt<unsigned long long> HeatmapMaxAddress;
4652
extern llvm::cl::opt<unsigned long long> HeatmapMinAddress;
4753
extern llvm::cl::opt<bool> HeatmapPrintMappings;
54+
extern llvm::cl::opt<std::string> HeatmapOutput;
4855
extern llvm::cl::opt<bool> HotData;
4956
extern llvm::cl::opt<bool> HotFunctionsAtEnd;
5057
extern llvm::cl::opt<bool> HotText;

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ extern cl::opt<bool> UpdateDebugSections;
6666
extern cl::opt<unsigned> Verbosity;
6767

6868
extern bool BinaryAnalysisMode;
69-
extern bool HeatmapMode;
69+
extern HeatmapModeKind HeatmapMode;
7070
extern bool processAllFunctions();
7171

7272
static cl::opt<bool> CheckEncoding(
@@ -473,7 +473,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
473473
OS << "\n Image : 0x" << Twine::utohexstr(getImageAddress());
474474
if (ExecutionCount != COUNT_NO_PROFILE) {
475475
OS << "\n Exec Count : " << ExecutionCount;
476-
OS << "\n Branch Count: " << RawBranchCount;
476+
OS << "\n Sample Count: " << RawSampleCount;
477477
OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
478478
}
479479

@@ -3326,7 +3326,7 @@ void BinaryFunction::duplicateConstantIslands() {
33263326
static std::string constructFilename(std::string Filename,
33273327
std::string Annotation,
33283328
std::string Suffix) {
3329-
std::replace(Filename.begin(), Filename.end(), '/', '-');
3329+
llvm::replace(Filename, '/', '-');
33303330
if (!Annotation.empty())
33313331
Annotation.insert(0, "-");
33323332
if (Filename.size() + Annotation.size() + Suffix.size() > MAX_PATH) {

0 commit comments

Comments
 (0)