Skip to content

Commit 9ed862d

Browse files
committed
[𝘀𝗽𝗿] changes to main this commit is based on
Created using spr 1.3.4 [skip ci]
1 parent 0de8ff6 commit 9ed862d

File tree

11 files changed

+285
-82
lines changed

11 files changed

+285
-82
lines changed

bolt/docs/Heatmaps.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,12 @@ For the generation, the default bucket size was used with a line size of 128.
8989
Some useful options are:
9090

9191
```
92-
-line-size=<uint> - number of entries per line (default 256)
92+
-line-size=<uint> - number of entries per line (default 256).
93+
Use 128 if the heatmap doesn't fit the screen horizontally.
94+
-block-size=<uint>[:<pow2>,...] - heatmap bucket size in bytes (default 64),
95+
optionally followed by power-of-two zoom-out scales to
96+
produce coarse-grained heatmaps (default 6, 2, 6 =>
97+
4K, 16K, 1MB).
9398
-max-address=<uint> - maximum address considered valid for heatmap (default 4GB)
9499
-print-mappings - print mappings in the legend, between characters/blocks and text sections (default false)
95100
```

bolt/include/bolt/Profile/DataAggregator.h

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -370,33 +370,46 @@ class DataAggregator : public DataReader {
370370
/// memory.
371371
///
372372
/// File format syntax:
373-
/// {B|F|f|T} [<start_id>:]<start_offset> [<end_id>:]<end_offset> [<ft_end>]
374-
/// <count> [<mispred_count>]
373+
/// E <event>
374+
/// S <start> <count>
375+
/// T <start> <end> <ft_end> <count>
376+
/// B <start> <end> <count> <mispred_count>
377+
/// [Ff] <start> <end> <count>
375378
///
376-
/// B - indicates an aggregated branch
377-
/// F - an aggregated fall-through
379+
/// where <start>, <end>, <ft_end> have the format [<id>:]<offset>
380+
///
381+
/// E - name of the sampling event used for subsequent entries
382+
/// S - indicates an aggregated basic sample at <start>
383+
/// B - indicates an aggregated branch from <start> to <end>
384+
/// F - an aggregated fall-through from <start> to <end>
378385
/// f - an aggregated fall-through with external origin - used to disambiguate
379386
/// between a return hitting a basic block head and a regular internal
380387
/// jump to the block
381-
/// T - an aggregated trace: branch with a fall-through (from, to, ft_end)
382-
///
383-
/// <start_id> - build id of the object containing the start address. We can
384-
/// skip it for the main binary and use "X" for an unknown object. This will
385-
/// save some space and facilitate human parsing.
386-
///
387-
/// <start_offset> - hex offset from the object base load address (0 for the
388-
/// main executable unless it's PIE) to the start address.
388+
/// T - an aggregated trace: branch from <start> to <end> with a fall-through
389+
/// to <ft_end>
389390
///
390-
/// <end_id>, <end_offset> - same for the end address.
391+
/// <id> - build id of the object containing the address. We can skip it for
392+
/// the main binary and use "X" for an unknown object. This will save some
393+
/// space and facilitate human parsing.
391394
///
392-
/// <ft_end> - same for the fallthrough_end address.
395+
/// <offset> - hex offset from the object base load address (0 for the
396+
/// main executable unless it's PIE) to the address.
393397
///
394-
/// <count> - total aggregated count of the branch or a fall-through.
398+
/// <count> - total aggregated count.
395399
///
396400
/// <mispred_count> - the number of times the branch was mispredicted.
397-
/// Omitted for fall-throughs.
398401
///
399402
/// Example:
403+
/// Basic samples profile:
404+
/// E cycles
405+
/// S 41be50 3
406+
/// E br_inst_retired.near_taken
407+
/// S 41be60 6
408+
///
409+
/// Trace profile combining branches and fall-throughs:
410+
/// T 4b196f 4b19e0 4b19ef 2
411+
///
412+
/// Legacy branch profile with separate branches and fall-throughs:
400413
/// F 41be50 41be50 3
401414
/// F 41be90 41be90 4
402415
/// B 4b1942 39b57f0 3 0

bolt/include/bolt/Profile/Heatmap.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ class Heatmap {
5555
uint64_t HotStart{0};
5656
uint64_t HotEnd{0};
5757

58+
static constexpr uint64_t KernelBaseAddr = 0xffff800000000000;
5859
inline bool ignoreAddress(uint64_t Address) const {
5960
return (Address > MaxAddress) || (Address < MinAddress);
6061
}
@@ -85,6 +86,10 @@ class Heatmap {
8586
void printSectionHotness(raw_ostream &OS) const;
8687

8788
size_t size() const { return Map.size(); }
89+
90+
/// Increase bucket size by a factor of 2^\p Pow2Scale, recomputing the heatmap.
91+
/// Returns new bucket size.
92+
uint64_t resizeBucket(uint64_t Pow2Scale);
8893
};
8994

9095
} // namespace bolt

bolt/include/bolt/Utils/CommandLineOpts.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,19 @@ enum HeatmapModeKind {
2323
HM_Optional // perf2bolt --heatmap
2424
};
2525

26+
struct HeatmapBlockSpec {
27+
unsigned Initial{0}; // Initial block size in bytes.
28+
llvm::SmallVector<unsigned> Scales; // Pow2 zoom factors applied cumulatively.
29+
};
30+
31+
/// llvm::cl parser specialisation used for options whose value is a
/// HeatmapBlockSpec. Only the declaration lives here; parse() is defined
/// elsewhere.
struct HeatmapBlockSpecParser : public llvm::cl::parser<HeatmapBlockSpec> {
  explicit HeatmapBlockSpecParser(llvm::cl::Option &Opt)
      : llvm::cl::parser<HeatmapBlockSpec>(Opt) {}

  /// Fills \p Val from \p Arg.
  /// \returns true on error.
  bool parse(llvm::cl::Option &O, llvm::StringRef ArgName, llvm::StringRef Arg,
             HeatmapBlockSpec &Val);
};
38+
2639
extern HeatmapModeKind HeatmapMode;
2740
extern bool BinaryAnalysisMode;
2841

@@ -47,7 +60,8 @@ extern llvm::cl::opt<bool> EqualizeBBCounts;
4760
extern llvm::cl::opt<bool> ForcePatch;
4861
extern llvm::cl::opt<bool> RemoveSymtab;
4962
extern llvm::cl::opt<unsigned> ExecutionCountThreshold;
50-
extern llvm::cl::opt<unsigned> HeatmapBlock;
63+
extern llvm::cl::opt<HeatmapBlockSpec, false, HeatmapBlockSpecParser>
64+
HeatmapBlock;
5165
extern llvm::cl::opt<unsigned long long> HeatmapMaxAddress;
5266
extern llvm::cl::opt<unsigned long long> HeatmapMinAddress;
5367
extern llvm::cl::opt<bool> HeatmapPrintMappings;

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 91 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1204,77 +1204,106 @@ ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
12041204
}
12051205

12061206
std::error_code DataAggregator::parseAggregatedLBREntry() {
1207-
while (checkAndConsumeFS()) {
1208-
}
1207+
enum AggregatedLBREntry : char {
1208+
INVALID = 0,
1209+
EVENT_NAME, // E
1210+
TRACE, // T
1211+
SAMPLE, // S
1212+
BRANCH, // B
1213+
FT, // F
1214+
FT_EXTERNAL_ORIGIN // f
1215+
} Type = INVALID;
1216+
1217+
// The number of fields to parse, set based on Type.
1218+
int AddrNum = 0;
1219+
int CounterNum = 0;
1220+
// Storage for parsed fields.
1221+
StringRef EventName;
1222+
std::optional<Location> Addr[3];
1223+
int64_t Counters[2];
1224+
1225+
while (Type == INVALID || Type == EVENT_NAME) {
1226+
while (checkAndConsumeFS()) {
1227+
}
1228+
ErrorOr<StringRef> StrOrErr =
1229+
parseString(FieldSeparator, Type == EVENT_NAME);
1230+
if (std::error_code EC = StrOrErr.getError())
1231+
return EC;
1232+
StringRef Str = StrOrErr.get();
12091233

1210-
ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
1211-
if (std::error_code EC = TypeOrErr.getError())
1212-
return EC;
1213-
enum AggregatedLBREntry { TRACE, BRANCH, FT, FT_EXTERNAL_ORIGIN, INVALID };
1214-
auto Type = StringSwitch<AggregatedLBREntry>(TypeOrErr.get())
1215-
.Case("T", TRACE)
1216-
.Case("B", BRANCH)
1217-
.Case("F", FT)
1218-
.Case("f", FT_EXTERNAL_ORIGIN)
1219-
.Default(INVALID);
1220-
if (Type == INVALID) {
1221-
reportError("expected T, B, F or f");
1222-
return make_error_code(llvm::errc::io_error);
1223-
}
1234+
if (Type == EVENT_NAME) {
1235+
EventName = Str;
1236+
break;
1237+
}
12241238

1225-
while (checkAndConsumeFS()) {
1226-
}
1227-
ErrorOr<Location> From = parseLocationOrOffset();
1228-
if (std::error_code EC = From.getError())
1229-
return EC;
1239+
Type = StringSwitch<AggregatedLBREntry>(Str)
1240+
.Case("T", TRACE)
1241+
.Case("S", SAMPLE)
1242+
.Case("E", EVENT_NAME)
1243+
.Case("B", BRANCH)
1244+
.Case("F", FT)
1245+
.Case("f", FT_EXTERNAL_ORIGIN)
1246+
.Default(INVALID);
1247+
1248+
if (Type == INVALID) {
1249+
reportError("expected T, S, E, B, F or f");
1250+
return make_error_code(llvm::errc::io_error);
1251+
}
12301252

1231-
while (checkAndConsumeFS()) {
1253+
using SSI = StringSwitch<int>;
1254+
AddrNum = SSI(Str).Case("T", 3).Case("S", 1).Case("E", 0).Default(2);
1255+
CounterNum = SSI(Str).Case("B", 2).Case("E", 0).Default(1);
12321256
}
1233-
ErrorOr<Location> To = parseLocationOrOffset();
1234-
if (std::error_code EC = To.getError())
1235-
return EC;
12361257

1237-
ErrorOr<Location> TraceFtEnd = std::error_code();
1238-
if (Type == AggregatedLBREntry::TRACE) {
1258+
for (int I = 0; I < AddrNum; ++I) {
12391259
while (checkAndConsumeFS()) {
12401260
}
1241-
TraceFtEnd = parseLocationOrOffset();
1242-
if (std::error_code EC = TraceFtEnd.getError())
1261+
ErrorOr<Location> AddrOrErr = parseLocationOrOffset();
1262+
if (std::error_code EC = AddrOrErr.getError())
12431263
return EC;
1264+
Addr[I] = AddrOrErr.get();
12441265
}
12451266

1246-
while (checkAndConsumeFS()) {
1247-
}
1248-
ErrorOr<int64_t> Frequency =
1249-
parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
1250-
if (std::error_code EC = Frequency.getError())
1251-
return EC;
1252-
1253-
uint64_t Mispreds = 0;
1254-
if (Type == AggregatedLBREntry::BRANCH) {
1267+
for (int I = 0; I < CounterNum; ++I) {
12551268
while (checkAndConsumeFS()) {
12561269
}
1257-
ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
1258-
if (std::error_code EC = MispredsOrErr.getError())
1270+
ErrorOr<int64_t> CountOrErr =
1271+
parseNumberField(FieldSeparator, I + 1 == CounterNum);
1272+
if (std::error_code EC = CountOrErr.getError())
12591273
return EC;
1260-
Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
1274+
Counters[I] = CountOrErr.get();
12611275
}
12621276

12631277
if (!checkAndConsumeNewLine()) {
12641278
reportError("expected end of line");
12651279
return make_error_code(llvm::errc::io_error);
12661280
}
12671281

1268-
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From->Offset);
1269-
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To->Offset);
1282+
if (Type == EVENT_NAME) {
1283+
EventNames.insert(EventName);
1284+
return std::error_code();
1285+
}
1286+
1287+
const uint64_t FromOffset = Addr[0]->Offset;
1288+
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(FromOffset);
1289+
if (FromFunc)
1290+
FromFunc->setHasProfileAvailable();
12701291

1271-
for (BinaryFunction *BF : {FromFunc, ToFunc})
1272-
if (BF)
1273-
BF->setHasProfileAvailable();
1292+
int64_t Count = Counters[0];
1293+
int64_t Mispreds = Counters[1];
12741294

1275-
uint64_t Count = static_cast<uint64_t>(Frequency.get());
1295+
if (Type == SAMPLE) {
1296+
BasicSamples[FromOffset] += Count;
1297+
NumTotalSamples += Count;
1298+
return std::error_code();
1299+
}
12761300

1277-
Trace Trace(From->Offset, To->Offset);
1301+
const uint64_t ToOffset = Addr[1]->Offset;
1302+
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(ToOffset);
1303+
if (ToFunc)
1304+
ToFunc->setHasProfileAvailable();
1305+
1306+
Trace Trace(FromOffset, ToOffset);
12781307
// Taken trace
12791308
if (Type == TRACE || Type == BRANCH) {
12801309
TakenBranchInfo &Info = BranchLBRs[Trace];
@@ -1285,8 +1314,9 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
12851314
}
12861315
// Construct fallthrough part of the trace
12871316
if (Type == TRACE) {
1288-
Trace.From = To->Offset;
1289-
Trace.To = TraceFtEnd->Offset;
1317+
const uint64_t TraceFtEndOffset = Addr[2]->Offset;
1318+
Trace.From = ToOffset;
1319+
Trace.To = TraceFtEndOffset;
12901320
Type = FromFunc == ToFunc ? FT : FT_EXTERNAL_ORIGIN;
12911321
}
12921322
// Add fallthrough trace
@@ -1314,8 +1344,9 @@ std::error_code DataAggregator::printLBRHeatMap() {
13141344
opts::HeatmapMaxAddress = 0xffffffffffffffff;
13151345
opts::HeatmapMinAddress = KernelBaseAddr;
13161346
}
1317-
Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
1318-
opts::HeatmapMaxAddress, getTextSections(BC));
1347+
opts::HeatmapBlockSpec HMBS = opts::HeatmapBlock;
1348+
Heatmap HM(HMBS.Initial, opts::HeatmapMinAddress, opts::HeatmapMaxAddress,
1349+
getTextSections(BC));
13191350
auto getSymbolValue = [&](const MCSymbol *Symbol) -> uint64_t {
13201351
if (Symbol)
13211352
if (ErrorOr<uint64_t> SymValue = BC->getSymbolValue(*Symbol))
@@ -1365,6 +1396,14 @@ std::error_code DataAggregator::printLBRHeatMap() {
13651396
HM.printCDF(opts::HeatmapOutput + ".csv");
13661397
HM.printSectionHotness(opts::HeatmapOutput + "-section-hotness.csv");
13671398
}
1399+
// Provide coarse-grained heatmaps if requested via zoom-out scales
1400+
for (const uint64_t NewBucketSizeScale : HMBS.Scales) {
1401+
uint64_t NewBucketSize = HM.resizeBucket(NewBucketSizeScale);
1402+
if (opts::HeatmapOutput == "-")
1403+
HM.print(opts::HeatmapOutput);
1404+
else
1405+
HM.print(formatv("{0}-{1}", opts::HeatmapOutput, NewBucketSize).str());
1406+
}
13681407

13691408
return std::error_code();
13701409
}

bolt/lib/Profile/Heatmap.cpp

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ void Heatmap::print(StringRef FileName) const {
5555
errs() << "error opening output file: " << EC.message() << '\n';
5656
exit(1);
5757
}
58+
outs() << "HEATMAP: dumping heatmap with bucket size " << BucketSize << " to "
59+
<< FileName << '\n';
5860
print(OS);
5961
}
6062

@@ -304,14 +306,20 @@ void Heatmap::printSectionHotness(raw_ostream &OS) const {
304306
if (TextSections.empty())
305307
return;
306308

307-
uint64_t UnmappedHotness = 0;
309+
uint64_t UnmappedCounts = 0;
310+
uint64_t KernelCounts = 0;
311+
AddressRange KernelRange(KernelBaseAddr, -1ULL);
308312
auto RecordUnmappedBucket = [&](uint64_t Address, uint64_t Frequency) {
313+
if (KernelRange.contains(Address)) {
314+
KernelCounts += Frequency;
315+
return;
316+
}
309317
if (opts::Verbosity >= 1)
310318
errs() << "Couldn't map the address bucket [0x"
311319
<< Twine::utohexstr(Address) << ", 0x"
312320
<< Twine::utohexstr(Address + BucketSize) << "] containing "
313321
<< Frequency << " samples to a text section in the binary.";
314-
UnmappedHotness += Frequency;
322+
UnmappedCounts += Frequency;
315323
};
316324

317325
AddressRange HotTextRange(HotStart, HotEnd);
@@ -348,7 +356,7 @@ void Heatmap::printSectionHotness(raw_ostream &OS) const {
348356

349357
OS << "Section Name, Begin Address, End Address, Percentage Hotness, "
350358
<< "Utilization Pct, Partition Score\n";
351-
const uint64_t MappedCounts = NumTotalCounts - UnmappedHotness;
359+
const uint64_t MappedCounts = NumTotalCounts - UnmappedCounts - KernelCounts;
352360
for (const auto [Name, Begin, End] : Sections) {
353361
const float Hotness = 1. * SectionHotness[Name] / NumTotalCounts;
354362
const float MappedHotness =
@@ -360,9 +368,20 @@ void Heatmap::printSectionHotness(raw_ostream &OS) const {
360368
OS << formatv("{0}, {1:x}, {2:x}, {3:f4}, {4:f4}, {5:f4}\n", Name, Begin,
361369
End, 100. * Hotness, 100. * Utilization, PartitionScore);
362370
}
363-
if (UnmappedHotness > 0)
371+
if (KernelCounts)
372+
OS << formatv("[kernel], {0:x}, {1:x}, {0:f4}, 0, 0\n", KernelBaseAddr,
373+
-1ULL, 100.0 * KernelCounts / NumTotalCounts);
374+
if (UnmappedCounts)
364375
OS << formatv("[unmapped], 0x0, 0x0, {0:f4}, 0, 0\n",
365-
100.0 * UnmappedHotness / NumTotalCounts);
376+
100.0 * UnmappedCounts / NumTotalCounts);
377+
}
378+
379+
uint64_t Heatmap::resizeBucket(uint64_t Pow2Scale) {
380+
std::map<uint64_t, uint64_t> NewMap;
381+
for (const auto [Bucket, Count] : Map)
382+
NewMap[Bucket >> Pow2Scale] += Count;
383+
Map = NewMap;
384+
return BucketSize <<= Pow2Scale;
366385
}
367386
} // namespace bolt
368387
} // namespace llvm

0 commit comments

Comments
 (0)