Skip to content

Commit 5c513dc

Browse files
committed
[𝘀𝗽𝗿] initial version
Created using spr 1.3.4
2 parents a3c4a5c + 202eea6 commit 5c513dc

File tree

8 files changed

+156
-56
lines changed

8 files changed

+156
-56
lines changed

bolt/include/bolt/Profile/DataAggregator.h

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -370,37 +370,43 @@ class DataAggregator : public DataReader {
370370
/// memory.
371371
///
372372
/// File format syntax:
373-
/// {B|F|f|T} [<start_id>:]<start_offset> [<end_id>:]<end_offset> [<ft_end>]
374-
/// <count> [<mispred_count>]
373+
/// {S|B|F|f|T} <start> [<end>] [<ft_end>] <count> [<mispred_count>]
375374
///
376-
/// B - indicates an aggregated branch
377-
/// F - an aggregated fall-through
375+
/// where <start>, <end>, <ft_end> have the format [<id>:]<offset>
376+
///
377+
/// S - indicates an aggregated basic sample at <start>
378+
/// B - indicates an aggregated branch from <start> to <end>
379+
/// F - an aggregated fall-through from <start> to <end>
378380
/// f - an aggregated fall-through with external origin - used to disambiguate
379381
/// between a return hitting a basic block head and a regular internal
380382
/// jump to the block
381-
/// T - an aggregated trace: branch with a fall-through (from, to, ft_end)
382-
///
383-
/// <start_id> - build id of the object containing the start address. We can
384-
/// skip it for the main binary and use "X" for an unknown object. This will
385-
/// save some space and facilitate human parsing.
386-
///
387-
/// <start_offset> - hex offset from the object base load address (0 for the
388-
/// main executable unless it's PIE) to the start address.
383+
/// T - an aggregated trace: branch from <start> to <end> with a fall-through
384+
/// to <ft_end>
389385
///
390-
/// <end_id>, <end_offset> - same for the end address.
386+
/// <id> - build id of the object containing the address. We can skip it for
387+
/// the main binary and use "X" for an unknown object. This will save some
388+
/// space and facilitate human parsing.
391389
///
392-
/// <ft_end> - same for the fallthrough_end address.
390+
/// <offset> - hex offset from the object base load address (0 for the
391+
/// main executable unless it's PIE) to the address.
393392
///
394393
/// <count> - total aggregated count of the sample, branch, or fall-through.
395394
///
396395
/// <mispred_count> - the number of times the branch was mispredicted.
397396
/// Only present for B entries; omitted for S, F, f and T entries.
398397
///
399398
/// Example:
399+
/// Basic samples profile:
400+
/// S 41be50 3
401+
///
402+
/// Soft-deprecated branch profile with separate branches and fall-throughs:
400403
/// F 41be50 41be50 3
401404
/// F 41be90 41be90 4
402405
/// B 4b1942 39b57f0 3 0
403406
/// B 4b196f 4b19e0 2 0
407+
///
408+
/// Recommended branch profile with pre-aggregated traces:
409+
/// T 4b196f 4b19e0 4b19ef 2
404410
void parsePreAggregated();
405411

406412
/// Parse the full output of pre-aggregated LBR samples generated by

bolt/include/bolt/Profile/Heatmap.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ class Heatmap {
8585
void printSectionHotness(raw_ostream &OS) const;
8686

8787
size_t size() const { return Map.size(); }
88+
89+
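/// Increase bucket size to \p TargetSize, recomputing the heatmap. Returns
/// false and leaves the heatmap unchanged unless \p TargetSize is larger than
/// the current bucket size.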
90+
bool resizeBucket(uint64_t TargetSize);
8891
};
8992

9093
} // namespace bolt

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 64 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,12 @@ FilterPID("pid",
6868
cl::Optional,
6969
cl::cat(AggregatorCategory));
7070

71+
static cl::list<uint64_t>
72+
HeatmapZoomOut("heatmap-zoom-out", cl::CommaSeparated,
73+
cl::desc("print secondary heatmaps with given bucket sizes"),
74+
cl::value_desc("bucket_size"), cl::Optional,
75+
cl::cat(HeatmapCategory));
76+
7177
static cl::opt<bool>
7278
IgnoreBuildID("ignore-build-id",
7379
cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
@@ -1210,71 +1216,80 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
12101216
ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
12111217
if (std::error_code EC = TypeOrErr.getError())
12121218
return EC;
1213-
enum AggregatedLBREntry { TRACE, BRANCH, FT, FT_EXTERNAL_ORIGIN, INVALID };
1219+
enum AggregatedLBREntry {
1220+
TRACE,
1221+
SAMPLE,
1222+
BRANCH,
1223+
FT,
1224+
FT_EXTERNAL_ORIGIN,
1225+
INVALID
1226+
};
12141227
auto Type = StringSwitch<AggregatedLBREntry>(TypeOrErr.get())
12151228
.Case("T", TRACE)
1229+
.Case("S", SAMPLE)
12161230
.Case("B", BRANCH)
12171231
.Case("F", FT)
12181232
.Case("f", FT_EXTERNAL_ORIGIN)
12191233
.Default(INVALID);
12201234
if (Type == INVALID) {
1221-
reportError("expected T, B, F or f");
1235+
reportError("expected T, S, B, F or f");
12221236
return make_error_code(llvm::errc::io_error);
12231237
}
12241238

1225-
while (checkAndConsumeFS()) {
1226-
}
1227-
ErrorOr<Location> From = parseLocationOrOffset();
1228-
if (std::error_code EC = From.getError())
1229-
return EC;
1239+
std::optional<Location> Addrs[3];
1240+
int AddrNum = 2;
1241+
if (Type == TRACE)
1242+
AddrNum = 3;
1243+
else if (Type == SAMPLE)
1244+
AddrNum = 1;
12301245

1231-
while (checkAndConsumeFS()) {
1232-
}
1233-
ErrorOr<Location> To = parseLocationOrOffset();
1234-
if (std::error_code EC = To.getError())
1235-
return EC;
1246+
int64_t Counters[2];
1247+
int CounterNum = 1;
1248+
if (Type == BRANCH)
1249+
CounterNum = 2;
12361250

1237-
ErrorOr<Location> TraceFtEnd = std::error_code();
1238-
if (Type == AggregatedLBREntry::TRACE) {
1251+
for (int I = 0; I < AddrNum; ++I) {
12391252
while (checkAndConsumeFS()) {
12401253
}
1241-
TraceFtEnd = parseLocationOrOffset();
1242-
if (std::error_code EC = TraceFtEnd.getError())
1243-
return EC;
1244-
}
1245-
1246-
while (checkAndConsumeFS()) {
1254+
if (ErrorOr<Location> Addr = parseLocationOrOffset())
1255+
Addrs[I] = Addr.get();
1256+
else
1257+
return Addr.getError();
12471258
}
1248-
ErrorOr<int64_t> Frequency =
1249-
parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
1250-
if (std::error_code EC = Frequency.getError())
1251-
return EC;
12521259

1253-
uint64_t Mispreds = 0;
1254-
if (Type == AggregatedLBREntry::BRANCH) {
1260+
for (int I = 0; I < CounterNum; ++I) {
12551261
while (checkAndConsumeFS()) {
12561262
}
1257-
ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
1258-
if (std::error_code EC = MispredsOrErr.getError())
1259-
return EC;
1260-
Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
1263+
if (ErrorOr<int64_t> Count = parseNumberField(FieldSeparator, I + 1 == CounterNum))
1264+
Counters[I] = Count.get();
1265+
else
1266+
return Count.getError();
12611267
}
12621268

12631269
if (!checkAndConsumeNewLine()) {
12641270
reportError("expected end of line");
12651271
return make_error_code(llvm::errc::io_error);
12661272
}
12671273

1268-
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From->Offset);
1269-
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To->Offset);
1274+
const uint64_t FromOffset = Addrs[0]->Offset;
1275+
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(FromOffset);
1276+
if (FromFunc)
1277+
FromFunc->setHasProfileAvailable();
12701278

1271-
for (BinaryFunction *BF : {FromFunc, ToFunc})
1272-
if (BF)
1273-
BF->setHasProfileAvailable();
1279+
if (Type == SAMPLE) {
1280+
BasicSamples[FromOffset] += Counters[0];
1281+
return std::error_code();
1282+
}
1283+
1284+
const uint64_t ToOffset = Addrs[1]->Offset;
1285+
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(ToOffset);
1286+
if (ToFunc)
1287+
ToFunc->setHasProfileAvailable();
12741288

1275-
uint64_t Count = static_cast<uint64_t>(Frequency.get());
1289+
int64_t Count = Counters[0];
1290+
int64_t Mispreds = Counters[1];
12761291

1277-
Trace Trace(From->Offset, To->Offset);
1292+
Trace Trace(FromOffset, ToOffset);
12781293
// Taken trace
12791294
if (Type == TRACE || Type == BRANCH) {
12801295
TakenBranchInfo &Info = BranchLBRs[Trace];
@@ -1285,8 +1300,9 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
12851300
}
12861301
// Construct fallthrough part of the trace
12871302
if (Type == TRACE) {
1288-
Trace.From = To->Offset;
1289-
Trace.To = TraceFtEnd->Offset;
1303+
const uint64_t TraceFtEndOffset = Addrs[2]->Offset;
1304+
Trace.From = ToOffset;
1305+
Trace.To = TraceFtEndOffset;
12901306
Type = FromFunc == ToFunc ? FT : FT_EXTERNAL_ORIGIN;
12911307
}
12921308
// Add fallthrough trace
@@ -1365,6 +1381,15 @@ std::error_code DataAggregator::printLBRHeatMap() {
13651381
HM.printCDF(opts::HeatmapOutput + ".csv");
13661382
HM.printSectionHotness(opts::HeatmapOutput + "-section-hotness.csv");
13671383
}
1384+
// Provide coarse-grained heatmap if requested via --heatmap-zoom-out
1385+
for (const uint64_t NewBucketSize : opts::HeatmapZoomOut) {
1386+
if (!HM.resizeBucket(NewBucketSize))
1387+
break;
1388+
if (opts::HeatmapOutput == "-")
1389+
HM.print(opts::HeatmapOutput);
1390+
else
1391+
HM.print(formatv("{0}-{1}", opts::HeatmapOutput, NewBucketSize).str());
1392+
}
13681393

13691394
return std::error_code();
13701395
}

bolt/lib/Profile/Heatmap.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,5 +364,18 @@ void Heatmap::printSectionHotness(raw_ostream &OS) const {
364364
OS << formatv("[unmapped], 0x0, 0x0, {0:f4}, 0, 0\n",
365365
100.0 * UnmappedHotness / NumTotalCounts);
366366
}
367+
368+
/// Coarsen the heatmap so that every bucket spans \p TargetSize address units
/// instead of the current BucketSize.
///
/// Each existing bucket is attributed as a whole to the new bucket that
/// contains its start address (Bucket * BucketSize); counts landing in the
/// same new bucket are summed. When \p TargetSize is not a multiple of the
/// current bucket size, an old bucket straddling a new boundary is therefore
/// counted entirely in the lower new bucket.
///
/// \returns false, leaving the heatmap untouched, if \p TargetSize is not
/// strictly larger than the current bucket size; true after a successful
/// resize.
bool Heatmap::resizeBucket(uint64_t TargetSize) {
  if (TargetSize <= BucketSize)
    return false;
  std::map<uint64_t, uint64_t> NewMap;
  for (const auto &[Bucket, Count] : Map) {
    const uint64_t Address = Bucket * BucketSize;
    NewMap[Address / TargetSize] += Count;
  }
  // Hand the rebuilt buckets over instead of deep-copying every node; the old
  // contents are released when NewMap goes out of scope.
  Map.swap(NewMap);
  BucketSize = TargetSize;
  return true;
}
367380
} // namespace bolt
368381
} // namespace llvm
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
S 4005f0 1
2+
S 4005f0 1
3+
S 400610 1
4+
S 400ad1 2
5+
S 400b10 1
6+
S 400bb7 1
7+
S 400bbc 2
8+
S 400d90 1
9+
S 400dae 1
10+
S 400e00 2
11+
S 401170 22
12+
S 401180 58
13+
S 4011a0 33
14+
S 4011a9 33
15+
S 4011ad 58
16+
S 4011b2 22
17+
S X:7f36d18d60c0 2
18+
S X:7f36d18f2ce0 1

bolt/test/X86/heatmap-preagg.test

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,12 @@
33
RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe
44
## Non-BOLTed input binary
55
RUN: llvm-bolt-heatmap %t.exe -o %t --pa -p %p/Inputs/blarge_new.preagg.txt \
6+
RUN: --heatmap-zoom-out 128,1024 --line-size 64 \
67
RUN: 2>&1 | FileCheck --check-prefix CHECK-HEATMAP %s
78
RUN: FileCheck %s --check-prefix CHECK-SEC-HOT --input-file %t-section-hotness.csv
9+
RUN: FileCheck %s --check-prefix CHECK-HM-64 --input-file %t
10+
RUN: FileCheck %s --check-prefix CHECK-HM-128 --input-file %t-128
11+
RUN: FileCheck %s --check-prefix CHECK-HM-1024 --input-file %t-1024
812

913
## BOLTed input binary
1014
RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt \
@@ -24,6 +28,29 @@ CHECK-SEC-HOT-NEXT: .plt, 0x401020, 0x4010b0, 4.7583, 66.6667, 0.0317
2428
CHECK-SEC-HOT-NEXT: .text, 0x4010b0, 0x401c25, 78.3872, 85.1064, 0.6671
2529
CHECK-SEC-HOT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000, 0.0000, 0.0000
2630

31+
# Only check x scales – can't check colors, and FileCheck doesn't strip color
32+
# codes by default.
33+
CHECK-HM-64: (299, 937]
34+
CHECK-HM-64-NEXT: 0
35+
CHECK-HM-64-NEXT: 0
36+
CHECK-HM-64-NEXT: 0 1 2 3 4 5 6 7 8 9 a b c d e f
37+
CHECK-HM-64-NEXT: 048c048c048c048c048c048c048c048c048c048c048c048c048c048c048c048c
38+
CHECK-HM-64-NEXT: 0
39+
40+
CHECK-HM-128: (299, 937]
41+
CHECK-HM-128-NEXT: 0
42+
CHECK-HM-128-NEXT: 0 1
43+
CHECK-HM-128-NEXT: 0 1 2 3 4 5 6 7 8 9 a b c d e f 0 1 2 3 4 5 6 7 8 9 a b c d e f
44+
CHECK-HM-128-NEXT: 0808080808080808080808080808080808080808080808080808080808080808
45+
CHECK-HM-128-NEXT: 0
46+
47+
CHECK-HM-1024: (483, 1663]
48+
CHECK-HM-1024-NEXT: 0
49+
CHECK-HM-1024-NEXT: 0 1 2 3 4 5 6 7 8 9 a b c d e f
50+
CHECK-HM-1024-NEXT: 048c048c048c048c048c048c048c048c048c048c048c048c048c048c048c048c
51+
CHECK-HM-1024-NEXT: 0
52+
CHECK-HM-1024-NEXT: 0
53+
2754
CHECK-HEATMAP-BAT: PERF2BOLT: read 79 aggregated LBR entries
2855
CHECK-HEATMAP-BAT: HEATMAP: invalid traces: 2
2956

bolt/test/X86/pre-aggregated-perf.test

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,14 @@ RUN: llvm-bolt %t.exe -o %t.bolt.yaml --pa -p %p/Inputs/pre-aggregated.txt \
5757
RUN: --aggregate-only --profile-format=yaml --profile-use-dfs
5858
RUN: cat %t.bolt.yaml | FileCheck %s -check-prefix=NEWFORMAT
5959

60+
## Test pre-aggregated basic profile
61+
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba \
62+
RUN: 2>&1 | FileCheck %s --check-prefix=BASIC-ERROR
63+
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba \
64+
RUN: -nl 2>&1 | FileCheck %s --check-prefix=BASIC-SUCCESS
65+
BASIC-ERROR: BOLT-INFO: 0 out of 7 functions in the binary (0.0%) have non-empty execution profile
66+
BASIC-SUCCESS: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
67+
6068
PERF2BOLT: 0 [unknown] 7f36d18d60c0 1 main 53c 0 2
6169
PERF2BOLT: 1 main 451 1 SolveCubic 0 0 2
6270
PERF2BOLT: 1 main 490 0 [unknown] 4005f0 0 1

bolt/test/link_fdata.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,9 @@
3636
fdata_pat = re.compile(r"([01].*) (?P<exec>\d+) (?P<mispred>\d+)")
3737

3838
# Pre-aggregated profile:
39-
# {T|B|F|f} [<start_id>:]<start_offset> [<end_id>:]<end_offset> [<ft_end>]
40-
# <count> [<mispred_count>]
41-
preagg_pat = re.compile(r"(?P<type>[TBFf]) (?P<offsets_count>.*)")
39+
# {T|S|B|F|f} <start> [<end>] [<ft_end>] <count> [<mispred_count>]
40+
# where <start>, <end>, <ft_end> have the format [<id>:]<offset>
41+
preagg_pat = re.compile(r"(?P<type>[TSBFf]) (?P<offsets_count>.*)")
4242

4343
# No-LBR profile:
4444
# <is symbol?> <closest elf symbol or DSO name> <relative address> <count>

0 commit comments

Comments
 (0)