Skip to content
14 changes: 0 additions & 14 deletions bolt/include/bolt/Profile/DataAggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,16 +92,6 @@ class DataAggregator : public DataReader {
uint64_t Addr;
};

/// Used for parsing specific pre-aggregated input files.
///
/// One record is stored per parsed entry; a TRACE entry additionally yields a
/// second record describing its fall-through part.
struct AggregatedLBREntry {
/// Kind of record. The parser maps the leading type field of an input line
/// as "B" -> BRANCH, "F" -> FT, "f" -> FT_EXTERNAL_ORIGIN and "T" -> TRACE.
enum Type : char { BRANCH = 0, FT, FT_EXTERNAL_ORIGIN, TRACE };
/// Start location of the entry. For BRANCH/TRACE its offset is the branch
/// source; for FT/FT_EXTERNAL_ORIGIN it is where the fall-through begins.
Location From;
/// End location of the entry: the branch target for BRANCH/TRACE, or the end
/// of the fall-through range for FT/FT_EXTERNAL_ORIGIN.
Location To;
/// Frequency read from the input line for this entry.
uint64_t Count;
/// Misprediction count passed along with taken branches; the fall-through
/// record synthesized from a TRACE entry is created with 0 here.
uint64_t Mispreds;
/// Selects how the record is processed (taken branch vs. fall-through).
Type EntryType;
};

struct Trace {
uint64_t From;
uint64_t To;
Expand Down Expand Up @@ -131,7 +121,6 @@ class DataAggregator : public DataReader {
/// and use them later for processing and assigning profile.
std::unordered_map<Trace, TakenBranchInfo, TraceHash> BranchLBRs;
std::unordered_map<Trace, FTInfo, TraceHash> FallthroughLBRs;
std::vector<AggregatedLBREntry> AggregatedLBRs;
std::unordered_map<uint64_t, uint64_t> BasicSamples;
std::vector<PerfMemSample> MemSamples;

Expand Down Expand Up @@ -423,9 +412,6 @@ class DataAggregator : public DataReader {
/// an external tool.
std::error_code parsePreAggregatedLBRSamples();

/// Process parsed pre-aggregated data.
void processPreAggregated();

/// If \p Address falls into the binary address space based on memory
/// mapping info \p MMI, then adjust it for further processing by subtracting
/// the base load address. External addresses, i.e. addresses that do not
Expand Down
134 changes: 59 additions & 75 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -444,19 +444,6 @@ int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
Error DataAggregator::preprocessProfile(BinaryContext &BC) {
this->BC = &BC;

if (opts::ReadPreAggregated) {
parsePreAggregated();
return Error::success();
}

if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) {
outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
processFileBuildID(*FileBuildID);
} else {
errs() << "BOLT-WARNING: build-id will not be checked because we could "
"not read one from input binary\n";
}

auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) {
errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
exit(1);
Expand All @@ -469,6 +456,19 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
ErrorCallback(ReturnCode, ErrBuf);
};

if (opts::ReadPreAggregated) {
parsePreAggregated();
goto heatmap;
}

if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) {
outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
processFileBuildID(*FileBuildID);
} else {
errs() << "BOLT-WARNING: build-id will not be checked because we could "
"not read one from input binary\n";
}

if (BC.IsLinuxKernel) {
// Current MMap parsing logic does not work with linux kernel.
// MMap entries for linux kernel use PERF_RECORD_MMAP
Expand Down Expand Up @@ -501,14 +501,6 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
(opts::BasicAggregation && parseBasicEvents()))
errs() << "PERF2BOLT: failed to parse samples\n";

if (opts::HeatmapMode) {
if (std::error_code EC = printLBRHeatMap()) {
errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
exit(1);
}
exit(0);
}

// Special handling for memory events
if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
return Error::success();
Expand All @@ -519,6 +511,14 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {

deleteTempFiles();

heatmap:
if (opts::HeatmapMode) {
if (std::error_code EC = printLBRHeatMap()) {
errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
exit(1);
}
exit(0);
}
return Error::success();
}

Expand Down Expand Up @@ -555,9 +555,7 @@ bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
}

void DataAggregator::processProfile(BinaryContext &BC) {
if (opts::ReadPreAggregated)
processPreAggregated();
else if (opts::BasicAggregation)
if (opts::BasicAggregation)
processBasicEvents();
else
processBranchEvents();
Expand Down Expand Up @@ -586,7 +584,6 @@ void DataAggregator::processProfile(BinaryContext &BC) {
// Release intermediate storage.
clear(BranchLBRs);
clear(FallthroughLBRs);
clear(AggregatedLBRs);
clear(BasicSamples);
clear(MemSamples);
}
Expand Down Expand Up @@ -1215,15 +1212,14 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
if (std::error_code EC = TypeOrErr.getError())
return EC;
auto Type = AggregatedLBREntry::TRACE;
if (LLVM_LIKELY(TypeOrErr.get() == "T")) {
} else if (TypeOrErr.get() == "B") {
Type = AggregatedLBREntry::BRANCH;
} else if (TypeOrErr.get() == "F") {
Type = AggregatedLBREntry::FT;
} else if (TypeOrErr.get() == "f") {
Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
} else {
enum AggregatedLBREntry { TRACE, BRANCH, FT, FT_EXTERNAL_ORIGIN, INVALID };
auto Type = StringSwitch<AggregatedLBREntry>(TypeOrErr.get())
.Case("T", TRACE)
.Case("B", BRANCH)
.Case("F", FT)
.Case("f", FT_EXTERNAL_ORIGIN)
.Default(INVALID);
if (Type == INVALID) {
reportError("expected T, B, F or f");
return make_error_code(llvm::errc::io_error);
}
Expand Down Expand Up @@ -1279,13 +1275,28 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
BF->setHasProfileAvailable();

uint64_t Count = static_cast<uint64_t>(Frequency.get());
AggregatedLBREntry Entry{From.get(), To.get(), Count, Mispreds, Type};
AggregatedLBRs.emplace_back(Entry);
if (Type == AggregatedLBREntry::TRACE) {
auto FtType = (FromFunc == ToFunc) ? AggregatedLBREntry::FT
: AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
AggregatedLBREntry TraceFt{To.get(), TraceFtEnd.get(), Count, 0, FtType};
AggregatedLBRs.emplace_back(TraceFt);

Trace Trace(From->Offset, To->Offset);
// Taken trace
if (Type == TRACE || Type == BRANCH) {
TakenBranchInfo &Info = BranchLBRs[Trace];
Info.TakenCount += Count;
Info.MispredCount += Mispreds;

NumTotalSamples += Count;
}
// Construct fallthrough part of the trace
if (Type == TRACE) {
Trace.From = To->Offset;
Trace.To = TraceFtEnd->Offset;
Type = FromFunc == ToFunc ? FT : FT_EXTERNAL_ORIGIN;
}
// Add fallthrough trace
if (Type != BRANCH) {
FTInfo &Info = FallthroughLBRs[Trace];
(Type == FT ? Info.InternCount : Info.ExternCount) += Count;

NumTraces += Count;
}

return std::error_code();
Expand Down Expand Up @@ -1567,7 +1578,6 @@ std::error_code DataAggregator::parseBranchEvents() {
printBranchStacksDiagnostics(NumTotalSamples - NumSamples);
}
}
printBranchSamplesDiagnostics();

return std::error_code();
}
Expand Down Expand Up @@ -1595,6 +1605,7 @@ void DataAggregator::processBranchEvents() {
const TakenBranchInfo &Info = AggrLBR.second;
doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
}
printBranchSamplesDiagnostics();
}

std::error_code DataAggregator::parseBasicEvents() {
Expand Down Expand Up @@ -1704,43 +1715,16 @@ std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
while (hasData())
size_t AggregatedLBRs = 0;
while (hasData()) {
if (std::error_code EC = parseAggregatedLBREntry())
return EC;

return std::error_code();
}

void DataAggregator::processPreAggregated() {
outs() << "PERF2BOLT: processing pre-aggregated profile...\n";
NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);

for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
switch (AggrEntry.EntryType) {
case AggregatedLBREntry::BRANCH:
case AggregatedLBREntry::TRACE:
doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
AggrEntry.Mispreds);
NumTotalSamples += AggrEntry.Count;
break;
case AggregatedLBREntry::FT:
case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
? AggrEntry.From.Offset
: 0,
AggrEntry.From.Offset, false};
LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
doTrace(First, Second, AggrEntry.Count);
NumTraces += AggrEntry.Count;
break;
}
}
++AggregatedLBRs;
}

outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
<< " aggregated LBR entries\n";
printBranchSamplesDiagnostics();
outs() << "PERF2BOLT: read " << AggregatedLBRs << " aggregated LBR entries\n";

return std::error_code();
}

std::optional<int32_t> DataAggregator::parseCommExecEvent() {
Expand Down
33 changes: 33 additions & 0 deletions bolt/test/X86/heatmap-preagg.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
## Test heatmap with pre-aggregated profile
## Two scenarios are covered: the original (non-BOLTed) binary and a binary
## that was first processed by llvm-bolt with BAT enabled. In both cases the
## tool's console output and the generated section hotness CSV are verified.

## Build the test executable from its YAML description.
RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe
## Non-BOLTed input binary
## Run the heatmap tool with the pre-aggregated profile (--pa -p), check its
## diagnostics, then check the section hotness CSV written next to the output.
RUN: llvm-bolt-heatmap %t.exe -o %t --pa -p %p/Inputs/blarge_new.preagg.txt \
RUN:   2>&1 | FileCheck --check-prefix CHECK-HEATMAP %s
RUN: FileCheck %s --check-prefix CHECK-SEC-HOT --input-file %t-section-hotness.csv

## BOLTed input binary
## First produce an optimized binary with --enable-bat, then run the heatmap
## tool on that output using the blarge_new_bat.preagg.txt profile.
RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt \
RUN:   --reorder-blocks=ext-tsp --split-functions --split-strategy=cdsplit \
RUN:   --reorder-functions=cdsort --enable-bat --dyno-stats --skip-funcs=main
RUN: llvm-bolt-heatmap %t.out -o %t2 --pa -p %p/Inputs/blarge_new_bat.preagg.txt \
RUN:   2>&1 | FileCheck --check-prefix CHECK-HEATMAP-BAT %s
RUN: FileCheck %s --check-prefix CHECK-SEC-HOT-BAT --input-file %t2-section-hotness.csv

## Expected console output for the non-BOLTed binary.
CHECK-HEATMAP: PERF2BOLT: read 81 aggregated LBR entries
CHECK-HEATMAP: HEATMAP: invalid traces: 1

## Expected consecutive rows of the section hotness CSV (non-BOLTed binary).
CHECK-SEC-HOT: .init, 0x401000, 0x40101b, 16.8545
CHECK-SEC-HOT-NEXT: .plt, 0x401020, 0x4010b0, 4.7583
CHECK-SEC-HOT-NEXT: .text, 0x4010b0, 0x401c25, 78.3872
CHECK-SEC-HOT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000

## Expected console output for the BOLTed binary.
CHECK-HEATMAP-BAT: PERF2BOLT: read 79 aggregated LBR entries
CHECK-HEATMAP-BAT: HEATMAP: invalid traces: 2

## Expected consecutive rows of the section hotness CSV (BOLTed binary); the
## original code appears as .bolt.org.text alongside the new .text/.text.cold.
CHECK-SEC-HOT-BAT: .init, 0x401000, 0x40101b, 17.2888
CHECK-SEC-HOT-BAT-NEXT: .plt, 0x401020, 0x4010b0, 5.6132
CHECK-SEC-HOT-BAT-NEXT: .bolt.org.text, 0x4010b0, 0x401c25, 38.3385
CHECK-SEC-HOT-BAT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000
CHECK-SEC-HOT-BAT-NEXT: .text, 0x800000, 0x8002cc, 38.7595
CHECK-SEC-HOT-BAT-NEXT: .text.cold, 0x800300, 0x800415, 0.0000
Loading