Skip to content

Commit ac7ae5a

Browse files
committed
[𝘀𝗽𝗿] changes to main this commit is based on
Created using spr 1.3.4 [skip ci]
1 parent 7f922f1 commit ac7ae5a

File tree

12 files changed: 76 additions and 154 deletions.

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -386,8 +386,8 @@ class BinaryFunction {
386386
/// Profile match ratio.
387387
float ProfileMatchRatio{0.0f};
388388

389-
/// Raw branch count for this function in the profile.
390-
uint64_t RawBranchCount{0};
389+
/// Raw sample/branch count for this function in the profile.
390+
uint64_t RawSampleCount{0};
391391

392392
/// Dynamically executed function bytes, used for density computation.
393393
uint64_t SampleCountInBytes{0};
@@ -1880,13 +1880,12 @@ class BinaryFunction {
18801880
/// Return COUNT_NO_PROFILE if there's no profile info.
18811881
uint64_t getExecutionCount() const { return ExecutionCount; }
18821882

1883-
/// Return the raw profile information about the number of branch
1884-
/// executions corresponding to this function.
1885-
uint64_t getRawBranchCount() const { return RawBranchCount; }
1883+
/// Return the raw profile information about the number of samples (basic
1884+
/// profile) or branch executions (branch profile) recorded in this function.
1885+
uint64_t getRawSampleCount() const { return RawSampleCount; }
18861886

1887-
/// Set the profile data about the number of branch executions corresponding
1888-
/// to this function.
1889-
void setRawBranchCount(uint64_t Count) { RawBranchCount = Count; }
1887+
/// Set raw count of samples or branches recorded in this function.
1888+
void setRawSampleCount(uint64_t Count) { RawSampleCount = Count; }
18901889

18911890
/// Return the number of dynamically executed bytes, from raw perf data.
18921891
uint64_t getSampleCountInBytes() const { return SampleCountInBytes; }

bolt/include/bolt/Profile/DataAggregator.h

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -197,10 +197,6 @@ class DataAggregator : public DataReader {
197197

198198
BoltAddressTranslation *BAT{nullptr};
199199

200-
/// Whether pre-aggregated profile needs to convert branch profile into call
201-
/// to continuation fallthrough profile.
202-
bool NeedsConvertRetProfileToCallCont{false};
203-
204200
/// Update function execution profile with a recorded trace.
205201
/// A trace is region of code executed between two LBR entries supplied in
206202
/// execution order.

bolt/include/bolt/Profile/DataReader.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -252,6 +252,9 @@ struct FuncSampleData {
252252
/// Get the number of samples recorded in [Start, End)
253253
uint64_t getSamples(uint64_t Start, uint64_t End) const;
254254

255+
/// Returns the total number of samples recorded in this function.
256+
uint64_t getSamples() const;
257+
255258
/// Aggregation helper
256259
DenseMap<uint64_t, size_t> Index;
257260

bolt/include/bolt/Profile/Heatmap.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -57,9 +57,9 @@ class Heatmap {
5757
}
5858

5959
/// Register a single sample at \p Address.
60-
void registerAddress(uint64_t Address) {
60+
void registerAddress(uint64_t Address, uint64_t Count) {
6161
if (!ignoreAddress(Address))
62-
++Map[Address / BucketSize];
62+
Map[Address / BucketSize] += Count;
6363
}
6464

6565
/// Register \p Count samples at [\p StartAddress, \p EndAddress ].

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -473,7 +473,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
473473
OS << "\n Image : 0x" << Twine::utohexstr(getImageAddress());
474474
if (ExecutionCount != COUNT_NO_PROFILE) {
475475
OS << "\n Exec Count : " << ExecutionCount;
476-
OS << "\n Branch Count: " << RawBranchCount;
476+
OS << "\n Branch Count: " << RawSampleCount;
477477
OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
478478
}
479479

bolt/lib/Passes/BinaryPasses.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1445,7 +1445,7 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
14451445
if (!Function.hasProfile())
14461446
continue;
14471447

1448-
uint64_t SampleCount = Function.getRawBranchCount();
1448+
uint64_t SampleCount = Function.getRawSampleCount();
14491449
TotalSampleCount += SampleCount;
14501450

14511451
if (Function.hasValidProfile()) {

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 44 additions & 106 deletions
Original file line number | Diff line number | Diff line change
@@ -499,18 +499,15 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
499499
filterBinaryMMapInfo();
500500
prepareToParse("events", MainEventsPPI, ErrorCallback);
501501

502+
if (opts::BasicAggregation ? parseBasicEvents() : parseBranchEvents())
503+
errs() << "PERF2BOLT: failed to parse samples\n";
504+
502505
if (opts::HeatmapMode) {
503-
if (std::error_code EC = printLBRHeatMap()) {
504-
errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
505-
exit(1);
506-
}
506+
if (std::error_code EC = printLBRHeatMap())
507+
return errorCodeToError(EC);
507508
exit(0);
508509
}
509510

510-
if ((!opts::BasicAggregation && parseBranchEvents()) ||
511-
(opts::BasicAggregation && parseBasicEvents()))
512-
errs() << "PERF2BOLT: failed to parse samples\n";
513-
514511
// Special handling for memory events
515512
if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
516513
return Error::success();
@@ -567,15 +564,14 @@ void DataAggregator::processProfile(BinaryContext &BC) {
567564
processMemEvents();
568565

569566
// Mark all functions with registered events as having a valid profile.
570-
const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
571-
: BinaryFunction::PF_LBR;
572567
for (auto &BFI : BC.getBinaryFunctions()) {
573568
BinaryFunction &BF = BFI.second;
574-
FuncBranchData *FBD = getBranchData(BF);
575-
if (FBD || getFuncSampleData(BF.getNames())) {
576-
BF.markProfiled(Flags);
577-
if (FBD)
578-
BF.RawBranchCount = FBD->getNumExecutedBranches();
569+
if (FuncBranchData *FBD = getBranchData(BF)) {
570+
BF.markProfiled(BinaryFunction::PF_LBR);
571+
BF.RawSampleCount = FBD->getNumExecutedBranches();
572+
} else if (FuncSampleData *FSD = getFuncSampleData(BF.getNames())) {
573+
BF.markProfiled(BinaryFunction::PF_SAMPLE);
574+
BF.RawSampleCount = FSD->getSamples();
579575
}
580576
}
581577

@@ -632,10 +628,18 @@ StringRef DataAggregator::getLocationName(const BinaryFunction &Func,
632628

633629
bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
634630
uint64_t Count) {
631+
// To record executed bytes, use basic block size as is regardless of BAT.
632+
uint64_t BlockSize = 0;
633+
if (BinaryBasicBlock *BB = OrigFunc.getBasicBlockContainingOffset(
634+
Address - OrigFunc.getAddress()))
635+
BlockSize = BB->getOriginalSize();
636+
635637
BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc);
636638
BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
637-
if (ParentFunc || (BAT && !BAT->isBATFunction(OrigFunc.getAddress())))
639+
if (ParentFunc || (BAT && !BAT->isBATFunction(Func.getAddress())))
638640
NumColdSamples += Count;
641+
// Attach executed bytes to parent function in case of cold fragment.
642+
Func.SampleCountInBytes += Count * BlockSize;
639643

640644
auto I = NamesToSamples.find(Func.getOneName());
641645
if (I == NamesToSamples.end()) {
@@ -720,23 +724,6 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
720724
: isReturn(Func.disassembleInstructionAtOffset(Offset));
721725
};
722726

723-
// Returns whether \p Offset in \p Func may be a call continuation excluding
724-
// entry points and landing pads.
725-
auto checkCallCont = [&](const BinaryFunction &Func, const uint64_t Offset) {
726-
// No call continuation at a function start.
727-
if (!Offset)
728-
return false;
729-
730-
// FIXME: support BAT case where the function might be in empty state
731-
// (split fragments declared non-simple).
732-
if (!Func.hasCFG())
733-
return false;
734-
735-
// The offset should not be an entry point or a landing pad.
736-
const BinaryBasicBlock *ContBB = Func.getBasicBlockAtOffset(Offset);
737-
return ContBB && !ContBB->isEntryPoint() && !ContBB->isLandingPad();
738-
};
739-
740727
// Mutates \p Addr to an offset into the containing function, performing BAT
741728
// offset translation and parent lookup.
742729
//
@@ -749,8 +736,7 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
749736

750737
Addr -= Func->getAddress();
751738

752-
bool IsRetOrCallCont =
753-
IsFrom ? checkReturn(*Func, Addr) : checkCallCont(*Func, Addr);
739+
bool IsRet = IsFrom && checkReturn(*Func, Addr);
754740

755741
if (BAT)
756742
Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);
@@ -761,24 +747,16 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
761747
NumColdSamples += Count;
762748

763749
if (!ParentFunc)
764-
return std::pair{Func, IsRetOrCallCont};
750+
return std::pair{Func, IsRet};
765751

766-
return std::pair{ParentFunc, IsRetOrCallCont};
752+
return std::pair{ParentFunc, IsRet};
767753
};
768754

769-
uint64_t ToOrig = To;
770755
auto [FromFunc, IsReturn] = handleAddress(From, /*IsFrom*/ true);
771-
auto [ToFunc, IsCallCont] = handleAddress(To, /*IsFrom*/ false);
756+
auto [ToFunc, _] = handleAddress(To, /*IsFrom*/ false);
772757
if (!FromFunc && !ToFunc)
773758
return false;
774759

775-
// Record call to continuation trace.
776-
if (NeedsConvertRetProfileToCallCont && FromFunc != ToFunc &&
777-
(IsReturn || IsCallCont)) {
778-
LBREntry First{ToOrig - 1, ToOrig - 1, false};
779-
LBREntry Second{ToOrig, ToOrig, false};
780-
return doTrace(First, Second, Count);
781-
}
782760
// Ignore returns.
783761
if (IsReturn)
784762
return true;
@@ -1235,21 +1213,14 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
12351213
ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
12361214
if (std::error_code EC = TypeOrErr.getError())
12371215
return EC;
1238-
// Pre-aggregated profile with branches and fallthroughs needs to convert
1239-
// return profile into call to continuation fall-through.
1240-
auto Type = AggregatedLBREntry::BRANCH;
1241-
if (TypeOrErr.get() == "B") {
1242-
NeedsConvertRetProfileToCallCont = true;
1216+
auto Type = AggregatedLBREntry::TRACE;
1217+
if (LLVM_LIKELY(TypeOrErr.get() == "T")) {
1218+
} else if (TypeOrErr.get() == "B") {
12431219
Type = AggregatedLBREntry::BRANCH;
12441220
} else if (TypeOrErr.get() == "F") {
1245-
NeedsConvertRetProfileToCallCont = true;
12461221
Type = AggregatedLBREntry::FT;
12471222
} else if (TypeOrErr.get() == "f") {
1248-
NeedsConvertRetProfileToCallCont = true;
12491223
Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
1250-
} else if (TypeOrErr.get() == "T") {
1251-
// Trace is expanded into B and [Ff]
1252-
Type = AggregatedLBREntry::TRACE;
12531224
} else {
12541225
reportError("expected T, B, F or f");
12551226
return make_error_code(llvm::errc::io_error);
@@ -1334,53 +1305,6 @@ std::error_code DataAggregator::printLBRHeatMap() {
13341305
}
13351306
Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress,
13361307
opts::HeatmapMaxAddress, getTextSections(BC));
1337-
uint64_t NumTotalSamples = 0;
1338-
1339-
if (opts::BasicAggregation) {
1340-
while (hasData()) {
1341-
ErrorOr<PerfBasicSample> SampleRes = parseBasicSample();
1342-
if (std::error_code EC = SampleRes.getError()) {
1343-
if (EC == errc::no_such_process)
1344-
continue;
1345-
return EC;
1346-
}
1347-
PerfBasicSample &Sample = SampleRes.get();
1348-
HM.registerAddress(Sample.PC);
1349-
NumTotalSamples++;
1350-
}
1351-
outs() << "HEATMAP: read " << NumTotalSamples << " basic samples\n";
1352-
} else {
1353-
while (hasData()) {
1354-
ErrorOr<PerfBranchSample> SampleRes = parseBranchSample();
1355-
if (std::error_code EC = SampleRes.getError()) {
1356-
if (EC == errc::no_such_process)
1357-
continue;
1358-
return EC;
1359-
}
1360-
1361-
PerfBranchSample &Sample = SampleRes.get();
1362-
1363-
// LBRs are stored in reverse execution order. NextLBR refers to the next
1364-
// executed branch record.
1365-
const LBREntry *NextLBR = nullptr;
1366-
for (const LBREntry &LBR : Sample.LBR) {
1367-
if (NextLBR) {
1368-
// Record fall-through trace.
1369-
const uint64_t TraceFrom = LBR.To;
1370-
const uint64_t TraceTo = NextLBR->From;
1371-
++FallthroughLBRs[Trace(TraceFrom, TraceTo)].InternCount;
1372-
}
1373-
NextLBR = &LBR;
1374-
}
1375-
if (!Sample.LBR.empty()) {
1376-
HM.registerAddress(Sample.LBR.front().To);
1377-
HM.registerAddress(Sample.LBR.back().From);
1378-
}
1379-
NumTotalSamples += Sample.LBR.size();
1380-
}
1381-
outs() << "HEATMAP: read " << NumTotalSamples << " LBR samples\n";
1382-
outs() << "HEATMAP: " << FallthroughLBRs.size() << " unique traces\n";
1383-
}
13841308

13851309
if (!NumTotalSamples) {
13861310
if (opts::BasicAggregation) {
@@ -1396,6 +1320,8 @@ std::error_code DataAggregator::printLBRHeatMap() {
13961320

13971321
outs() << "HEATMAP: building heat map...\n";
13981322

1323+
for (const auto &[PC, Hits] : BasicSamples)
1324+
HM.registerAddress(PC, Hits);
13991325
for (const auto &LBR : FallthroughLBRs) {
14001326
const Trace &Trace = LBR.first;
14011327
const FTInfo &Info = LBR.second;
@@ -1445,7 +1371,10 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
14451371
const uint64_t TraceTo = NextLBR->From;
14461372
const BinaryFunction *TraceBF =
14471373
getBinaryFunctionContainingAddress(TraceFrom);
1448-
if (TraceBF && TraceBF->containsAddress(TraceTo)) {
1374+
if (opts::HeatmapMode) {
1375+
FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
1376+
++Info.InternCount;
1377+
} else if (TraceBF && TraceBF->containsAddress(TraceTo)) {
14491378
FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
14501379
if (TraceBF->containsAddress(LBR.From))
14511380
++Info.InternCount;
@@ -1479,6 +1408,11 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
14791408
}
14801409
NextLBR = &LBR;
14811410

1411+
if (opts::HeatmapMode) {
1412+
TakenBranchInfo &Info = BranchLBRs[Trace(LBR.From, LBR.To)];
1413+
++Info.TakenCount;
1414+
continue;
1415+
}
14821416
uint64_t From = getBinaryFunctionContainingAddress(LBR.From) ? LBR.From : 0;
14831417
uint64_t To = getBinaryFunctionContainingAddress(LBR.To) ? LBR.To : 0;
14841418
if (!From && !To)
@@ -1487,6 +1421,10 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
14871421
++Info.TakenCount;
14881422
Info.MispredCount += LBR.Mispred;
14891423
}
1424+
if (opts::HeatmapMode && !Sample.LBR.empty()) {
1425+
++BasicSamples[Sample.LBR.front().To];
1426+
++BasicSamples[Sample.LBR.back().From];
1427+
}
14901428
}
14911429

14921430
void DataAggregator::printColdSamplesDiagnostic() const {
@@ -1663,13 +1601,15 @@ std::error_code DataAggregator::parseBasicEvents() {
16631601

16641602
if (!Sample->PC)
16651603
continue;
1604+
++NumTotalSamples;
16661605

16671606
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
16681607
BF->setHasProfileAvailable();
16691608

16701609
++BasicSamples[Sample->PC];
16711610
EventNames.insert(Sample->EventName);
16721611
}
1612+
outs() << "PERF2BOLT: read " << NumTotalSamples << " basic samples\n";
16731613

16741614
return std::error_code();
16751615
}
@@ -1682,7 +1622,6 @@ void DataAggregator::processBasicEvents() {
16821622
for (auto &Sample : BasicSamples) {
16831623
const uint64_t PC = Sample.first;
16841624
const uint64_t HitCount = Sample.second;
1685-
NumTotalSamples += HitCount;
16861625
BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
16871626
if (!Func) {
16881627
OutOfRangeSamples += HitCount;
@@ -1691,7 +1630,6 @@ void DataAggregator::processBasicEvents() {
16911630

16921631
doSample(*Func, PC, HitCount);
16931632
}
1694-
outs() << "PERF2BOLT: read " << NumTotalSamples << " samples\n";
16951633

16961634
printBasicSamplesDiagnostics(OutOfRangeSamples);
16971635
}

bolt/lib/Profile/DataReader.cpp

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -128,6 +128,13 @@ uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const {
128128
return Result;
129129
}
130130

131+
uint64_t FuncSampleData::getSamples() const {
132+
uint64_t Result = 0;
133+
for (const SampleInfo &I : Data)
134+
Result += I.Hits;
135+
return Result;
136+
}
137+
131138
void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
132139
auto Iter = Index.find(Offset);
133140
if (Iter == Index.end()) {
@@ -407,12 +414,12 @@ void DataReader::matchProfileData(BinaryFunction &BF) {
407414
FuncBranchData *FBD = getBranchData(BF);
408415
if (FBD) {
409416
BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
410-
BF.RawBranchCount = FBD->getNumExecutedBranches();
417+
BF.RawSampleCount = FBD->getNumExecutedBranches();
411418
if (BF.ProfileMatchRatio == 1.0f) {
412419
if (fetchProfileForOtherEntryPoints(BF)) {
413420
BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
414421
BF.ExecutionCount = FBD->ExecutionCount;
415-
BF.RawBranchCount = FBD->getNumExecutedBranches();
422+
BF.RawSampleCount = FBD->getNumExecutedBranches();
416423
}
417424
return;
418425
}

0 commit comments

Comments
 (0)