Skip to content

Commit 3a58be3

Browse files
committed
[𝘀𝗽𝗿] changes to main this commit is based on
Created using spr 1.3.4 [skip ci]
1 parent 88738a7 commit 3a58be3

14 files changed

+176
-207
lines changed

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,10 @@ class BinaryFunction {
388388
/// The profile data for the number of times the function was executed.
389389
uint64_t ExecutionCount{COUNT_NO_PROFILE};
390390

391+
/// Profile data for the number of times this function was entered from
392+
/// external code (DSO, JIT, etc).
393+
uint64_t ExternEntryCount{0};
394+
391395
/// Profile match ratio.
392396
float ProfileMatchRatio{0.0f};
393397

@@ -1877,6 +1881,10 @@ class BinaryFunction {
18771881
return *this;
18781882
}
18791883

1884+
/// Set the profile data for the number of times the function was entered from
1885+
/// external code (DSO/JIT).
1886+
void setExternEntryCount(uint64_t Count) { ExternEntryCount = Count; }
1887+
18801888
/// Adjust execution count for the function by a given \p Count. The value
18811889
/// \p Count will be subtracted from the current function count.
18821890
///
@@ -1904,6 +1912,10 @@ class BinaryFunction {
19041912
/// Return COUNT_NO_PROFILE if there's no profile info.
19051913
uint64_t getExecutionCount() const { return ExecutionCount; }
19061914

1915+
/// Return the profile information about the number of times the function was
1916+
/// entered from external code (DSO/JIT).
1917+
uint64_t getExternEntryCount() const { return ExternEntryCount; }
1918+
19071919
/// Return the raw profile information about the number of branch
19081920
/// executions corresponding to this function.
19091921
uint64_t getRawSampleCount() const { return RawSampleCount; }

bolt/include/bolt/Profile/BoltAddressTranslation.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,12 +98,13 @@ class BoltAddressTranslation {
9898
bool IsBranchSrc) const;
9999

100100
/// Use the map keys containing basic block addresses to infer fall-throughs
101-
/// taken in the path started at FirstLBR.To and ending at SecondLBR.From.
101+
/// taken in the path starting at \p From and ending at \p To.
102102
/// Return std::nullopt if trace is invalid or the list of fall-throughs
103103
/// otherwise.
104104
std::optional<FallthroughListTy> getFallthroughsInTrace(uint64_t FuncAddress,
105105
uint64_t From,
106-
uint64_t To) const;
106+
uint64_t To,
107+
bool IsReturn) const;
107108

108109
/// If available, fetch the address of the hot part linked to the cold part
109110
/// at \p Address. Return 0 otherwise.

bolt/include/bolt/Profile/DataAggregator.h

Lines changed: 50 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,13 @@ class DataAggregator : public DataReader {
7878
static bool checkPerfDataMagic(StringRef FileName);
7979

8080
private:
81+
/// One element of the LBR list stored in PerfBranchSample.
/// From and To are printed as hex addresses by the LBREntry operator<<, and
/// Mispred selects the 'M' (set) or 'P' (clear) suffix in that output.
/// NOTE(review): presumably From/To are the branch source/target addresses and
/// Mispred is the hardware misprediction flag - confirm against the parser.
struct LBREntry {
82+
uint64_t From;
83+
uint64_t To;
84+
bool Mispred;
85+
};
86+
friend raw_ostream &operator<<(raw_ostream &OS, const LBREntry &);
87+
8188
struct PerfBranchSample {
8289
SmallVector<LBREntry, 32> LBR;
8390
};
@@ -92,35 +99,43 @@ class DataAggregator : public DataReader {
9299
uint64_t Addr;
93100
};
94101

102+
/// Container for the unit of branch data.
103+
/// Backwards compatible with legacy use for branches and fall-throughs:
104+
/// - if \p Branch is FT_ONLY or FT_EXTERNAL_ORIGIN, the trace only contains
105+
/// fall-through data,
106+
/// - if \p To is EXTERNAL, the trace only contains branch data.
95107
struct Trace {
108+
static constexpr const uint64_t EXTERNAL = 0ULL;
109+
static constexpr const uint64_t FT_ONLY = -1ULL;
110+
static constexpr const uint64_t FT_EXTERNAL_ORIGIN = -2ULL;
111+
112+
uint64_t Branch;
96113
uint64_t From;
97114
uint64_t To;
98-
Trace(uint64_t From, uint64_t To) : From(From), To(To) {}
99115
bool operator==(const Trace &Other) const {
100-
return From == Other.From && To == Other.To;
116+
return Branch == Other.Branch && From == Other.From && To == Other.To;
101117
}
102118
};
119+
friend raw_ostream &operator<<(raw_ostream &OS, const Trace &);
103120

104121
struct TraceHash {
105122
size_t operator()(const Trace &L) const {
106-
return std::hash<uint64_t>()(L.From << 32 | L.To);
123+
return llvm::hash_combine(L.Branch, L.From, L.To);
107124
}
108125
};
109126

110-
struct FTInfo {
111-
uint64_t InternCount{0};
112-
uint64_t ExternCount{0};
113-
};
114-
115127
struct TakenBranchInfo {
116128
uint64_t TakenCount{0};
117129
uint64_t MispredCount{0};
118130
};
119131

120132
/// Intermediate storage for profile data. We save the results of parsing
121133
/// and use them later for processing and assigning profile.
122-
std::unordered_map<Trace, TakenBranchInfo, TraceHash> BranchLBRs;
123-
std::unordered_map<Trace, FTInfo, TraceHash> FallthroughLBRs;
134+
std::unordered_map<Trace, TakenBranchInfo, TraceHash> TraceMap;
135+
std::vector<std::pair<Trace, TakenBranchInfo>> Traces;
136+
/// Pre-populated addresses of returns, coming from pre-aggregated data or
137+
/// disassembly. Used to disambiguate call-continuation fall-throughs.
138+
std::unordered_set<uint64_t> Returns;
124139
std::unordered_map<uint64_t, uint64_t> BasicSamples;
125140
std::vector<PerfMemSample> MemSamples;
126141

@@ -193,8 +208,8 @@ class DataAggregator : public DataReader {
193208
/// Return a vector of offsets corresponding to a trace in a function
194209
/// if the trace is valid, std::nullopt otherwise.
195210
std::optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
196-
getFallthroughsInTrace(BinaryFunction &BF, const LBREntry &First,
197-
const LBREntry &Second, uint64_t Count = 1) const;
211+
getFallthroughsInTrace(BinaryFunction &BF, const Trace &Trace, uint64_t Count,
212+
bool IsReturn) const;
198213

199214
/// Record external entry into the function \p BF.
200215
///
@@ -255,11 +270,10 @@ class DataAggregator : public DataReader {
255270
uint64_t Mispreds);
256271

257272
/// Register the branch described by \p Trace.
258-
bool doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds);
273+
bool doBranch(const Trace &Trace, uint64_t Count, uint64_t Mispreds);
259274

260275
/// Register a trace between two LBR entries supplied in execution order.
261-
bool doTrace(const LBREntry &First, const LBREntry &Second,
262-
uint64_t Count = 1);
276+
bool doTrace(const Trace &Trace, uint64_t Count);
263277

264278
/// Parser helpers
265279
/// Return false if we exhausted our parser buffer and finished parsing
@@ -476,7 +490,6 @@ class DataAggregator : public DataReader {
476490

477491
/// Debugging dump methods
478492
void dump() const;
479-
void dump(const LBREntry &LBR) const;
480493
void dump(const PerfBranchSample &Sample) const;
481494
void dump(const PerfMemSample &Sample) const;
482495

@@ -504,6 +517,27 @@ class DataAggregator : public DataReader {
504517

505518
friend class YAMLProfileWriter;
506519
};
520+
521+
/// Print \p L to \p OS as "<from> -> <to>/<flag>", where both addresses are
/// formatted in hexadecimal and the flag is 'M' when L.Mispred is set and 'P'
/// otherwise. Returns \p OS to allow chaining.
inline raw_ostream &operator<<(raw_ostream &OS,
522+
const DataAggregator::LBREntry &L) {
523+
OS << formatv("{0:x} -> {1:x}/{2}", L.From, L.To, L.Mispred ? 'M' : 'P');
524+
return OS;
525+
}
526+
527+
/// Print \p T to \p OS as "[<branch> -> ]<from>[ ... <to>]" using hexadecimal
/// values without a "0x" prefix. The branch part is omitted when T.Branch is
/// one of the fall-through sentinels, and the " ... <to>" part is omitted when
/// T.To is zero. Returns \p OS to allow chaining.
inline raw_ostream &operator<<(raw_ostream &OS,
528+
const DataAggregator::Trace &T) {
529+
switch (T.Branch) {
530+
// FT_ONLY and FT_EXTERNAL_ORIGIN mark traces carrying only fall-through
// data, so there is no branch address to print.
case DataAggregator::Trace::FT_ONLY:
531+
case DataAggregator::Trace::FT_EXTERNAL_ORIGIN:
532+
break;
533+
default:
534+
OS << Twine::utohexstr(T.Branch) << " -> ";
535+
}
536+
OS << Twine::utohexstr(T.From);
537+
// Trace::EXTERNAL is 0, so a zero To means the trace holds only branch data
// and has no fall-through end to print.
if (T.To)
538+
OS << " ... " << Twine::utohexstr(T.To);
539+
return OS;
540+
}
507541
} // namespace bolt
508542
} // namespace llvm
509543

bolt/include/bolt/Profile/DataReader.h

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,6 @@ namespace bolt {
3232

3333
class BinaryFunction;
3434

35-
struct LBREntry {
36-
uint64_t From;
37-
uint64_t To;
38-
bool Mispred;
39-
};
40-
41-
inline raw_ostream &operator<<(raw_ostream &OS, const LBREntry &LBR) {
42-
OS << "0x" << Twine::utohexstr(LBR.From) << " -> 0x"
43-
<< Twine::utohexstr(LBR.To);
44-
return OS;
45-
}
46-
4735
struct Location {
4836
bool IsSymbol;
4937
StringRef Name;
@@ -109,6 +97,9 @@ struct FuncBranchData {
10997
/// Total execution count for the function.
11098
int64_t ExecutionCount{0};
11199

100+
/// Total entry count from external code for the function.
101+
uint64_t ExternEntryCount{0};
102+
112103
/// Indicate if the data was used.
113104
bool Used{false};
114105

bolt/include/bolt/Profile/ProfileYAMLMapping.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ struct BinaryFunctionProfile {
206206
uint32_t Id{0};
207207
llvm::yaml::Hex64 Hash{0};
208208
uint64_t ExecCount{0};
209+
uint64_t ExternEntryCount{0};
209210
std::vector<BinaryBasicBlockProfile> Blocks;
210211
std::vector<InlineTreeNode> InlineTree;
211212
bool Used{false};
@@ -218,6 +219,7 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> {
218219
YamlIO.mapRequired("fid", BFP.Id);
219220
YamlIO.mapRequired("hash", BFP.Hash);
220221
YamlIO.mapRequired("exec", BFP.ExecCount);
222+
YamlIO.mapOptional("extern", BFP.ExternEntryCount, 0);
221223
YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks);
222224
YamlIO.mapOptional("blocks", BFP.Blocks,
223225
std::vector<bolt::BinaryBasicBlockProfile>());

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,8 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
471471
OS << "\n Sample Count: " << RawSampleCount;
472472
OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
473473
}
474+
if (ExternEntryCount)
475+
OS << "\n Extern Entry Count: " << ExternEntryCount;
474476

475477
if (opts::PrintDynoStats && !getLayout().block_empty()) {
476478
OS << '\n';

bolt/lib/Passes/ProfileQualityStats.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,9 @@ void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
532532
std::vector<uint64_t> &MaxCountMap = TotalMaxCountMaps[FunctionNum];
533533
std::vector<uint64_t> &MinCountMap = TotalMinCountMaps[FunctionNum];
534534

535+
// Record external entry count into CallGraphIncomingFlows
536+
CallGraphIncomingFlows[FunctionNum] += Function->getExternEntryCount();
537+
535538
// Update MaxCountMap, MinCountMap, and CallGraphIncomingFlows
536539
auto recordCall = [&](const BinaryBasicBlock *SourceBB,
537540
const MCSymbol *DestSymbol, uint64_t Count,

bolt/lib/Profile/BoltAddressTranslation.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -511,8 +511,8 @@ uint64_t BoltAddressTranslation::translate(uint64_t FuncAddress,
511511

512512
std::optional<BoltAddressTranslation::FallthroughListTy>
513513
BoltAddressTranslation::getFallthroughsInTrace(uint64_t FuncAddress,
514-
uint64_t From,
515-
uint64_t To) const {
514+
uint64_t From, uint64_t To,
515+
bool IsReturn) const {
516516
SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
517517

518518
// Filter out trivial case
@@ -530,6 +530,12 @@ BoltAddressTranslation::getFallthroughsInTrace(uint64_t FuncAddress,
530530
auto FromIter = Map.upper_bound(From);
531531
if (FromIter == Map.begin())
532532
return Res;
533+
534+
// For fall-throughs originating at returns, go back one entry to cover call
535+
// site.
536+
if (IsReturn)
537+
--FromIter;
538+
533539
// Skip instruction entries, to create fallthroughs we are only interested in
534540
// BB boundaries
535541
do {
@@ -546,7 +552,7 @@ BoltAddressTranslation::getFallthroughsInTrace(uint64_t FuncAddress,
546552
return Res;
547553

548554
for (auto Iter = FromIter; Iter != ToIter;) {
549-
const uint32_t Src = Iter->first;
555+
const uint32_t Src = Iter->second >> 1;
550556
if (Iter->second & BRANCHENTRY) {
551557
++Iter;
552558
continue;
@@ -557,7 +563,7 @@ BoltAddressTranslation::getFallthroughsInTrace(uint64_t FuncAddress,
557563
++Iter;
558564
if (Iter->second & BRANCHENTRY)
559565
break;
560-
Res.emplace_back(Src, Iter->first);
566+
Res.emplace_back(Src, Iter->second >> 1);
561567
}
562568

563569
return Res;

0 commit comments

Comments
 (0)