@@ -23,6 +23,9 @@ const char *BoltAddressTranslation::SECTION_NAME = ".note.bolt_bat";
2323void BoltAddressTranslation::writeEntriesForBB (MapTy &Map,
2424 const BinaryBasicBlock &BB,
2525 uint64_t FuncAddress) {
26+ uint64_t HotFuncAddress = ColdPartSource.count (FuncAddress)
27+ ? ColdPartSource[FuncAddress]
28+ : FuncAddress;
2629 const uint64_t BBOutputOffset =
2730 BB.getOutputAddressRange ().first - FuncAddress;
2831 const uint32_t BBInputOffset = BB.getInputOffset ();
@@ -39,6 +42,9 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
3942 LLVM_DEBUG (dbgs () << " BB " << BB.getName () << " \n " );
4043 LLVM_DEBUG (dbgs () << " Key: " << Twine::utohexstr (BBOutputOffset)
4144 << " Val: " << Twine::utohexstr (BBInputOffset) << " \n " );
45+ LLVM_DEBUG (dbgs () << formatv (" Hash: {0:x}\n " ,
46+ getBBHash (HotFuncAddress, BBInputOffset)));
47+ (void )HotFuncAddress;
4248 // In case of conflicts (same Key mapping to different Vals), the last
4349 // update takes precedence. Of course it is not ideal to have conflicts and
4450 // those happen when we have an empty BB that either contained only
@@ -72,20 +78,28 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
7278 LLVM_DEBUG (dbgs () << " BOLT-DEBUG: Writing BOLT Address Translation Tables\n " );
7379 for (auto &BFI : BC.getBinaryFunctions ()) {
7480 const BinaryFunction &Function = BFI.second ;
81+ const uint64_t InputAddress = Function.getAddress ();
82+ const uint64_t OutputAddress = Function.getOutputAddress ();
7583 // We don't need a translation table if the body of the function hasn't
7684 // changed
7785 if (Function.isIgnored () || (!BC.HasRelocations && !Function.isSimple ()))
7886 continue ;
7987
88+ // TBD: handle BAT functions w/multiple entry points.
89+ if (Function.isMultiEntry ())
90+ continue ;
91+
8092 LLVM_DEBUG (dbgs () << " Function name: " << Function.getPrintName () << " \n " );
8193 LLVM_DEBUG (dbgs () << " Address reference: 0x"
8294 << Twine::utohexstr (Function.getOutputAddress ()) << " \n " );
95+ LLVM_DEBUG (dbgs () << formatv (" Hash: {0:x}\n " , getBFHash (OutputAddress)));
8396
8497 MapTy Map;
8598 for (const BinaryBasicBlock *const BB :
8699 Function.getLayout ().getMainFragment ())
87100 writeEntriesForBB (Map, *BB, Function.getOutputAddress ());
88101 Maps.emplace (Function.getOutputAddress (), std::move (Map));
102+ ReverseMap.emplace (OutputAddress, InputAddress);
89103
90104 if (!Function.isSplit ())
91105 continue ;
@@ -94,12 +108,12 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
94108 LLVM_DEBUG (dbgs () << " Cold part\n " );
95109 for (const FunctionFragment &FF :
96110 Function.getLayout ().getSplitFragments ()) {
111+ ColdPartSource.emplace (FF.getAddress (), Function.getOutputAddress ());
97112 Map.clear ();
98113 for (const BinaryBasicBlock *const BB : FF)
99114 writeEntriesForBB (Map, *BB, FF.getAddress ());
100115
101116 Maps.emplace (FF.getAddress (), std::move (Map));
102- ColdPartSource.emplace (FF.getAddress (), Function.getOutputAddress ());
103117 }
104118 }
105119
@@ -109,6 +123,11 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
109123 writeMaps</* Cold=*/ true >(Maps, PrevAddress, OS);
110124
111125 BC.outs () << " BOLT-INFO: Wrote " << Maps.size () << " BAT maps\n " ;
126+ const uint64_t NumBBHashes = std::accumulate (
127+ FuncHashes.begin (), FuncHashes.end (), 0ull ,
128+ [](size_t Acc, const auto &B) { return Acc + B.second .second .size (); });
129+ BC.outs () << " BOLT-INFO: Wrote " << FuncHashes.size () << " function and "
130+ << NumBBHashes << " basic block hashes\n " ;
112131}
113132
114133APInt BoltAddressTranslation::calculateBranchEntriesBitMask (MapTy &Map,
@@ -155,6 +174,11 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
155174 // Only process cold fragments in cold mode, and vice versa.
156175 if (Cold != ColdPartSource.count (Address))
157176 continue ;
177+ // NB: here we use the input address because hashes are saved early (in
178+ // `saveMetadata`) before output addresses are assigned.
179+ const uint64_t HotInputAddress =
180+ ReverseMap[Cold ? ColdPartSource[Address] : Address];
181+ std::pair<size_t , BBHashMap> &FuncHashPair = FuncHashes[HotInputAddress];
158182 MapTy &Map = MapEntry.second ;
159183 const uint32_t NumEntries = Map.size ();
160184 LLVM_DEBUG (dbgs () << " Writing " << NumEntries << " entries for 0x"
@@ -166,6 +190,10 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
166190 std::distance (ColdPartSource.begin (), ColdPartSource.find (Address));
167191 encodeULEB128 (HotIndex - PrevIndex, OS);
168192 PrevIndex = HotIndex;
193+ } else {
194+ // Function hash
195+ LLVM_DEBUG (dbgs () << " Hash: " << formatv (" {0:x}\n " , FuncHashPair.first ));
196+ OS.write (reinterpret_cast <char *>(&FuncHashPair.first ), 8 );
169197 }
170198 encodeULEB128 (NumEntries, OS);
171199 // For hot fragments only: encode the number of equal offsets
@@ -197,6 +225,13 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
197225 if (Index++ >= EqualElems)
198226 encodeSLEB128 (KeyVal.second - InOffset, OS);
199227 InOffset = KeyVal.second ; // Keeping InOffset as if BRANCHENTRY is encoded
228+ if ((InOffset & BRANCHENTRY) == 0 ) {
229+ // Basic block hash
230+ size_t BBHash = FuncHashPair.second [InOffset >> 1 ];
231+ OS.write (reinterpret_cast <char *>(&BBHash), 8 );
232+ LLVM_DEBUG (dbgs () << formatv (" {0:x} -> {1:x} {2:x}\n " , KeyVal.first ,
233+ InOffset >> 1 , BBHash));
234+ }
200235 }
201236 }
202237}
@@ -239,12 +274,18 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
239274 size_t HotIndex = 0 ;
240275 for (uint32_t I = 0 ; I < NumFunctions; ++I) {
241276 const uint64_t Address = PrevAddress + DE.getULEB128 (&Offset, &Err);
277+ uint64_t HotAddress = Cold ? 0 : Address;
242278 PrevAddress = Address;
243279 if (Cold) {
244280 HotIndex += DE.getULEB128 (&Offset, &Err);
245- ColdPartSource.emplace (Address, HotFuncs[HotIndex]);
281+ HotAddress = HotFuncs[HotIndex];
282+ ColdPartSource.emplace (Address, HotAddress);
246283 } else {
247284 HotFuncs.push_back (Address);
285+ // Function hash
286+ const size_t FuncHash = DE.getU64 (&Offset, &Err);
287+ FuncHashes[Address].first = FuncHash;
288+ LLVM_DEBUG (dbgs () << formatv (" {0:x}: hash {1:x}\n " , Address, FuncHash));
248289 }
249290 const uint32_t NumEntries = DE.getULEB128 (&Offset, &Err);
250291 // Equal offsets, hot fragments only.
@@ -288,12 +329,22 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
288329 InputOffset += InputDelta;
289330 }
290331 Map.insert (std::pair<uint32_t , uint32_t >(OutputOffset, InputOffset));
291- LLVM_DEBUG (
292- dbgs () << formatv (" {0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}\n " ,
293- OutputOffset, InputOffset, OutputDelta,
294- getULEB128Size (OutputDelta), InputDelta,
295- (J < EqualElems) ? 0 : getSLEB128Size (InputDelta),
296- OutputAddress));
332+ size_t BBHash = 0 ;
333+ const bool IsBranchEntry = InputOffset & BRANCHENTRY;
334+ if (!IsBranchEntry) {
335+ BBHash = DE.getU64 (&Offset, &Err);
336+ // Map basic block hash to hot fragment by input offset
337+ FuncHashes[HotAddress].second .emplace (InputOffset >> 1 , BBHash);
338+ }
339+ LLVM_DEBUG ({
340+ dbgs () << formatv (
341+ " {0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}" , OutputOffset,
342+ InputOffset, OutputDelta, getULEB128Size (OutputDelta), InputDelta,
343+ (J < EqualElems) ? 0 : getSLEB128Size (InputDelta), OutputAddress);
344+ if (BBHash)
345+ dbgs () << formatv (" {0:x}" , BBHash);
346+ dbgs () << ' \n ' ;
347+ });
297348 }
298349 Maps.insert (std::pair<uint64_t , MapTy>(Address, Map));
299350 }
@@ -303,7 +354,12 @@ void BoltAddressTranslation::dump(raw_ostream &OS) {
303354 const size_t NumTables = Maps.size ();
304355 OS << " BAT tables for " << NumTables << " functions:\n " ;
305356 for (const auto &MapEntry : Maps) {
306- OS << " Function Address: 0x" << Twine::utohexstr (MapEntry.first ) << " \n " ;
357+ const uint64_t Address = MapEntry.first ;
358+ const uint64_t HotAddress = fetchParentAddress (Address);
359+ OS << " Function Address: 0x" << Twine::utohexstr (Address);
360+ if (HotAddress == 0 )
361+ OS << formatv (" , hash: {0:x}" , getBFHash (Address));
362+ OS << " \n " ;
307363 OS << " BB mappings:\n " ;
308364 for (const auto &Entry : MapEntry.second ) {
309365 const bool IsBranch = Entry.second & BRANCHENTRY;
@@ -312,6 +368,9 @@ void BoltAddressTranslation::dump(raw_ostream &OS) {
312368 << " 0x" << Twine::utohexstr (Val);
313369 if (IsBranch)
314370 OS << " (branch)" ;
371+ else
372+ OS << formatv (" hash: {0:x}" ,
373+ getBBHash (HotAddress ? HotAddress : Address, Val));
315374 OS << " \n " ;
316375 }
317376 OS << " \n " ;
@@ -439,5 +498,15 @@ void BoltAddressTranslation::saveMetadata(BinaryContext &BC) {
439498 BB.getHash ());
440499 }
441500}
501+
// Returns the stored hash of a single basic block.
//
// Looks up the FuncHashes entry keyed by FuncOutputAddress, then indexes that
// entry's per-block map (the pair's second member) by BBInputOffset.
// Both lookups go through `.at()`, so the function entry and the block entry
// are expected to exist already; behavior on a missing key depends on the
// container types, which are not visible in this chunk.
//
// NOTE(review): writeMaps indexes FuncHashes by the hot function's *input*
// address (its comment says hashes are saved before output addresses are
// assigned), whereas this parameter is named as an output address. Confirm
// which address write-path callers should pass.
502+ size_t BoltAddressTranslation::getBBHash (uint64_t FuncOutputAddress,
503+ uint32_t BBInputOffset) const {
504+ return FuncHashes.at (FuncOutputAddress).second .at (BBInputOffset);
505+ }
506+
// Returns the stored hash of a whole function: the first member of the
// FuncHashes entry keyed by OutputAddress.
//
// The lookup uses `.at()`, so an entry for the given address is expected to
// exist already; behavior on a missing key depends on the container type,
// which is not visible in this chunk.
//
// NOTE(review): writeMaps indexes FuncHashes by the function's *input*
// address, while parseMaps fills it using the address read from the table.
// Confirm that callers pass the key matching how FuncHashes was populated.
507+ size_t BoltAddressTranslation::getBFHash (uint64_t OutputAddress) const {
508+ return FuncHashes.at (OutputAddress).first ;
509+ }
510+
442511} // namespace bolt
443512} // namespace llvm
0 commit comments