@@ -23,6 +23,9 @@ const char *BoltAddressTranslation::SECTION_NAME = ".note.bolt_bat";
2323void BoltAddressTranslation::writeEntriesForBB (MapTy &Map,
2424 const BinaryBasicBlock &BB,
2525 uint64_t FuncAddress) {
26+ uint64_t HotFuncAddress = ColdPartSource.count (FuncAddress)
27+ ? ColdPartSource[FuncAddress]
28+ : FuncAddress;
2629 const uint64_t BBOutputOffset =
2730 BB.getOutputAddressRange ().first - FuncAddress;
2831 const uint32_t BBInputOffset = BB.getInputOffset ();
@@ -39,6 +42,8 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
3942 LLVM_DEBUG (dbgs () << " BB " << BB.getName () << " \n " );
4043 LLVM_DEBUG (dbgs () << " Key: " << Twine::utohexstr (BBOutputOffset)
4144 << " Val: " << Twine::utohexstr (BBInputOffset) << " \n " );
45+ LLVM_DEBUG (dbgs () << formatv (" Hash: {0:x}\n " ,
46+ getBBHash (HotFuncAddress, BBInputOffset)));
4247 // In case of conflicts (same Key mapping to different Vals), the last
4348 // update takes precedence. Of course it is not ideal to have conflicts and
4449 // those happen when we have an empty BB that either contained only
@@ -72,20 +77,28 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
7277 LLVM_DEBUG (dbgs () << " BOLT-DEBUG: Writing BOLT Address Translation Tables\n " );
7378 for (auto &BFI : BC.getBinaryFunctions ()) {
7479 const BinaryFunction &Function = BFI.second ;
80+ const uint64_t InputAddress = Function.getAddress ();
81+ const uint64_t OutputAddress = Function.getOutputAddress ();
7582 // We don't need a translation table if the body of the function hasn't
7683 // changed
7784 if (Function.isIgnored () || (!BC.HasRelocations && !Function.isSimple ()))
7885 continue ;
7986
87+ // TBD: handle BAT functions w/multiple entry points.
88+ if (Function.isMultiEntry ())
89+ continue ;
90+
8091 LLVM_DEBUG (dbgs () << " Function name: " << Function.getPrintName () << " \n " );
8192 LLVM_DEBUG (dbgs () << " Address reference: 0x"
8293 << Twine::utohexstr (Function.getOutputAddress ()) << " \n " );
94+ LLVM_DEBUG (dbgs () << formatv (" Hash: {0:x}\n " , getBFHash (OutputAddress)));
8395
8496 MapTy Map;
8597 for (const BinaryBasicBlock *const BB :
8698 Function.getLayout ().getMainFragment ())
8799 writeEntriesForBB (Map, *BB, Function.getOutputAddress ());
88100 Maps.emplace (Function.getOutputAddress (), std::move (Map));
101+ ReverseMap.emplace (OutputAddress, InputAddress);
89102
90103 if (!Function.isSplit ())
91104 continue ;
@@ -94,12 +107,12 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
94107 LLVM_DEBUG (dbgs () << " Cold part\n " );
95108 for (const FunctionFragment &FF :
96109 Function.getLayout ().getSplitFragments ()) {
110+ ColdPartSource.emplace (FF.getAddress (), Function.getOutputAddress ());
97111 Map.clear ();
98112 for (const BinaryBasicBlock *const BB : FF)
99113 writeEntriesForBB (Map, *BB, FF.getAddress ());
100114
101115 Maps.emplace (FF.getAddress (), std::move (Map));
102- ColdPartSource.emplace (FF.getAddress (), Function.getOutputAddress ());
103116 }
104117 }
105118
@@ -109,6 +122,11 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
109122 writeMaps</* Cold=*/ true >(Maps, PrevAddress, OS);
110123
111124 BC.outs () << " BOLT-INFO: Wrote " << Maps.size () << " BAT maps\n " ;
125+ const uint64_t NumBBHashes = std::accumulate (
126+ FuncHashes.begin (), FuncHashes.end (), 0ull ,
127+ [](size_t Acc, const auto &B) { return Acc + B.second .second .size (); });
128+ BC.outs () << " BOLT-INFO: Wrote " << FuncHashes.size () << " function and "
129+ << NumBBHashes << " basic block hashes\n " ;
112130}
113131
114132APInt BoltAddressTranslation::calculateBranchEntriesBitMask (MapTy &Map,
@@ -155,6 +173,11 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
155173 // Only process cold fragments in cold mode, and vice versa.
156174 if (Cold != ColdPartSource.count (Address))
157175 continue ;
176+ // NB: here we use the input address because hashes are saved early (in
177+ // `saveMetadata`) before output addresses are assigned.
178+ const uint64_t HotInputAddress =
179+ ReverseMap[Cold ? ColdPartSource[Address] : Address];
180+ std::pair<size_t , BBHashMap> &FuncHashPair = FuncHashes[HotInputAddress];
158181 MapTy &Map = MapEntry.second ;
159182 const uint32_t NumEntries = Map.size ();
160183 LLVM_DEBUG (dbgs () << " Writing " << NumEntries << " entries for 0x"
@@ -166,6 +189,10 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
166189 std::distance (ColdPartSource.begin (), ColdPartSource.find (Address));
167190 encodeULEB128 (HotIndex - PrevIndex, OS);
168191 PrevIndex = HotIndex;
192+ } else {
193+ // Function hash
194+ LLVM_DEBUG (dbgs () << " Hash: " << formatv (" {0:x}\n " , FuncHashPair.first ));
195+ OS.write (reinterpret_cast <char *>(&FuncHashPair.first ), 8 );
169196 }
170197 encodeULEB128 (NumEntries, OS);
171198 // For hot fragments only: encode the number of equal offsets
@@ -197,6 +224,13 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
197224 if (Index++ >= EqualElems)
198225 encodeSLEB128 (KeyVal.second - InOffset, OS);
199226 InOffset = KeyVal.second ; // Keeping InOffset as if BRANCHENTRY is encoded
227+ if ((InOffset & BRANCHENTRY) == 0 ) {
228+ // Basic block hash
229+ size_t BBHash = FuncHashPair.second [InOffset >> 1 ];
230+ OS.write (reinterpret_cast <char *>(&BBHash), 8 );
231+ LLVM_DEBUG (dbgs () << formatv (" {0:x} -> {1:x} {2:x}\n " , KeyVal.first ,
232+ InOffset >> 1 , BBHash));
233+ }
200234 }
201235 }
202236}
@@ -239,12 +273,18 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
239273 size_t HotIndex = 0 ;
240274 for (uint32_t I = 0 ; I < NumFunctions; ++I) {
241275 const uint64_t Address = PrevAddress + DE.getULEB128 (&Offset, &Err);
276+ uint64_t HotAddress = Cold ? 0 : Address;
242277 PrevAddress = Address;
243278 if (Cold) {
244279 HotIndex += DE.getULEB128 (&Offset, &Err);
245- ColdPartSource.emplace (Address, HotFuncs[HotIndex]);
280+ HotAddress = HotFuncs[HotIndex];
281+ ColdPartSource.emplace (Address, HotAddress);
246282 } else {
247283 HotFuncs.push_back (Address);
284+ // Function hash
285+ const size_t FuncHash = DE.getU64 (&Offset, &Err);
286+ FuncHashes[Address].first = FuncHash;
287+ LLVM_DEBUG (dbgs () << formatv (" {0:x}: hash {1:x}\n " , Address, FuncHash));
248288 }
249289 const uint32_t NumEntries = DE.getULEB128 (&Offset, &Err);
250290 // Equal offsets, hot fragments only.
@@ -288,12 +328,22 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
288328 InputOffset += InputDelta;
289329 }
290330 Map.insert (std::pair<uint32_t , uint32_t >(OutputOffset, InputOffset));
291- LLVM_DEBUG (
292- dbgs () << formatv (" {0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}\n " ,
293- OutputOffset, InputOffset, OutputDelta,
294- getULEB128Size (OutputDelta), InputDelta,
295- (J < EqualElems) ? 0 : getSLEB128Size (InputDelta),
296- OutputAddress));
331+ size_t BBHash = 0 ;
332+ const bool IsBranchEntry = InputOffset & BRANCHENTRY;
333+ if (!IsBranchEntry) {
334+ BBHash = DE.getU64 (&Offset, &Err);
335+ // Map basic block hash to hot fragment by input offset
336+ FuncHashes[HotAddress].second .emplace (InputOffset >> 1 , BBHash);
337+ }
338+ LLVM_DEBUG ({
339+ dbgs () << formatv (
340+ " {0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}" , OutputOffset,
341+ InputOffset, OutputDelta, getULEB128Size (OutputDelta), InputDelta,
342+ (J < EqualElems) ? 0 : getSLEB128Size (InputDelta), OutputAddress);
343+ if (BBHash)
344+ dbgs () << formatv (" {0:x}" , BBHash);
345+ dbgs () << ' \n ' ;
346+ });
297347 }
298348 Maps.insert (std::pair<uint64_t , MapTy>(Address, Map));
299349 }
@@ -303,7 +353,12 @@ void BoltAddressTranslation::dump(raw_ostream &OS) {
303353 const size_t NumTables = Maps.size ();
304354 OS << " BAT tables for " << NumTables << " functions:\n " ;
305355 for (const auto &MapEntry : Maps) {
306- OS << " Function Address: 0x" << Twine::utohexstr (MapEntry.first ) << " \n " ;
356+ const uint64_t Address = MapEntry.first ;
357+ const uint64_t HotAddress = fetchParentAddress (Address);
358+ OS << " Function Address: 0x" << Twine::utohexstr (Address);
359+ if (HotAddress == 0 )
360+ OS << formatv (" , hash: {0:x}" , getBFHash (Address));
361+ OS << " \n " ;
307362 OS << " BB mappings:\n " ;
308363 for (const auto &Entry : MapEntry.second ) {
309364 const bool IsBranch = Entry.second & BRANCHENTRY;
@@ -312,6 +367,9 @@ void BoltAddressTranslation::dump(raw_ostream &OS) {
312367 << " 0x" << Twine::utohexstr (Val);
313368 if (IsBranch)
314369 OS << " (branch)" ;
370+ else
371+ OS << formatv (" hash: {0:x}" ,
372+ getBBHash (HotAddress ? HotAddress : Address, Val));
315373 OS << " \n " ;
316374 }
317375 OS << " \n " ;
@@ -439,5 +497,15 @@ void BoltAddressTranslation::saveMetadata(BinaryContext &BC) {
439497 BB.getHash ());
440498 }
441499}
500+
501+ size_t BoltAddressTranslation::getBBHash (uint64_t FuncOutputAddress,
502+ uint32_t BBInputOffset) const {
503+ return FuncHashes.at (FuncOutputAddress).second .at (BBInputOffset);
504+ }
505+
506+ size_t BoltAddressTranslation::getBFHash (uint64_t OutputAddress) const {
507+ return FuncHashes.at (OutputAddress).first ;
508+ }
509+
442510} // namespace bolt
443511} // namespace llvm
0 commit comments