Skip to content

Commit e04f4df

Browse files
committed
Add a new MachineBlockHashInfo pass to calculate the hash value of Machine Basic Blocks and write the hash values into the bb_address_map section.
1 parent a32d491 commit e04f4df

34 files changed

+420
-50
lines changed
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
#ifndef LLVM_CODEGEN_MACHINEBLOCKHASHINFO_H
2+
#define LLVM_CODEGEN_MACHINEBLOCKHASHINFO_H
3+
4+
#include "llvm/CodeGen/MachineFunctionPass.h"
5+
6+
namespace llvm {
7+
8+
/// An object wrapping several components of a basic block hash. The combined
/// (blended) hash is represented and stored as one uint64_t, while the
/// individual components are 16 bits wide each.
///
/// Bit layout of the combined value (least significant bits first):
///   bits  0-15  Offset
///   bits 16-31  OpcodeHash
///   bits 32-47  InstrHash
///   bits 48-63  NeighborHash
struct BlendedBlockHash {
private:
  /// Pack four 16-bit values into one 64-bit value. \p Hash1 ends up in the
  /// lowest 16 bits and \p Hash4 in the highest 16 bits.
  static uint64_t combineHashes(uint16_t Hash1, uint16_t Hash2, uint16_t Hash3,
                                uint16_t Hash4) {
    return (uint64_t(Hash4) << 48) | (uint64_t(Hash3) << 32) |
           (uint64_t(Hash2) << 16) | uint64_t(Hash1);
  }

  /// Inverse of combineHashes(): split \p Hash into its four 16-bit fields,
  /// lowest field first.
  static void parseHashes(uint64_t Hash, uint16_t &Hash1, uint16_t &Hash2,
                          uint16_t &Hash3, uint16_t &Hash4) {
    Hash1 = static_cast<uint16_t>(Hash & 0xffff);
    Hash2 = static_cast<uint16_t>((Hash >> 16) & 0xffff);
    Hash3 = static_cast<uint16_t>((Hash >> 32) & 0xffff);
    Hash4 = static_cast<uint16_t>((Hash >> 48) & 0xffff);
  }

public:
  /// Create a hash with every component set to zero.
  explicit BlendedBlockHash() = default;

  /// Unpack a value previously produced by combine().
  explicit BlendedBlockHash(uint64_t CombinedHash) {
    parseHashes(CombinedHash, Offset, OpcodeHash, InstrHash, NeighborHash);
  }

  /// Combine the blended hash into uint64_t.
  uint64_t combine() const {
    return combineHashes(Offset, OpcodeHash, InstrHash, NeighborHash);
  }

  /// Compute a distance between two given blended hashes. The smaller the
  /// distance, the more similar two blocks are. For identical basic blocks,
  /// the distance is zero.
  ///
  /// Both hashes must have the same OpcodeHash (checked by an assertion).
  /// A NeighborHash mismatch contributes 2^32, an InstrHash mismatch
  /// contributes 2^16, and the absolute Offset difference (at most 65535)
  /// fills the low bits, so mismatches are ranked in that order.
  uint64_t distance(const BlendedBlockHash &BBH) const {
    assert(OpcodeHash == BBH.OpcodeHash &&
           "incorrect blended hash distance computation");
    uint64_t Dist = 0;
    // Account for NeighborHash
    Dist += NeighborHash == BBH.NeighborHash ? 0 : 1;
    Dist <<= 16;
    // Account for InstrHash
    Dist += InstrHash == BBH.InstrHash ? 0 : 1;
    Dist <<= 16;
    // Account for Offset
    Dist += (Offset >= BBH.Offset ? Offset - BBH.Offset : BBH.Offset - Offset);
    return Dist;
  }

  /// The offset of the basic block from the function start.
  uint16_t Offset{0};
  /// (Loose) Hash of the basic block instructions, excluding operands.
  uint16_t OpcodeHash{0};
  /// (Strong) Hash of the basic block instructions, including opcodes and
  /// operands.
  uint16_t InstrHash{0};
  /// Hash of the (loose) basic block together with (loose) hashes of its
  /// successors and predecessors.
  uint16_t NeighborHash{0};
};
87+
88+
class MachineBlockHashInfo : public MachineFunctionPass {
89+
DenseMap<unsigned, uint64_t> MBBHashInfo;
90+
91+
public:
92+
static char ID;
93+
MachineBlockHashInfo();
94+
95+
StringRef getPassName() const override {
96+
return "Basic Block Hash Compute";
97+
}
98+
99+
void getAnalysisUsage(AnalysisUsage &AU) const override;
100+
101+
bool runOnMachineFunction(MachineFunction &F) override;
102+
103+
uint64_t getMBBHash(const MachineBasicBlock &MBB);
104+
};
105+
106+
} // end namespace llvm
107+
108+
#endif // LLVM_CODEGEN_MACHINEBLOCKHASHINFO_H

llvm/include/llvm/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ void initializeMIRCanonicalizerPass(PassRegistry &);
184184
void initializeMIRNamerPass(PassRegistry &);
185185
void initializeMIRPrintingPassPass(PassRegistry &);
186186
void initializeMachineBlockFrequencyInfoWrapperPassPass(PassRegistry &);
187+
void initializeMachineBlockHashInfoPass(PassRegistry&);
187188
void initializeMachineBlockPlacementLegacyPass(PassRegistry &);
188189
void initializeMachineBlockPlacementStatsPass(PassRegistry &);
189190
void initializeMachineBranchProbabilityInfoWrapperPassPass(PassRegistry &);

llvm/include/llvm/Object/ELFTypes.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -914,9 +914,10 @@ struct BBAddrMap {
914914
uint32_t Size = 0; // Size of the basic block.
915915
Metadata MD = {false, false, false, false,
916916
false}; // Metadata for this basic block.
917+
uint64_t Hash = 0;
917918

918-
BBEntry(uint32_t ID, uint32_t Offset, uint32_t Size, Metadata MD)
919-
: ID(ID), Offset(Offset), Size(Size), MD(MD){};
919+
BBEntry(uint32_t ID, uint32_t Offset, uint32_t Size, Metadata MD, uint64_t Hash)
920+
: ID(ID), Offset(Offset), Size(Size), MD(MD), Hash(Hash){};
920921

921922
bool operator==(const BBEntry &Other) const {
922923
return ID == Other.ID && Offset == Other.Offset && Size == Other.Size &&

llvm/include/llvm/ObjectYAML/ELFYAML.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ struct BBAddrMapEntry {
162162
llvm::yaml::Hex64 AddressOffset;
163163
llvm::yaml::Hex64 Size;
164164
llvm::yaml::Hex64 Metadata;
165+
llvm::yaml::Hex64 Hash;
165166
};
166167
uint8_t Version;
167168
llvm::yaml::Hex8 Feature;

llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include "llvm/CodeGen/GCMetadataPrinter.h"
4141
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
4242
#include "llvm/CodeGen/MachineBasicBlock.h"
43+
#include "llvm/CodeGen/MachineBlockHashInfo.h"
4344
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
4445
#include "llvm/CodeGen/MachineConstantPool.h"
4546
#include "llvm/CodeGen/MachineDominators.h"
@@ -450,6 +451,7 @@ const MCSection *AsmPrinter::getCurrentSection() const {
450451
void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
451452
AU.setPreservesAll();
452453
MachineFunctionPass::getAnalysisUsage(AU);
454+
AU.addRequired<MachineBlockHashInfo>();
453455
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
454456
AU.addRequired<GCModuleInfo>();
455457
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
@@ -1477,6 +1479,8 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
14771479
PrevMBBEndSymbol = MBBSymbol;
14781480
}
14791481

1482+
auto MBHI = &getAnalysis<MachineBlockHashInfo>();
1483+
14801484
if (!Features.OmitBBEntries) {
14811485
// TODO: Remove this check when version 1 is deprecated.
14821486
if (BBAddrMapVersion > 1) {
@@ -1496,6 +1500,8 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
14961500
emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), MBBSymbol);
14971501
// Emit the Metadata.
14981502
OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB));
1503+
// Emit the Hash.
1504+
OutStreamer->emitULEB128IntValue(MBHI->getMBBHash(MBB));
14991505
}
15001506

15011507
PrevMBBEndSymbol = MBB.getEndSymbol();

llvm/lib/CodeGen/BasicBlockSections.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070

7171
#include "llvm/ADT/SmallVector.h"
7272
#include "llvm/ADT/StringRef.h"
73+
#include "llvm/CodeGen/MachineBlockHashInfo.h"
7374
#include "llvm/CodeGen/BasicBlockSectionUtils.h"
7475
#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
7576
#include "llvm/CodeGen/MachineDominators.h"
@@ -134,6 +135,7 @@ INITIALIZE_PASS_BEGIN(
134135
"Prepares for basic block sections, by splitting functions "
135136
"into clusters of basic blocks.",
136137
false, false)
138+
INITIALIZE_PASS_DEPENDENCY(MachineBlockHashInfo)
137139
INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
138140
INITIALIZE_PASS_END(BasicBlockSections, "bbsections-prepare",
139141
"Prepares for basic block sections, by splitting functions "
@@ -400,6 +402,7 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
400402

401403
void BasicBlockSections::getAnalysisUsage(AnalysisUsage &AU) const {
402404
AU.setPreservesAll();
405+
AU.addRequired<MachineBlockHashInfo>();
403406
AU.addRequired<BasicBlockSectionsProfileReaderWrapperPass>();
404407
AU.addUsedIfAvailable<MachineDominatorTreeWrapperPass>();
405408
AU.addUsedIfAvailable<MachinePostDominatorTreeWrapperPass>();

llvm/lib/CodeGen/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ add_llvm_component_library(LLVMCodeGen
108108
LowerEmuTLS.cpp
109109
MachineBasicBlock.cpp
110110
MachineBlockFrequencyInfo.cpp
111+
MachineBlockHashInfo.cpp
111112
MachineBlockPlacement.cpp
112113
MachineBranchProbabilityInfo.cpp
113114
MachineCFGPrinter.cpp

llvm/lib/CodeGen/CodeGen.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
7272
initializeMIRNamerPass(Registry);
7373
initializeMIRProfileLoaderPassPass(Registry);
7474
initializeMachineBlockFrequencyInfoWrapperPassPass(Registry);
75+
initializeMachineBlockHashInfoPass(Registry);
7576
initializeMachineBlockPlacementLegacyPass(Registry);
7677
initializeMachineBlockPlacementStatsPass(Registry);
7778
initializeMachineCFGPrinterPass(Registry);
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#include "llvm/CodeGen/MachineBlockHashInfo.h"
2+
#include "llvm/CodeGen/Passes.h"
3+
#include "llvm/Target/TargetMachine.h"
4+
#include "llvm/InitializePasses.h"
5+
6+
using namespace llvm;
7+
8+
using OperandHashFuncTy = function_ref<uint64_t(uint64_t &, const MachineOperand &)>;
9+
10+
uint64_t hashBlock(const MachineBasicBlock &MBB, OperandHashFuncTy OperandHashFunc) {
11+
uint64_t Hash = 0;
12+
for (const MachineInstr &MI : MBB) {
13+
if (MI.isPseudo())
14+
continue;
15+
// Ignore unconditional jumps
16+
if (MI.isUnconditionalBranch())
17+
continue;
18+
Hash = hashing::detail::hash_16_bytes(Hash, MI.getOpcode());
19+
for (unsigned i = 0; i < MI.getNumOperands(); i++) {
20+
Hash = OperandHashFunc(Hash, MI.getOperand(i));
21+
}
22+
}
23+
return Hash;
24+
}
25+
26+
/// Hashing a 64-bit integer to a 16-bit one by XOR-ing its four 16-bit
/// quarters together.
///
/// File-local helper, hence internal linkage.
static uint16_t hash_64_to_16(const uint64_t Hash) {
  uint16_t Res = static_cast<uint16_t>(Hash & 0xFFFF);
  Res ^= static_cast<uint16_t>((Hash >> 16) & 0xFFFF);
  Res ^= static_cast<uint16_t>((Hash >> 32) & 0xFFFF);
  Res ^= static_cast<uint16_t>((Hash >> 48) & 0xFFFF);
  return Res;
}
34+
35+
uint64_t hashInstOperand(uint64_t &Hash, const MachineOperand &Operand) {
36+
return hashing::detail::hash_16_bytes(Hash, hash_value(Operand));
37+
}
38+
39+
INITIALIZE_PASS(MachineBlockHashInfo, "machine-block-hash",
40+
"Machine Block Hash Analysis", true, true)
41+
42+
char MachineBlockHashInfo::ID = 0;
43+
44+
/// Construct the pass and run its initializer against the global pass
/// registry.
MachineBlockHashInfo::MachineBlockHashInfo() : MachineFunctionPass(ID) {
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeMachineBlockHashInfoPass(Registry);
}
47+
48+
/// Declare that this pass preserves all analyses (it returns false from
/// runOnMachineFunction), then let the base class add its own requirements.
void MachineBlockHashInfo::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  MachineFunctionPass::getAnalysisUsage(AU);
}
52+
53+
/// Compute the blended hash of every basic block in \p F and store it in
/// MBBHashInfo under the block's BaseID. Blocks without a BBID are hashed
/// (they still take part in their neighbors' hashes) but not stored.
///
/// Always returns false: the function itself is never modified.
bool MachineBlockHashInfo::runOnMachineFunction(MachineFunction &F) {
  // MBBHashInfo is a member, so it would otherwise keep entries written for
  // previously processed functions. Drop them so that getMBBHash() can never
  // return a leftover value for an ID that this run did not record.
  MBBHashInfo.clear();

  DenseMap<MachineBasicBlock *, BlendedBlockHash> BlendedHashes;
  DenseMap<MachineBasicBlock *, uint64_t> OpcodeHashes;

  // Pass 1: per-block components that do not depend on other blocks.
  // Offset is the running sum of MBB.size() over the preceding blocks; it is
  // a 16-bit component and therefore wraps modulo 2^16.
  uint16_t Offset = 0;
  for (MachineBasicBlock &MBB : F) {
    BlendedBlockHash BlendedHash;
    BlendedHash.Offset = Offset;
    Offset += MBB.size();

    // Loose hash: opcodes only, the operand callback leaves the hash as is.
    const uint64_t OpcodeHash = hashBlock(
        MBB, [](uint64_t &Hash, const MachineOperand &) { return Hash; });
    OpcodeHashes[&MBB] = OpcodeHash;
    BlendedHash.OpcodeHash = hash_64_to_16(OpcodeHash);

    // Strong hash: opcodes and operands.
    BlendedHash.InstrHash = hash_64_to_16(hashBlock(MBB, hashInstOperand));

    BlendedHashes[&MBB] = BlendedHash;
  }

  // Pass 2: the neighbor hash needs the full-width opcode hash of every
  // block, so it can only be computed once pass 1 has finished. The block's
  // combined hash is complete right after that and is published immediately.
  for (MachineBasicBlock &MBB : F) {
    uint64_t Hash = OpcodeHashes.lookup(&MBB);
    // Append hashes of successors
    for (MachineBasicBlock *SuccMBB : MBB.successors())
      Hash = hashing::detail::hash_16_bytes(Hash, OpcodeHashes.lookup(SuccMBB));
    // Append hashes of predecessors
    for (MachineBasicBlock *PredMBB : MBB.predecessors())
      Hash = hashing::detail::hash_16_bytes(Hash, OpcodeHashes.lookup(PredMBB));

    BlendedBlockHash &BlendedHash = BlendedHashes[&MBB];
    BlendedHash.NeighborHash = hash_64_to_16(Hash);

    if (auto BBID = MBB.getBBID())
      MBBHashInfo[BBID->BaseID] = BlendedHash.combine();
  }

  return false;
}
98+
99+
uint64_t MachineBlockHashInfo::getMBBHash(const MachineBasicBlock &MBB) {
100+
if (MBB.getBBID()) {
101+
return MBBHashInfo[MBB.getBBID()->BaseID];
102+
}
103+
return 0;
104+
}

llvm/lib/Object/ELF.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -873,6 +873,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
873873
uint32_t Offset = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
874874
uint32_t Size = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
875875
uint32_t MD = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
876+
uint64_t Hash = readULEB128As<uint64_t>(Data, Cur, ULEBSizeErr);
876877
if (Version >= 1) {
877878
// Offset is calculated relative to the end of the previous BB.
878879
Offset += PrevBBEndOffset;
@@ -884,7 +885,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
884885
MetadataDecodeErr = MetadataOrErr.takeError();
885886
break;
886887
}
887-
BBEntries.push_back({ID, Offset, Size, *MetadataOrErr});
888+
BBEntries.push_back({ID, Offset, Size, *MetadataOrErr, Hash});
888889
}
889890
TotalNumBlocks += BBEntries.size();
890891
}

0 commit comments

Comments
 (0)