diff --git a/llvm/include/llvm/CodeGen/BasicBlockMatchingAndInference.h b/llvm/include/llvm/CodeGen/BasicBlockMatchingAndInference.h new file mode 100644 index 0000000000000..66209d7685ecc --- /dev/null +++ b/llvm/include/llvm/CodeGen/BasicBlockMatchingAndInference.h @@ -0,0 +1,50 @@ +#ifndef LLVM_CODEGEN_BASIC_BLOCK_AND_INFERENCE_H +#define LLVM_CODEGEN_BASIC_BLOCK_AND_INFERENCE_H + +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Transforms/Utils/SampleProfileInference.h" + +namespace llvm { + +class BasicBlockMatchingAndInference : public MachineFunctionPass { +private: + using Edge = std::pair; + using BlockWeightMap = DenseMap; + using EdgeWeightMap = DenseMap; + using BlockEdgeMap = DenseMap>; + + struct WeightInfo { + // Weight of basic blocks. + BlockWeightMap BlockWeights; + // Weight of edges. + EdgeWeightMap EdgeWeights; + }; + +public: + static char ID; + BasicBlockMatchingAndInference(); + + StringRef getPassName() const override { + return "Basic Block Matching and Inference"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnMachineFunction(MachineFunction &F) override; + + std::optional getWeightInfo(StringRef FuncName) const; + +private: + StringMap ProgramWeightInfo; + + WeightInfo initWeightInfoByMatching(MachineFunction &MF); + + void generateWeightInfoByInference(MachineFunction &MF, + WeightInfo &MatchWeight); +}; + +} // end namespace llvm + +#endif // LLVM_CODEGEN_BASIC_BLOCK_AND_INFERENCE_H diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 82dd5feb31dba..497d751b3b26e 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -54,6 +54,8 @@ struct FunctionPathAndClusterInfo { DenseMap NodeCounts; // Edge counts for each edge, stored as a nested map. 
DenseMap> EdgeCounts; + // Hash for each basic block. + DenseMap BBHashes; }; class BasicBlockSectionsProfileReader { @@ -86,6 +88,10 @@ class BasicBlockSectionsProfileReader { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &SinkBBID) const; + // Return the complete function path and cluster info for the given function. + std::pair + getFunctionPathAndClusterInfo(StringRef FuncName) const; + private: StringRef getAliasName(StringRef FuncName) const { auto R = FuncAliasMap.find(FuncName); @@ -195,6 +201,9 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &DestBBID) const; + std::pair + getFunctionPathAndClusterInfo(StringRef FuncName) const; + // Initializes the FunctionNameToDIFilename map for the current module and // then reads the profile for the matching functions. bool doInitialization(Module &M) override; diff --git a/llvm/include/llvm/CodeGen/MachineBlockHashInfo.h b/llvm/include/llvm/CodeGen/MachineBlockHashInfo.h new file mode 100644 index 0000000000000..5de1b567e0309 --- /dev/null +++ b/llvm/include/llvm/CodeGen/MachineBlockHashInfo.h @@ -0,0 +1,106 @@ +#ifndef LLVM_CODEGEN_MACHINEBLOCKHASHINFO_H +#define LLVM_CODEGEN_MACHINEBLOCKHASHINFO_H + +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { + +/// An object wrapping several components of a basic block hash. The combined +/// (blended) hash is represented and stored as one uint64_t, while individual +/// components are of smaller size (e.g., uint16_t or uint8_t). 
+struct BlendedBlockHash { +private: + static uint64_t combineHashes(uint16_t Hash1, uint16_t Hash2, uint16_t Hash3, + uint16_t Hash4) { + uint64_t Hash = 0; + + Hash |= uint64_t(Hash4); + Hash <<= 16; + + Hash |= uint64_t(Hash3); + Hash <<= 16; + + Hash |= uint64_t(Hash2); + Hash <<= 16; + + Hash |= uint64_t(Hash1); + + return Hash; + } + + static void parseHashes(uint64_t Hash, uint16_t &Hash1, uint16_t &Hash2, + uint16_t &Hash3, uint16_t &Hash4) { + Hash1 = Hash & 0xffff; + Hash >>= 16; + + Hash2 = Hash & 0xffff; + Hash >>= 16; + + Hash3 = Hash & 0xffff; + Hash >>= 16; + + Hash4 = Hash & 0xffff; + Hash >>= 16; + } + +public: + explicit BlendedBlockHash() {} + + explicit BlendedBlockHash(uint64_t CombinedHash) { + parseHashes(CombinedHash, Offset, OpcodeHash, InstrHash, NeighborHash); + } + + /// Combine the blended hash into uint64_t. + uint64_t combine() const { + return combineHashes(Offset, OpcodeHash, InstrHash, NeighborHash); + } + + /// Compute a distance between two given blended hashes. The smaller the + /// distance, the more similar two blocks are. For identical basic blocks, + /// the distance is zero. + uint64_t distance(const BlendedBlockHash &BBH) const { + assert(OpcodeHash == BBH.OpcodeHash && + "incorrect blended hash distance computation"); + uint64_t Dist = 0; + // Account for NeighborHash + Dist += NeighborHash == BBH.NeighborHash ? 0 : 1; + Dist <<= 16; + // Account for InstrHash + Dist += InstrHash == BBH.InstrHash ? 0 : 1; + Dist <<= 16; + // Account for Offset + Dist += (Offset >= BBH.Offset ? Offset - BBH.Offset : BBH.Offset - Offset); + return Dist; + } + + /// The offset of the basic block from the function start. + uint16_t Offset{0}; + /// (Loose) Hash of the basic block instructions, excluding operands. + uint16_t OpcodeHash{0}; + /// (Strong) Hash of the basic block instructions, including opcodes and + /// operands. 
+ uint16_t InstrHash{0}; + /// Hash of the (loose) basic block together with (loose) hashes of its + /// successors and predecessors. + uint16_t NeighborHash{0}; +}; + +class MachineBlockHashInfo : public MachineFunctionPass { + DenseMap MBBHashInfo; + +public: + static char ID; + MachineBlockHashInfo(); + + StringRef getPassName() const override { return "Basic Block Hash Compute"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnMachineFunction(MachineFunction &F) override; + + uint64_t getMBBHash(const MachineBasicBlock &MBB); +}; + +} // end namespace llvm + +#endif // LLVM_CODEGEN_MACHINEBLOCKHASHINFO_H diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index f17d550623efc..4bee8bec932ef 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -69,6 +69,13 @@ LLVM_ABI MachineFunctionPass *createBasicBlockSectionsPass(); LLVM_ABI MachineFunctionPass *createBasicBlockPathCloningPass(); +/// createBasicBlockMatchingAndInferencePass - This pass enables matching +/// and inference when using propeller. +MachineFunctionPass *createBasicBlockMatchingAndInferencePass(); + +/// createMachineBlockHashInfoPass - This pass computes basic block hashes. +MachineFunctionPass *createMachineBlockHashInfoPass(); + /// createMachineFunctionSplitterPass - This pass splits machine functions /// using profile information. 
LLVM_ABI MachineFunctionPass *createMachineFunctionSplitterPass(); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 88272f053c114..bf3895cc6b16d 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -55,6 +55,7 @@ LLVM_ABI void initializeAlwaysInlinerLegacyPassPass(PassRegistry &); LLVM_ABI void initializeAssignmentTrackingAnalysisPass(PassRegistry &); LLVM_ABI void initializeAssumptionCacheTrackerPass(PassRegistry &); LLVM_ABI void initializeAtomicExpandLegacyPass(PassRegistry &); +LLVM_ABI void initializeBasicBlockMatchingAndInferencePass(PassRegistry &); LLVM_ABI void initializeBasicBlockPathCloningPass(PassRegistry &); LLVM_ABI void initializeBasicBlockSectionsProfileReaderWrapperPassPass(PassRegistry &); @@ -62,6 +63,7 @@ LLVM_ABI void initializeBasicBlockSectionsPass(PassRegistry &); LLVM_ABI void initializeBarrierNoopPass(PassRegistry &); LLVM_ABI void initializeBasicAAWrapperPassPass(PassRegistry &); LLVM_ABI void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry &); +LLVM_ABI void initializeMachineBlockHashInfoPass(PassRegistry&); LLVM_ABI void initializeBranchFolderLegacyPass(PassRegistry &); LLVM_ABI void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry &); LLVM_ABI void initializeBranchRelaxationLegacyPass(PassRegistry &); diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h index 4a528eecfc900..74abe3403bbed 100644 --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -175,7 +175,7 @@ class MCContext { unsigned GetInstance(unsigned LocalLabelVal); /// SHT_LLVM_BB_ADDR_MAP version to emit. - uint8_t BBAddrMapVersion = 3; + uint8_t BBAddrMapVersion = 4; /// The file name of the log file from the environment variable /// AS_SECURE_LOG_FILE. 
Which must be set before the .secure_log_unique diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h index 7231e45fe8eb7..2b4db171bfdfb 100644 --- a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h @@ -130,6 +130,11 @@ template class SampleProfileInference { SampleProfileInference(FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights) : F(F), Successors(Successors), SampleBlockWeights(SampleBlockWeights) {} + SampleProfileInference(FunctionT &F, BlockEdgeMap &Successors, + BlockWeightMap &SampleBlockWeights, + EdgeWeightMap &SampleEdgeWeights) + : F(F), Successors(Successors), SampleBlockWeights(SampleBlockWeights), + SampleEdgeWeights(SampleEdgeWeights) {} /// Apply the profile inference algorithm for a given function void apply(BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights); @@ -157,6 +162,9 @@ template class SampleProfileInference { /// Map basic blocks to their sampled weights. BlockWeightMap &SampleBlockWeights; + + /// Map edges to their sampled weights. 
+ EdgeWeightMap SampleEdgeWeights; }; template @@ -266,6 +274,14 @@ FlowFunction SampleProfileInference::createFlowFunction( FlowJump Jump; Jump.Source = BlockIndex[BB]; Jump.Target = BlockIndex[Succ]; + auto It = SampleEdgeWeights.find(std::make_pair(BB, Succ)); + if (It != SampleEdgeWeights.end()) { + Jump.HasUnknownWeight = false; + Jump.Weight = It->second; + } else { + Jump.HasUnknownWeight = true; + Jump.Weight = 0; + } Func.Jumps.push_back(Jump); } } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 701a6a2f0f7a0..1cfba9dcc3551 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -40,6 +40,7 @@ #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockHashInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineDominators.h" @@ -182,6 +183,8 @@ static cl::opt PrintLatency( cl::desc("Print instruction latencies as verbose asm comments"), cl::Hidden, cl::init(false)); +extern cl::opt EmitBBHash; + STATISTIC(EmittedInsts, "Number of machine instrs printed"); char AsmPrinter::ID = 0; @@ -461,6 +464,8 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addRequired(); + if (EmitBBHash) + AU.addRequired(); } bool AsmPrinter::doInitialization(Module &M) { @@ -1427,7 +1432,8 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges, BrProbEnabled, MF.hasBBSections() && NumMBBSectionRanges > 1, static_cast(BBAddrMapSkipEmitBBEntries), - HasCalls}; + HasCalls, + static_cast(EmitBBHash)}; } void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { @@ -1486,6 +1492,8 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { PrevMBBEndSymbol = MBBSymbol; } + auto MBHI = 
Features.BBHash ? &getAnalysis() : nullptr; + if (!Features.OmitBBEntries) { OutStreamer->AddComment("BB id"); // Emit the BB ID for this basic block. @@ -1513,6 +1521,10 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), CurrentLabel); // Emit the Metadata. OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB)); + // Emit the Hash. + if (MBHI) { + OutStreamer->emitULEB128IntValue(MBHI->getMBBHash(MBB)); + } } PrevMBBEndSymbol = MBB.getEndSymbol(); } diff --git a/llvm/lib/CodeGen/BasicBlockMatchingAndInference.cpp b/llvm/lib/CodeGen/BasicBlockMatchingAndInference.cpp new file mode 100644 index 0000000000000..aacfaf88d03c0 --- /dev/null +++ b/llvm/lib/CodeGen/BasicBlockMatchingAndInference.cpp @@ -0,0 +1,172 @@ +#include "llvm/CodeGen/BasicBlockMatchingAndInference.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" +#include "llvm/CodeGen/MachineBlockHashInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" +#include + +using namespace llvm; + +static cl::opt + PropellerInferThreshold("propeller-infer-threshold", + cl::desc("Threshold for infer stale profile"), + cl::init(0.6), cl::Optional); + +/// The object is used to identify and match basic blocks given their hashes. +class StaleMatcher { +public: + /// Initialize stale matcher. + void init(const std::vector &Blocks, + const std::vector &Hashes) { + assert(Blocks.size() == Hashes.size() && + "incorrect matcher initialization"); + for (size_t I = 0; I < Blocks.size(); I++) { + MachineBasicBlock *Block = Blocks[I]; + uint16_t OpHash = Hashes[I].OpcodeHash; + OpHashToBlocks[OpHash].push_back(std::make_pair(Hashes[I], Block)); + } + } + + /// Find the most similar block for a given hash. 
+ MachineBasicBlock *matchBlock(BlendedBlockHash BlendedHash) const { + auto BlockIt = OpHashToBlocks.find(BlendedHash.OpcodeHash); + if (BlockIt == OpHashToBlocks.end()) { + return nullptr; + } + MachineBasicBlock *BestBlock = nullptr; + uint64_t BestDist = std::numeric_limits::max(); + for (auto It : BlockIt->second) { + MachineBasicBlock *Block = It.second; + BlendedBlockHash Hash = It.first; + uint64_t Dist = Hash.distance(BlendedHash); + if (BestBlock == nullptr || Dist < BestDist) { + BestDist = Dist; + BestBlock = Block; + } + } + return BestBlock; + } + +private: + using HashBlockPairType = std::pair; + std::unordered_map> OpHashToBlocks; +}; + +INITIALIZE_PASS_BEGIN(BasicBlockMatchingAndInference, + "machine-block-match-infer", + "Machine Block Matching and Inference Analysis", true, + true) +INITIALIZE_PASS_DEPENDENCY(MachineBlockHashInfo) +INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass) +INITIALIZE_PASS_END(BasicBlockMatchingAndInference, "machine-block-match-infer", + "Machine Block Matching and Inference Analysis", true, true) + +char BasicBlockMatchingAndInference::ID = 0; + +BasicBlockMatchingAndInference::BasicBlockMatchingAndInference() + : MachineFunctionPass(ID) { + initializeBasicBlockMatchingAndInferencePass( + *PassRegistry::getPassRegistry()); +} + +void BasicBlockMatchingAndInference::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +std::optional +BasicBlockMatchingAndInference::getWeightInfo(StringRef FuncName) const { + auto It = ProgramWeightInfo.find(FuncName); + if (It == ProgramWeightInfo.end()) { + return std::nullopt; + } + return It->second; +} + +BasicBlockMatchingAndInference::WeightInfo +BasicBlockMatchingAndInference::initWeightInfoByMatching(MachineFunction &MF) { + std::vector Blocks; + std::vector Hashes; + auto BSPR = &getAnalysis(); + auto MBHI = &getAnalysis(); + for (auto &Block : MF) { + 
Blocks.push_back(&Block); + Hashes.push_back(BlendedBlockHash(MBHI->getMBBHash(Block))); + } + StaleMatcher Matcher; + Matcher.init(Blocks, Hashes); + BasicBlockMatchingAndInference::WeightInfo MatchWeight; + auto [Flag, PathAndClusterInfo] = + BSPR->getFunctionPathAndClusterInfo(MF.getName()); + if (!Flag) + return MatchWeight; // No profile for this function. + for (auto &BlockCount : PathAndClusterInfo.NodeCounts) { + if (PathAndClusterInfo.BBHashes.count(BlockCount.first.BaseID)) { + auto Hash = PathAndClusterInfo.BBHashes[BlockCount.first.BaseID]; + MachineBasicBlock *Block = Matcher.matchBlock(BlendedBlockHash(Hash)); + // When a basic block has clone copies, sum their counts. + if (Block != nullptr) + MatchWeight.BlockWeights[Block] += BlockCount.second; + } + } + for (auto &PredItem : PathAndClusterInfo.EdgeCounts) { + auto PredID = PredItem.first.BaseID; + if (!PathAndClusterInfo.BBHashes.count(PredID)) + continue; + auto PredHash = PathAndClusterInfo.BBHashes[PredID]; + MachineBasicBlock *PredBlock = Matcher.matchBlock(BlendedBlockHash(PredHash)); + if (PredBlock == nullptr) + continue; + for (auto &SuccItem : PredItem.second) { + auto SuccID = SuccItem.first.BaseID; + auto EdgeWeight = SuccItem.second; + if (PathAndClusterInfo.BBHashes.count(SuccID)) { + auto SuccHash = PathAndClusterInfo.BBHashes[SuccID]; + MachineBasicBlock *SuccBlock = Matcher.matchBlock(BlendedBlockHash(SuccHash)); + // When an edge has clone copies, sum their counts.
+ if (SuccBlock != nullptr) + MatchWeight.EdgeWeights[std::make_pair(PredBlock, SuccBlock)] += EdgeWeight; + } + } + } + return MatchWeight; // Plain return keeps copy elision. +} + +void BasicBlockMatchingAndInference::generateWeightInfoByInference( + MachineFunction &MF, + BasicBlockMatchingAndInference::WeightInfo &MatchWeight) { + BlockEdgeMap Successors; + for (auto &Block : MF) { + for (auto *Succ : Block.successors()) + Successors[&Block].push_back(Succ); + } + SampleProfileInference SPI( + MF, Successors, MatchWeight.BlockWeights, MatchWeight.EdgeWeights); + BlockWeightMap BlockWeights; + EdgeWeightMap EdgeWeights; + SPI.apply(BlockWeights, EdgeWeights); + ProgramWeightInfo.try_emplace( + MF.getName(), BasicBlockMatchingAndInference::WeightInfo{ + std::move(BlockWeights), std::move(EdgeWeights)}); +} + +bool BasicBlockMatchingAndInference::runOnMachineFunction(MachineFunction &MF) { + if (MF.empty()) + return false; + auto MatchWeight = initWeightInfoByMatching(MF); + // If the ratio of the number of MBBs in matching to the total number of MBBs + // in the function is less than the threshold value, the processing should be + // abandoned.
+ if (static_cast(MatchWeight.BlockWeights.size()) / MF.size() < + PropellerInferThreshold) { + return false; + } + generateWeightInfoByInference(MF, MatchWeight); + return false; +} + +MachineFunctionPass *llvm::createBasicBlockMatchingAndInferencePass() { + return new BasicBlockMatchingAndInference(); +} diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index e317e1c06741f..3e9791b8f60f4 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -72,6 +72,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/BasicBlockSectionUtils.h" #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" +#include "llvm/CodeGen/BasicBlockMatchingAndInference.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -81,6 +82,7 @@ #include "llvm/InitializePasses.h" #include "llvm/Support/UniqueBBID.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/CodeLayout.h" #include using namespace llvm; @@ -175,6 +177,77 @@ updateBranches(MachineFunction &MF, } } +// This function generates the machine basic block clusters of "hot" blocks. +// Currently, only the creation of a single cluster is supported. +// TODO: Support multi-cluster creation and path cloning.
+static std::pair> +createBBClusterInfoForFunction( + const MachineFunction &MF, + BasicBlockMatchingAndInference *BMI) { + unsigned CurrentCluster = 0; + auto OptWeightInfo = BMI->getWeightInfo(MF.getName()); + if (!OptWeightInfo) + return std::pair(false, SmallVector{}); + auto BlockWeights = OptWeightInfo->BlockWeights; + auto EdgeWeights = OptWeightInfo->EdgeWeights; + + SmallVector HotMBBs; + if (MF.size() <= 2) { + for (auto &MBB : MF) { + if (MBB.isEntryBlock() || BlockWeights[&MBB] > 0) { + HotMBBs.push_back(&MBB); + } + } + } else { + SmallVector BlockSizes(MF.size()); + SmallVector BlockCounts(MF.size()); + std::vector OrigOrder; + OrigOrder.reserve(MF.size()); + SmallVector JumpCounts; + + // Init the MBB size and count. + for (auto &MBB : MF) { + auto NonDbgInsts = + instructionsWithoutDebug(MBB.instr_begin(), MBB.instr_end()); + int NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end()); + BlockSizes[MBB.getNumber()] = 4 * NumInsts; + BlockCounts[MBB.getNumber()] = BlockWeights[&MBB]; + OrigOrder.push_back(&MBB); + } + + // Init the edge count. + for (auto &MBB : MF) { + for (auto *Succ : MBB.successors()) { + auto EdgeWeight = EdgeWeights[std::make_pair(&MBB, Succ)]; + JumpCounts.push_back({static_cast(MBB.getNumber()), + static_cast(Succ->getNumber()), + EdgeWeight}); + } + } + + // Run the layout algorithm. + auto Result = computeExtTspLayout(BlockSizes, BlockCounts, JumpCounts); + for (uint64_t R : Result) { + auto Block = OrigOrder[R]; + if (Block->isEntryBlock() || BlockWeights[Block] > 0) + HotMBBs.push_back(Block); + } + } + + // Generate the "hot" basic block cluster. 
+ if(!HotMBBs.empty()) { + SmallVector BBClusterInfos; + unsigned CurrentPosition = 0; + for (auto &MBB : HotMBBs) { + if (MBB->getBBID()) { + BBClusterInfos.push_back({*(MBB->getBBID()), CurrentCluster, CurrentPosition++}); + } + } + return std::pair(true, std::move(BBClusterInfos)); + } + return std::pair(false, SmallVector{}); +} + // This function sorts basic blocks according to the cluster's information. // All explicitly specified clusters of basic blocks will be ordered // accordingly. All non-specified BBs go into a separate "Cold" section. @@ -314,12 +387,17 @@ bool BasicBlockSections::handleBBSections(MachineFunction &MF) { DenseMap FuncClusterInfo; if (BBSectionsType == BasicBlockSection::List) { - auto [HasProfile, ClusterInfo] = + std::pair> ExpClusterInfo; + if (auto *BMI = getAnalysisIfAvailable()) { + ExpClusterInfo = createBBClusterInfoForFunction(MF, BMI); + } else { + ExpClusterInfo = getAnalysis() .getClusterInfoForFunction(MF.getName()); - if (!HasProfile) + } + if (!ExpClusterInfo.first) return false; - for (auto &BBClusterInfo : ClusterInfo) { + for (auto &BBClusterInfo : ExpClusterInfo.second) { FuncClusterInfo.try_emplace(BBClusterInfo.BBID, BBClusterInfo); } } @@ -402,6 +480,7 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { void BasicBlockSections::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired(); + AU.addUsedIfAvailable(); AU.addUsedIfAvailable(); AU.addUsedIfAvailable(); MachineFunctionPass::getAnalysisUsage(AU); diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index fbcd614b85d18..00e13135bb064 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -91,6 +91,15 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount( return EdgeIt->second; } +std::pair +BasicBlockSectionsProfileReader::getFunctionPathAndClusterInfo( + StringRef 
FuncName) const { + auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName)); + return R != ProgramPathAndClusterInfo.end() + ? std::pair(true, R->second) + : std::pair(false, FunctionPathAndClusterInfo()); +} + // Reads the version 1 basic block sections profile. Profile for each function // is encoded as follows: // m @@ -287,6 +296,24 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { } continue; } + case 'h': { // Basic block hash specifier. + // Skip the profile when the profile iterator (FI) refers to the + // past-the-end element. + if (FI == ProgramPathAndClusterInfo.end()) + continue; + for (auto BBIDHashStr : Values) { + auto [BBIDStr, HashStr] = BBIDHashStr.split(':'); + unsigned long long BBID = 0, Hash = 0; + if (getAsUnsignedInteger(BBIDStr, 10, BBID)) + return createProfileParseError( + Twine("unsigned integer expected: '") + BBIDStr + "'"); + if (getAsUnsignedInteger(HashStr, 10, Hash)) + return createProfileParseError( + Twine("unsigned integer expected: '") + HashStr + "'"); + FI->second.BBHashes[BBID] = Hash; + } + continue; + } default: return createProfileParseError(Twine("invalid specifier: '") + Twine(Specifier) + "'"); @@ -493,6 +520,12 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount( return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID); } +std::pair +BasicBlockSectionsProfileReaderWrapperPass::getFunctionPathAndClusterInfo( + StringRef FuncName) const { + return BBSPR.getFunctionPathAndClusterInfo(FuncName); +} + BasicBlockSectionsProfileReader & BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() { return BBSPR; diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index f8f9bbba53e43..ee66c7b549048 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -35,6 +35,7 @@ add_llvm_component_library(LLVMCodeGen BasicBlockSections.cpp BasicBlockPathCloning.cpp BasicBlockSectionsProfileReader.cpp + BasicBlockMatchingAndInference.cpp
CalcSpillWeights.cpp CallBrPrepare.cpp CallingConvLower.cpp @@ -108,6 +109,7 @@ add_llvm_component_library(LLVMCodeGen LowerEmuTLS.cpp MachineBasicBlock.cpp MachineBlockFrequencyInfo.cpp + MachineBlockHashInfo.cpp MachineBlockPlacement.cpp MachineBranchProbabilityInfo.cpp MachineCFGPrinter.cpp diff --git a/llvm/lib/CodeGen/MachineBlockHashInfo.cpp b/llvm/lib/CodeGen/MachineBlockHashInfo.cpp new file mode 100644 index 0000000000000..a9636bd09a1a2 --- /dev/null +++ b/llvm/lib/CodeGen/MachineBlockHashInfo.cpp @@ -0,0 +1,111 @@ +#include "llvm/CodeGen/MachineBlockHashInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +using OperandHashFuncTy = + function_ref; + +uint64_t hashBlock(const MachineBasicBlock &MBB, + OperandHashFuncTy OperandHashFunc) { + uint64_t Hash = 0; + for (const MachineInstr &MI : MBB) { + if (MI.isMetaInstruction()) + continue; + // Ignore terminator instruction + if (MI.isTerminator()) + continue; + Hash = hashing::detail::hash_16_bytes(Hash, MI.getOpcode()); + for (unsigned i = 0; i < MI.getNumOperands(); i++) { + Hash = OperandHashFunc(Hash, MI.getOperand(i)); + } + } + return Hash; +} + +/// Hashing a 64-bit integer to a 16-bit one. 
+uint16_t hash_64_to_16(const uint64_t Hash) { + uint16_t Res = (uint16_t)(Hash & 0xFFFF); + Res ^= (uint16_t)((Hash >> 16) & 0xFFFF); + Res ^= (uint16_t)((Hash >> 32) & 0xFFFF); + Res ^= (uint16_t)((Hash >> 48) & 0xFFFF); + return Res; +} + +uint64_t hashInstOperand(uint64_t &Hash, const MachineOperand &Operand) { + return hashing::detail::hash_16_bytes(Hash, hash_value(Operand)); +} + +INITIALIZE_PASS(MachineBlockHashInfo, "machine-block-hash", + "Machine Block Hash Analysis", true, true) + +char MachineBlockHashInfo::ID = 0; + +MachineBlockHashInfo::MachineBlockHashInfo() : MachineFunctionPass(ID) { + initializeMachineBlockHashInfoPass(*PassRegistry::getPassRegistry()); +} + +void MachineBlockHashInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool MachineBlockHashInfo::runOnMachineFunction(MachineFunction &F) { + DenseMap BlendedHashes; + DenseMap OpcodeHashes; + uint16_t Offset = 0; + // Initialize hash components + for (MachineBasicBlock &MBB : F) { + BlendedBlockHash BlendedHash; + // offset of the machine basic block + BlendedHash.Offset = Offset; + Offset += MBB.size(); + // Hashing opcodes + uint64_t OpcodeHash = hashBlock( + MBB, [](uint64_t &Hash, const MachineOperand &Op) { return Hash; }); + OpcodeHashes[&MBB] = OpcodeHash; + BlendedHash.OpcodeHash = hash_64_to_16(OpcodeHash); + // Hash complete instructions + uint64_t InstrHash = hashBlock(MBB, hashInstOperand); + BlendedHash.InstrHash = hash_64_to_16(InstrHash); + BlendedHashes[&MBB] = BlendedHash; + } + + // Initialize neighbor hash + for (MachineBasicBlock &MBB : F) { + uint64_t Hash = OpcodeHashes[&MBB]; + // Append hashes of successors + for (MachineBasicBlock *SuccMBB : MBB.successors()) { + uint64_t SuccHash = OpcodeHashes[SuccMBB]; + Hash = hashing::detail::hash_16_bytes(Hash, SuccHash); + } + // Append hashes of predecessors + for (MachineBasicBlock *PredMBB : MBB.predecessors()) { + uint64_t PredHash = 
OpcodeHashes[PredMBB]; + Hash = hashing::detail::hash_16_bytes(Hash, PredHash); + } + BlendedHashes[&MBB].NeighborHash = hash_64_to_16(Hash); + } + + // Assign hashes + for (MachineBasicBlock &MBB : F) { + if (MBB.getBBID()) { + MBBHashInfo[MBB.getBBID()->BaseID] = BlendedHashes[&MBB].combine(); + } + } + + return false; +} + +uint64_t MachineBlockHashInfo::getMBBHash(const MachineBasicBlock &MBB) { + if (MBB.getBBID()) { + return MBBHashInfo[MBB.getBBID()->BaseID]; + } + return 0; +} + +MachineFunctionPass *llvm::createMachineBlockHashInfoPass() { + return new MachineBlockHashInfo(); +} diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index b6169e6c4dc34..11022d6d9d915 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -272,6 +272,16 @@ static cl::opt cl::desc("Split static data sections into hot and cold " "sections using profile information")); +/// Enable matching and inference when using propeller. +static cl::opt + PropellerMatchInfer("propeller-match-infer", + cl::desc("Use match&infer to evaluate stale profile"), + cl::init(false), cl::Optional); + +cl::opt EmitBBHash("emit-bb-hash", + cl::desc("Emit the hash of basic block in the SHT_LLVM_BB_ADDR_MAP section."), + cl::init(false), cl::Optional); + /// Allow standard passes to be disabled by command line options. This supports /// simple binary flags that either suppress the pass or do nothing. /// i.e. -disable-mypass=false has no effect. @@ -1281,10 +1291,15 @@ void TargetPassConfig::addMachinePasses() { // address map (or both). 
if (TM->getBBSectionsType() != llvm::BasicBlockSection::None || TM->Options.BBAddrMap) { + if (EmitBBHash || PropellerMatchInfer) + addPass(llvm::createMachineBlockHashInfoPass()); if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) { addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass( TM->getBBSectionsFuncListBuf())); - addPass(llvm::createBasicBlockPathCloningPass()); + if (PropellerMatchInfer) + addPass(llvm::createBasicBlockMatchingAndInferencePass()); + else + addPass(llvm::createBasicBlockPathCloningPass()); } addPass(llvm::createBasicBlockSectionsPass()); } diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-with-bb-hash.ll b/llvm/test/CodeGen/X86/basic-block-address-map-with-bb-hash.ll new file mode 100644 index 0000000000000..7117e94445c4d --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-address-map-with-bb-hash.ll @@ -0,0 +1,96 @@ +; Check the basic block sections labels option when emitting basic block hashes. +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -emit-bb-hash | FileCheck %s --check-prefixes=CHECK,UNIQ +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=false -basic-block-address-map -emit-bb-hash | FileCheck %s --check-prefixes=CHECK,NOUNIQ +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -split-machine-functions -emit-bb-hash | FileCheck %s --check-prefixes=CHECK,UNIQ + +define void @_Z3bazb(i1 zeroext, i1 zeroext) personality ptr @__gxx_personality_v0 { + br i1 %0, label %3, label %8 + +3: + %4 = invoke i32 @_Z3barv() + to label %8 unwind label %6 + br label %10 + +6: + landingpad { ptr, i32 } + catch ptr null + br label %12 + +8: + %9 = call i32 @_Z3foov() + br i1 %1, label %12, label %10 + +10: + %11 = select i1 %1, ptr blockaddress(@_Z3bazb, %3), ptr blockaddress(@_Z3bazb, %12) ; [#uses=1] + indirectbr ptr %11, [label %3, label %12] + +12: + ret void +} + +declare i32 @_Z3barv()
#1 + +declare i32 @_Z3foov() #1 + +declare i32 @__gxx_personality_v0(...) + +; UNIQ: .section .text._Z3bazb,"ax",@progbits{{$}} +; NOUNIQ: .section .text,"ax",@progbits,unique,1 +; CHECK-LABEL: _Z3bazb: +; CHECK-LABEL: .Lfunc_begin0: +; CHECK-LABEL: .LBB_END0_0: +; CHECK-LABEL: .LBB0_1: +; CHECK-LABEL: .LBB0_1_CS0: +; CHECK-LABEL: .LBB_END0_1: +; CHECK-LABEL: .LBB0_2: +; CHECK-LABEL: .LBB0_2_CS0: +; CHECK-LABEL: .LBB_END0_2: +; CHECK-LABEL: .LBB0_3: +; CHECK-LABEL: .LBB_END0_3: +; CHECK-LABEL: .Lfunc_end0: + +; UNIQ: .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text._Z3bazb{{$}} +;; Verify that with -unique-section-names=false, the unique id of the text section gets assigned to the llvm_bb_addr_map section. +; NOUNIQ: .section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text,unique,1 +; CHECK-NEXT: .byte 3 # version +; CHECK-NEXT: .byte 96 # feature +; CHECK-NEXT: .quad .Lfunc_begin0 # function address +; CHECK-NEXT: .byte 6 # number of basic blocks +; CHECK-NEXT: .byte 0 # BB id +; CHECK-NEXT: .uleb128 .Lfunc_begin0-.Lfunc_begin0 +; CHECK-NEXT: .byte 0 # number of callsites +; CHECK-NEXT: .uleb128 .LBB_END0_0-.Lfunc_begin0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .ascii "{{[\x00-\xFF]+}}" +; CHECK-NEXT: .byte 1 # BB id +; CHECK-NEXT: .uleb128 .LBB0_1-.LBB_END0_0 +; CHECK-NEXT: .byte 1 # number of callsites +; CHECK-NEXT: .uleb128 .LBB0_1_CS0-.LBB0_1 +; CHECK-NEXT: .uleb128 .LBB_END0_1-.LBB0_1_CS0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .ascii "{{[\x00-\xFF]+}}" +; CHECK-NEXT: .byte 3 # BB id +; CHECK-NEXT: .uleb128 .LBB0_2-.LBB_END0_1 +; CHECK-NEXT: .byte 1 # number of callsites +; CHECK-NEXT: .uleb128 .LBB0_2_CS0-.LBB0_2 +; CHECK-NEXT: .uleb128 .LBB_END0_2-.LBB0_2_CS0 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .ascii "{{[\x00-\xFF]+}}" +; CHECK-NEXT: .byte 4 # BB id +; CHECK-NEXT: .uleb128 .LBB0_3-.LBB_END0_2 +; CHECK-NEXT: .byte 0 # number of callsites +; CHECK-NEXT: .uleb128 .LBB_END0_3-.LBB0_3 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .ascii "{{[\x00-\xFF]+}}" +; 
CHECK-NEXT: .byte 5 # BB id +; CHECK-NEXT: .uleb128 .LBB0_4-.LBB_END0_3 +; CHECK-NEXT: .byte 0 # number of callsites +; CHECK-NEXT: .uleb128 .LBB_END0_4-.LBB0_4 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .ascii "{{[\x00-\xFF]+}}" +; CHECK-NEXT: .byte 2 # BB id +; CHECK-NEXT: .uleb128 .LBB0_5-.LBB_END0_4 +; CHECK-NEXT: .byte 0 # number of callsites +; CHECK-NEXT: .uleb128 .LBB_END0_5-.LBB0_5 +; CHECK-NEXT: .byte 5 +; CHECK-NEXT: .ascii "{{[\x00-\xFF]+}}"