From 49c5f229c8f4829551462bc1ad335c752d0fc643 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Sat, 8 Nov 2025 19:54:21 +0000 Subject: [PATCH 01/38] feat(AsmPrinter): Add support for emitting prefetch target symbols --- .../CodeGen/BasicBlockSectionsProfileReader.h | 45 ++++++++++++++++- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 24 +++++++++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 50 ++++++++++++++++++- .../BasicBlockSectionsProfileReader.cpp | 44 ++++++++++++++++ llvm/lib/CodeGen/MachineBasicBlock.cpp | 13 +++++ 5 files changed, 174 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index ee1f28377f7e4..5b230db30aec4 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -42,6 +42,17 @@ struct BBClusterInfo { unsigned PositionInCluster; }; +struct BBPosition { + UniqueBBID BBID; + unsigned BBOffset; +}; + +struct PrefetchHint { + BBPosition SitePosition; + StringRef TargetFunctionName; + BBPosition TargetPosition; +}; + // This represents the raw input profile for one function. struct FunctionPathAndClusterInfo { // BB Cluster information specified by `UniqueBBID`s. @@ -50,9 +61,11 @@ struct FunctionPathAndClusterInfo { // the edge a -> b (a is not cloned). The index of the path in this vector // determines the `UniqueBBID::CloneID` of the cloned blocks in that path. SmallVector> ClonePaths; + SmallVector PrefetchHints; + DenseSet PrefetchTargets; // Node counts for each basic block. DenseMap NodeCounts; - // Edge counts for each edge, stored as a nested map. + // Edge counts for each edge. DenseMap> EdgeCounts; // Hash for each basic block. 
The Hashes are stored for every original block // (not cloned blocks), hence the map key being unsigned instead of @@ -60,6 +73,27 @@ struct FunctionPathAndClusterInfo { DenseMap BBHashes; }; +// Provides DenseMapInfo BBPosition. +template <> struct DenseMapInfo { + static inline BBPosition getEmptyKey() { + return {DenseMapInfo::getEmptyKey(), + DenseMapInfo::getEmptyKey()}; + } + static inline BBPosition getTombstoneKey() { + return BBPosition{DenseMapInfo::getTombstoneKey(), + DenseMapInfo::getTombstoneKey()}; + } + static unsigned getHashValue(const BBPosition &Val) { + std::pair PairVal = std::make_pair( + DenseMapInfo::getHashValue(Val.BBID), Val.BBOffset); + return DenseMapInfo>::getHashValue(PairVal); + } + static bool isEqual(const BBPosition &LHS, const BBPosition &RHS) { + return DenseMapInfo::isEqual(LHS.BBID, RHS.BBID) && + DenseMapInfo::isEqual(LHS.BBOffset, RHS.BBOffset); + } +}; + class BasicBlockSectionsProfileReader { public: friend class BasicBlockSectionsProfileReaderWrapperPass; @@ -86,6 +120,11 @@ class BasicBlockSectionsProfileReader { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &SinkBBID) const; + SmallVector + getPrefetchHintsForFunction(StringRef FuncName) const; + + DenseSet getPrefetchTargetsForFunction(StringRef FuncName) const; + private: StringRef getAliasName(StringRef FuncName) const { auto R = FuncAliasMap.find(FuncName); @@ -194,6 +233,10 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &DestBBID) const; + SmallVector + getPrefetchHintsForFunction(StringRef FuncName) const; + + DenseSet getPrefetchTargetsForFunction(StringRef FuncName) const; // Initializes the FunctionNameToDIFilename map for the current module and // then reads the profile for the matching functions. 
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index fcf7bab09fcff..e6c6bc26ae9e6 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -100,6 +100,12 @@ template <> struct DenseMapInfo { } }; +struct PrefetchTarget { + StringRef TargetFunction; + UniqueBBID TargetBBID; + unsigned TargetBBOffset; +}; + template <> struct ilist_traits { private: friend class MachineBasicBlock; // Set by the owning MachineBasicBlock. @@ -213,6 +219,8 @@ class MachineBasicBlock /// basic block sections and basic block labels. std::optional BBID; + SmallVector PrefetchTargets; + /// With basic block sections, this stores the Section ID of the basic block. MBBSectionID SectionID{0}; @@ -229,6 +237,8 @@ class MachineBasicBlock /// is only computed once and is cached. mutable MCSymbol *CachedMCSymbol = nullptr; + mutable SmallVector CallInstSymbols; + /// Cached MCSymbol for this block (used if IsEHContTarget). mutable MCSymbol *CachedEHContMCSymbol = nullptr; @@ -710,6 +720,14 @@ class MachineBasicBlock std::optional getBBID() const { return BBID; } + const SmallVector &getPrefetchTargets() const { + return PrefetchTargets; + } + + void setPrefetchTargets(const SmallVector &V) { + PrefetchTargets = V; + } + /// Returns the section ID of this basic block. MBBSectionID getSectionID() const { return SectionID; } @@ -1275,6 +1293,12 @@ class MachineBasicBlock /// Return the MCSymbol for this basic block. LLVM_ABI MCSymbol *getSymbol() const; + MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const; + + const SmallVector& getCallInstSymbols() const { + return CallInstSymbols; + } + /// Return the Windows EH Continuation Symbol for this basic block. 
LLVM_ABI MCSymbol *getEHContSymbol() const; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 3aa245b7f3f1e..a204bba5789a8 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -18,6 +18,7 @@ #include "WasmException.h" #include "WinCFGuard.h" #include "WinException.h" +#include "llvm/Support/SMLoc.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/BitmaskEnum.h" @@ -178,6 +179,11 @@ static cl::opt EmitJumpTableSizesSection( cl::desc("Emit a section containing jump table addresses and sizes"), cl::Hidden, cl::init(false)); +static cl::opt InsertNoopsForPrefetch( + "insert-noops-for-prefetch", + cl::desc("Whether to insert noops instead of prefetches."), cl::init(false), + cl::Hidden); + // This isn't turned on by default, since several of the scheduling models are // not completely accurate, and we don't want to be misleading. static cl::opt PrintLatency( @@ -1982,10 +1988,34 @@ void AsmPrinter::emitFunctionBody() { FunctionCallGraphInfo FuncCGInfo; const auto &CallSitesInfoMap = MF->getCallSitesInfo(); for (auto &MBB : *MF) { + int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0; // Print a label for the basic block. 
emitBasicBlockStart(MBB); DenseMap MnemonicCounts; + unsigned NumCallsInBlock = 0; for (auto &MI : MBB) { + if (NextPrefetchTargetIndex != -1 && + NumCallsInBlock >= MBB.getPrefetchTargets()[NextPrefetchTargetIndex]) { + + MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( + Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) + + Twine("_") + + utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex])); + if (MF->getFunction().isWeakForLinker()) { + OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Weak); + errs() << "Emitting weak symbol: " << PrefetchTargetSymbol->getName() << "\n"; + } else { + OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global); + errs() << "Emitting global symbol: " << PrefetchTargetSymbol->getName() << "\n"; + } + // OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern); + // errs() << "Emitting symbol: " << PrefetchTargetSymbol->getName() << "\n"; + OutStreamer->emitLabel(PrefetchTargetSymbol); + ++NextPrefetchTargetIndex; + if (NextPrefetchTargetIndex >= + static_cast(MBB.getPrefetchTargets().size())) + NextPrefetchTargetIndex = -1; + } // Print the assembly for the instruction. if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() && !MI.isDebugInstr()) { @@ -2099,7 +2129,7 @@ void AsmPrinter::emitFunctionBody() { break; } default: - emitInstruction(&MI); + emitInstruction(&MI); auto CountInstruction = [&](const MachineInstr &MI) { // Skip Meta instructions inside bundles. 
@@ -2136,6 +2166,24 @@ void AsmPrinter::emitFunctionBody() { for (auto &Handler : Handlers) Handler->endInstruction(); } + while (NextPrefetchTargetIndex != -1) { + MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( + Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) + + Twine("_") + + utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex])); + if (MF->getFunction().hasWeakLinkage()) { + OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_WeakDefinition); + } else { + OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global); + } + OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern); + OutStreamer->emitLabel(PrefetchTargetSymbol); + ++NextPrefetchTargetIndex; + if (NextPrefetchTargetIndex >= + static_cast(MBB.getPrefetchTargets().size())) + NextPrefetchTargetIndex = -1; + } + // We must emit temporary symbol for the end of this basic block, if either // we have BBLabels enabled or if this basic blocks marks the end of a diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index c234c0f1b0b34..de146e172c174 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -93,6 +93,19 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount( return EdgeIt->second; } +SmallVector +BasicBlockSectionsProfileReader::getPrefetchHintsForFunction( + StringRef FuncName) const { + return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints; +} + +DenseSet +BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction( + StringRef FuncName) const { + return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)) + .PrefetchTargets; +} + // Reads the version 1 basic block sections profile. 
Profile for each function // is encoded as follows: // m @@ -308,6 +321,25 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { } continue; } + case 't': { // Prefetch target specifier. + // Skip the profile when we the profile iterator (FI) refers to the + // past-the-end element. + if (FI == ProgramPathAndClusterInfo.end()) + continue; + assert(Values.size() == 1); + SmallVector PrefetchTargetStr; + Values[0].split(PrefetchTargetStr, '@'); + assert(PrefetchTargetStr.size() == 2); + auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]); + if (!TargetBBID) + return TargetBBID.takeError(); + unsigned long long TargetBBOffset; + if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetBBOffset)) + return createProfileParseError(Twine("unsigned integer expected: '") + + PrefetchTargetStr[1]); + FI->second.PrefetchTargets.insert(BBPosition{*TargetBBID, static_cast(TargetBBOffset)}); + continue; + } default: return createProfileParseError(Twine("invalid specifier: '") + Twine(Specifier) + "'"); @@ -514,6 +546,18 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount( return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID); } +SmallVector +BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction( + StringRef FuncName) const { + return BBSPR.getPrefetchHintsForFunction(FuncName); +} + +DenseSet +BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction( + StringRef FuncName) const { + return BBSPR.getPrefetchTargetsForFunction(FuncName); +} + BasicBlockSectionsProfileReader & BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() { return BBSPR; diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index ba0b025167307..19b218a2879dd 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -90,6 +90,19 @@ MCSymbol *MachineBasicBlock::getSymbol() const { return CachedMCSymbol; } +MCSymbol *MachineBasicBlock::getCallInstSymbol(unsigned 
CallInstNumber) const { + if (CallInstSymbols.size() <= CallInstNumber) { + const MachineFunction *MF = getParent(); + MCContext &Ctx = MF->getContext(); + CallInstSymbols.resize(CallInstNumber + 1); + CallInstSymbols[CallInstNumber] = Ctx.createBlockSymbol( + "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()) + "_" + + Twine(CallInstNumber), + /*AlwaysEmit=*/true); + } + return CallInstSymbols[CallInstNumber]; +} + MCSymbol *MachineBasicBlock::getEHContSymbol() const { if (!CachedEHContMCSymbol) { const MachineFunction *MF = getParent(); From b25adef3703b8bb3813609f8282ebf8a53b6686d Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Tue, 11 Nov 2025 21:30:47 +0000 Subject: [PATCH 02/38] feat: Add prefetch-profile.txt for testing --- build-release/prefetch-profile.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 build-release/prefetch-profile.txt diff --git a/build-release/prefetch-profile.txt b/build-release/prefetch-profile.txt new file mode 100644 index 0000000000000..294f57a46920b --- /dev/null +++ b/build-release/prefetch-profile.txt @@ -0,0 +1,3 @@ +v1 +f f +t 0@1 From bbfb7ba90a5192bb78549a5cd202368872dfd09a Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 12 Nov 2025 18:58:18 +0000 Subject: [PATCH 03/38] Everything else. 
--- build-release/prefetch-profile.txt | 3 -- .../CodeGen/BasicBlockSectionsProfileReader.h | 29 +++---------------- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 12 +++++++- .../BasicBlockSectionsProfileReader.cpp | 10 +++---- 4 files changed, 20 insertions(+), 34 deletions(-) delete mode 100644 build-release/prefetch-profile.txt diff --git a/build-release/prefetch-profile.txt b/build-release/prefetch-profile.txt deleted file mode 100644 index 294f57a46920b..0000000000000 --- a/build-release/prefetch-profile.txt +++ /dev/null @@ -1,3 +0,0 @@ -v1 -f f -t 0@1 diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 5b230db30aec4..fbf9b89754cd7 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -44,7 +44,7 @@ struct BBClusterInfo { struct BBPosition { UniqueBBID BBID; - unsigned BBOffset; + unsigned CallsiteIndex; }; struct PrefetchHint { @@ -62,7 +62,7 @@ struct FunctionPathAndClusterInfo { // determines the `UniqueBBID::CloneID` of the cloned blocks in that path. SmallVector> ClonePaths; SmallVector PrefetchHints; - DenseSet PrefetchTargets; + SmallVector PrefetchTargets; // Node counts for each basic block. DenseMap NodeCounts; // Edge counts for each edge. @@ -73,27 +73,6 @@ struct FunctionPathAndClusterInfo { DenseMap BBHashes; }; -// Provides DenseMapInfo BBPosition. 
-template <> struct DenseMapInfo { - static inline BBPosition getEmptyKey() { - return {DenseMapInfo::getEmptyKey(), - DenseMapInfo::getEmptyKey()}; - } - static inline BBPosition getTombstoneKey() { - return BBPosition{DenseMapInfo::getTombstoneKey(), - DenseMapInfo::getTombstoneKey()}; - } - static unsigned getHashValue(const BBPosition &Val) { - std::pair PairVal = std::make_pair( - DenseMapInfo::getHashValue(Val.BBID), Val.BBOffset); - return DenseMapInfo>::getHashValue(PairVal); - } - static bool isEqual(const BBPosition &LHS, const BBPosition &RHS) { - return DenseMapInfo::isEqual(LHS.BBID, RHS.BBID) && - DenseMapInfo::isEqual(LHS.BBOffset, RHS.BBOffset); - } -}; - class BasicBlockSectionsProfileReader { public: friend class BasicBlockSectionsProfileReaderWrapperPass; @@ -123,7 +102,7 @@ class BasicBlockSectionsProfileReader { SmallVector getPrefetchHintsForFunction(StringRef FuncName) const; - DenseSet getPrefetchTargetsForFunction(StringRef FuncName) const; + DenseMap> getPrefetchTargetsForFunction(StringRef FuncName) const; private: StringRef getAliasName(StringRef FuncName) const { @@ -236,7 +215,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass { SmallVector getPrefetchHintsForFunction(StringRef FuncName) const; - DenseSet getPrefetchTargetsForFunction(StringRef FuncName) const; + DenseMap> getPrefetchTargetsForFunction(StringRef FuncName) const; // Initializes the FunctionNameToDIFilename map for the current module and // then reads the profile for the matching functions. 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index a204bba5789a8..90445fedd5db3 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -485,6 +485,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); if (EmitBBHash) AU.addRequired(); + AU.addUsedIfAvailable(); } bool AsmPrinter::doInitialization(Module &M) { @@ -1987,7 +1988,16 @@ void AsmPrinter::emitFunctionBody() { FunctionCallGraphInfo FuncCGInfo; const auto &CallSitesInfoMap = MF->getCallSitesInfo(); - for (auto &MBB : *MF) { + DenseMap> FunctionPrefetchTargets; + if (auto *BBSPRPass = + getAnalysisIfAvailable()) { + FunctionPrefetchTargets = BBSPRPass->getBBSPR().getPrefetchTargetsForFunction(MF->getName()); +} + + for (auto &MBB : *MF) { + + SmallVector BBPrefetchTargets; + = FunctionPrefetchTargets.lookup(MBB.g); int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0; // Print a label for the basic block. 
emitBasicBlockStart(MBB); diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index de146e172c174..c4784a6039c09 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -99,7 +99,7 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction( return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints; } -DenseSet +SmallVector BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction( StringRef FuncName) const { return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)) @@ -333,11 +333,11 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]); if (!TargetBBID) return TargetBBID.takeError(); - unsigned long long TargetBBOffset; - if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetBBOffset)) + unsigned long long TargetCallsiteIndex; + if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetCallsiteIndex)) return createProfileParseError(Twine("unsigned integer expected: '") + PrefetchTargetStr[1]); - FI->second.PrefetchTargets.insert(BBPosition{*TargetBBID, static_cast(TargetBBOffset)}); + FI->second.PrefetchTargets.push_back(BBPosition{*TargetBBID, static_cast(TargetCallsiteIndex)}); continue; } default: @@ -552,7 +552,7 @@ BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction( return BBSPR.getPrefetchHintsForFunction(FuncName); } -DenseSet +SmallVector BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction( StringRef FuncName) const { return BBSPR.getPrefetchTargetsForFunction(FuncName); From 3e6b04f94548b6a61219eae6b32ba5a46ac1461b Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 12 Nov 2025 18:58:27 +0000 Subject: [PATCH 04/38] Add test. 
--- llvm/test/CodeGen/X86/prefetch-symbols.ll | 42 +++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 llvm/test/CodeGen/X86/prefetch-symbols.ll diff --git a/llvm/test/CodeGen/X86/prefetch-symbols.ll b/llvm/test/CodeGen/X86/prefetch-symbols.ll new file mode 100644 index 0000000000000..979db7942ff2c --- /dev/null +++ b/llvm/test/CodeGen/X86/prefetch-symbols.ll @@ -0,0 +1,42 @@ +;; Check that specifying the function in the basic block sections profile +;; without any other directives is a noop. +;; +;; Specify the bb sections profile: +; RUN: echo 'v1' > %t +; RUN: echo 'f _Z3foob' >> %t +; RUN: echo 't 0@0' >> %t +;; +; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t | FileCheck + +define i32 @_Z3foob(i1 zeroext %0) nounwind { + %2 = alloca i32, align 4 + %3 = alloca i8, align 1 + %4 = zext i1 %0 to i8 + store i8 %4, ptr %3, align 1 + %5 = load i8, ptr %3, align 1 + %6 = trunc i8 %5 to i1 + %7 = zext i1 %6 to i32 + %8 = icmp sgt i32 %7, 0 + br i1 %8, label %9, label %11 + +9: ; preds = %1 + %10 = call i32 @_Z3barv() + store i32 %10, ptr %2, align 4 + br label %13 + +11: ; preds = %1 + %12 = call i32 @_Z3bazv() + store i32 %12, ptr %2, align 4 + br label %13 + +13: ; preds = %11, %9 + %14 = load i32, ptr %2, align 4 + ret i32 %14 +} + +declare i32 @_Z3barv() #1 +declare i32 @_Z3bazv() #1 + + +; CHECK: _Z3foob +; CHECK: llvm_prefetch_target From 996736018c889a680e2ff2d262d4496f398c9d1a Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 13 Nov 2025 00:10:17 +0000 Subject: [PATCH 05/38] Fix everything --- .../CodeGen/BasicBlockSectionsProfileReader.h | 4 +- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 24 +---- llvm/include/llvm/CodeGen/Passes.h | 2 + llvm/include/llvm/InitializePasses.h | 1 + llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 73 ++++---------- llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/InsertCodePrefetch.cpp | 96 +++++++++++++++++++ llvm/lib/CodeGen/MachineBasicBlock.cpp | 13 --- 
llvm/lib/CodeGen/TargetPassConfig.cpp | 1 + 9 files changed, 127 insertions(+), 88 deletions(-) create mode 100644 llvm/lib/CodeGen/InsertCodePrefetch.cpp diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index fbf9b89754cd7..1fd904d64ab9d 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -102,7 +102,7 @@ class BasicBlockSectionsProfileReader { SmallVector getPrefetchHintsForFunction(StringRef FuncName) const; - DenseMap> getPrefetchTargetsForFunction(StringRef FuncName) const; + SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; private: StringRef getAliasName(StringRef FuncName) const { @@ -215,7 +215,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass { SmallVector getPrefetchHintsForFunction(StringRef FuncName) const; - DenseMap> getPrefetchTargetsForFunction(StringRef FuncName) const; + SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; // Initializes the FunctionNameToDIFilename map for the current module and // then reads the profile for the matching functions. diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index e6c6bc26ae9e6..4be008bbf4bf1 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -100,12 +100,6 @@ template <> struct DenseMapInfo { } }; -struct PrefetchTarget { - StringRef TargetFunction; - UniqueBBID TargetBBID; - unsigned TargetBBOffset; -}; - template <> struct ilist_traits { private: friend class MachineBasicBlock; // Set by the owning MachineBasicBlock. @@ -219,8 +213,6 @@ class MachineBasicBlock /// basic block sections and basic block labels. std::optional BBID; - SmallVector PrefetchTargets; - /// With basic block sections, this stores the Section ID of the basic block. 
MBBSectionID SectionID{0}; @@ -237,7 +229,7 @@ class MachineBasicBlock /// is only computed once and is cached. mutable MCSymbol *CachedMCSymbol = nullptr; - mutable SmallVector CallInstSymbols; + SmallVector PrefetchTargetIndexes; /// Cached MCSymbol for this block (used if IsEHContTarget). mutable MCSymbol *CachedEHContMCSymbol = nullptr; @@ -720,12 +712,12 @@ class MachineBasicBlock std::optional getBBID() const { return BBID; } - const SmallVector &getPrefetchTargets() const { - return PrefetchTargets; + const SmallVector &getPrefetchTargetIndexes() const { + return PrefetchTargetIndexes; } - void setPrefetchTargets(const SmallVector &V) { - PrefetchTargets = V; + void setPrefetchTargetIndexes(const SmallVector &V) { + PrefetchTargetIndexes = V; } /// Returns the section ID of this basic block. @@ -1293,12 +1285,6 @@ class MachineBasicBlock /// Return the MCSymbol for this basic block. LLVM_ABI MCSymbol *getSymbol() const; - MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const; - - const SmallVector& getCallInstSymbols() const { - return CallInstSymbols; - } - /// Return the Windows EH Continuation Symbol for this basic block. LLVM_ABI MCSymbol *getEHContSymbol() const; diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index a8525554b142e..f148d050a5772 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -69,6 +69,8 @@ LLVM_ABI MachineFunctionPass *createBasicBlockSectionsPass(); LLVM_ABI MachineFunctionPass *createBasicBlockPathCloningPass(); +LLVM_ABI MachineFunctionPass *createInsertCodePrefetchPass(); + /// createMachineBlockHashInfoPass - This pass computes basic block hashes. 
LLVM_ABI MachineFunctionPass *createMachineBlockHashInfoPass(); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 10a4d8525a9e8..35d5ab14dc226 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -56,6 +56,7 @@ LLVM_ABI void initializeAssignmentTrackingAnalysisPass(PassRegistry &); LLVM_ABI void initializeAssumptionCacheTrackerPass(PassRegistry &); LLVM_ABI void initializeAtomicExpandLegacyPass(PassRegistry &); LLVM_ABI void initializeBasicBlockPathCloningPass(PassRegistry &); +LLVM_ABI void initializeInsertCodePrefetchPass(PassRegistry &); LLVM_ABI void initializeBasicBlockSectionsProfileReaderWrapperPassPass(PassRegistry &); LLVM_ABI void initializeBasicBlockSectionsPass(PassRegistry &); diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 90445fedd5db3..933fe6f7d177f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -179,11 +179,6 @@ static cl::opt EmitJumpTableSizesSection( cl::desc("Emit a section containing jump table addresses and sizes"), cl::Hidden, cl::init(false)); -static cl::opt InsertNoopsForPrefetch( - "insert-noops-for-prefetch", - cl::desc("Whether to insert noops instead of prefetches."), cl::init(false), - cl::Hidden); - // This isn't turned on by default, since several of the scheduling models are // not completely accurate, and we don't want to be misleading. 
static cl::opt PrintLatency( @@ -485,7 +480,6 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); if (EmitBBHash) AU.addRequired(); - AU.addUsedIfAvailable(); } bool AsmPrinter::doInitialization(Module &M) { @@ -1988,44 +1982,29 @@ void AsmPrinter::emitFunctionBody() { FunctionCallGraphInfo FuncCGInfo; const auto &CallSitesInfoMap = MF->getCallSitesInfo(); - DenseMap> FunctionPrefetchTargets; - if (auto *BBSPRPass = - getAnalysisIfAvailable()) { - FunctionPrefetchTargets = BBSPRPass->getBBSPR().getPrefetchTargetsForFunction(MF->getName()); -} for (auto &MBB : *MF) { - - SmallVector BBPrefetchTargets; - = FunctionPrefetchTargets.lookup(MBB.g); - int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0; // Print a label for the basic block. emitBasicBlockStart(MBB); DenseMap MnemonicCounts; - unsigned NumCallsInBlock = 0; - for (auto &MI : MBB) { - if (NextPrefetchTargetIndex != -1 && - NumCallsInBlock >= MBB.getPrefetchTargets()[NextPrefetchTargetIndex]) { - MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( + SmallVector PrefetchTargets = MBB.getPrefetchTargetIndexes(); + auto PrefetchTargetIt = PrefetchTargets.begin(); + unsigned NumCalls = 0; + auto EmitPrefetchTargetSymbolIfNeeded = [&]() { + if (PrefetchTargetIt == PrefetchTargets.end() || NumCalls < *PrefetchTargetIt) + return; + MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) + Twine("_") + - utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex])); - if (MF->getFunction().isWeakForLinker()) { - OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Weak); - errs() << "Emitting weak symbol: " << PrefetchTargetSymbol->getName() << "\n"; - } else { - OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global); - errs() << "Emitting global symbol: " << PrefetchTargetSymbol->getName() << "\n"; - } - // 
OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern); - // errs() << "Emitting symbol: " << PrefetchTargetSymbol->getName() << "\n"; + utostr(*PrefetchTargetIt)); + OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global); OutStreamer->emitLabel(PrefetchTargetSymbol); - ++NextPrefetchTargetIndex; - if (NextPrefetchTargetIndex >= - static_cast(MBB.getPrefetchTargets().size())) - NextPrefetchTargetIndex = -1; - } + ++PrefetchTargetIt; + }; + + for (auto &MI : MBB) { + EmitPrefetchTargetSymbolIfNeeded(); // Print the assembly for the instruction. if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() && !MI.isDebugInstr()) { @@ -2163,8 +2142,11 @@ void AsmPrinter::emitFunctionBody() { break; } - if (MI.isCall() && MF->getTarget().Options.BBAddrMap) + if (MI.isCall()) { + if (MF->getTarget().Options.BBAddrMap) OutStreamer->emitLabel(createCallsiteEndSymbol(MBB)); + ++NumCalls; + } if (TM.Options.EmitCallGraphSection && MI.isCall()) handleCallsiteForCallgraph(FuncCGInfo, CallSitesInfoMap, MI); @@ -2176,24 +2158,7 @@ void AsmPrinter::emitFunctionBody() { for (auto &Handler : Handlers) Handler->endInstruction(); } - while (NextPrefetchTargetIndex != -1) { - MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( - Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) + - Twine("_") + - utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex])); - if (MF->getFunction().hasWeakLinkage()) { - OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_WeakDefinition); - } else { - OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global); - } - OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern); - OutStreamer->emitLabel(PrefetchTargetSymbol); - ++NextPrefetchTargetIndex; - if (NextPrefetchTargetIndex >= - static_cast(MBB.getPrefetchTargets().size())) - NextPrefetchTargetIndex = -1; - } - + 
EmitPrefetchTargetSymbolIfNeeded(); // We must emit temporary symbol for the end of this basic block, if either // we have BBLabels enabled or if this basic blocks marks the end of a diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 1cf0b4964760b..fcf28247179ca 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -79,6 +79,7 @@ add_llvm_component_library(LLVMCodeGen IndirectBrExpandPass.cpp InitUndef.cpp InlineSpiller.cpp + InsertCodePrefetch.cpp InterferenceCache.cpp InterleavedAccessPass.cpp InterleavedLoadCombinePass.cpp diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp new file mode 100644 index 0000000000000..7cb52302ac7db --- /dev/null +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -0,0 +1,96 @@ +//===-- InsertCodePrefetch.cpp ---=========-----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Prefetch insertion pass implementation. +//===----------------------------------------------------------------------===// +/// Prefetch insertion pass. 
+//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/BasicBlockSectionUtils.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/InitializePasses.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; +#define DEBUG_TYPE "prefetchinsertion" + +namespace { +class InsertCodePrefetch : public MachineFunctionPass { +public: + static char ID; + + InsertCodePrefetch() : MachineFunctionPass(ID) { + initializeInsertCodePrefetchPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "X86 Cide Prefetch Inserter Pass"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// Identify basic blocks that need separate sections and prepare to emit them + /// accordingly. 
+ bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Implementation +//===----------------------------------------------------------------------===// + +char InsertCodePrefetch::ID = 0; +INITIALIZE_PASS_BEGIN( + InsertCodePrefetch, DEBUG_TYPE, + "Reads prefetch", true, + false) +INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass) +INITIALIZE_PASS_END( + InsertCodePrefetch, DEBUG_TYPE, + "Reads prefetch", true, + false) + +bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { + assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List && + "BB Sections list not enabled!"); + if (hasInstrProfHashMismatch(MF)) + return false; + SmallVector PrefetchTargets = + getAnalysis() + .getPrefetchTargetsForFunction(MF.getName()); + DenseMap> PrefetchTargetsByBBID; + for (const auto &Target: PrefetchTargets) + PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex); + for (auto &MBB: MF) { + auto R = PrefetchTargetsByBBID.find(*MBB.getBBID()); + if (R == PrefetchTargetsByBBID.end()) continue; + MBB.setPrefetchTargetIndexes(R->second); + } + + return false; +} + +void InsertCodePrefetch::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachineFunctionPass *llvm::createInsertCodePrefetchPass() { + return new InsertCodePrefetch(); +} diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 19b218a2879dd..ba0b025167307 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -90,19 +90,6 @@ MCSymbol *MachineBasicBlock::getSymbol() const { return CachedMCSymbol; } -MCSymbol *MachineBasicBlock::getCallInstSymbol(unsigned CallInstNumber) const { - if (CallInstSymbols.size() <= CallInstNumber) { - const MachineFunction *MF = 
getParent(); - MCContext &Ctx = MF->getContext(); - CallInstSymbols.resize(CallInstNumber + 1); - CallInstSymbols[CallInstNumber] = Ctx.createBlockSymbol( - "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()) + "_" + - Twine(CallInstNumber), - /*AlwaysEmit=*/true); - } - return CallInstSymbols[CallInstNumber]; -} - MCSymbol *MachineBasicBlock::getEHContSymbol() const { if (!CachedEHContMCSymbol) { const MachineFunction *MF = getParent(); diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index ceae0d29eea90..5334c5596d018 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1291,6 +1291,7 @@ void TargetPassConfig::addMachinePasses() { addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass( TM->getBBSectionsFuncListBuf())); addPass(llvm::createBasicBlockPathCloningPass()); + addPass(llvm::createInsertCodePrefetchPass()); } addPass(llvm::createBasicBlockSectionsPass()); } From a08b65a6c93963ad84ae98820973fae245637ea2 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 13 Nov 2025 00:10:36 +0000 Subject: [PATCH 06/38] clang-format. 
--- .../CodeGen/BasicBlockSectionsProfileReader.h | 6 +++-- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 25 +++++++++++-------- .../BasicBlockSectionsProfileReader.cpp | 3 ++- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 23 ++++++++--------- 4 files changed, 31 insertions(+), 26 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 1fd904d64ab9d..2b8ee578cd917 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -102,7 +102,8 @@ class BasicBlockSectionsProfileReader { SmallVector getPrefetchHintsForFunction(StringRef FuncName) const; - SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; + SmallVector + getPrefetchTargetsForFunction(StringRef FuncName) const; private: StringRef getAliasName(StringRef FuncName) const { @@ -215,7 +216,8 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass { SmallVector getPrefetchHintsForFunction(StringRef FuncName) const; - SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; + SmallVector + getPrefetchTargetsForFunction(StringRef FuncName) const; // Initializes the FunctionNameToDIFilename map for the current module and // then reads the profile for the matching functions. 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 933fe6f7d177f..4368cd4d256c9 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -18,7 +18,6 @@ #include "WasmException.h" #include "WinCFGuard.h" #include "WinException.h" -#include "llvm/Support/SMLoc.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/BitmaskEnum.h" @@ -120,6 +119,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/VCSRevision.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" @@ -1983,7 +1983,7 @@ void AsmPrinter::emitFunctionBody() { FunctionCallGraphInfo FuncCGInfo; const auto &CallSitesInfoMap = MF->getCallSitesInfo(); - for (auto &MBB : *MF) { + for (auto &MBB : *MF) { // Print a label for the basic block. emitBasicBlockStart(MBB); DenseMap MnemonicCounts; @@ -1992,15 +1992,18 @@ void AsmPrinter::emitFunctionBody() { auto PrefetchTargetIt = PrefetchTargets.begin(); unsigned NumCalls = 0; auto EmitPrefetchTargetSymbolIfNeeded = [&]() { - if (PrefetchTargetIt == PrefetchTargets.end() || NumCalls < *PrefetchTargetIt) + if (PrefetchTargetIt == PrefetchTargets.end() || + NumCalls < *PrefetchTargetIt) return; MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( - Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) + - Twine("_") + - utostr(*PrefetchTargetIt)); - OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MF->getFunction().isWeakForLinker() ? 
MCSA_Weak : MCSA_Global); - OutStreamer->emitLabel(PrefetchTargetSymbol); - ++PrefetchTargetIt; + Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + + utostr(MBB.getBBID()->BaseID) + Twine("_") + + utostr(*PrefetchTargetIt)); + OutStreamer->emitSymbolAttribute( + PrefetchTargetSymbol, + MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global); + OutStreamer->emitLabel(PrefetchTargetSymbol); + ++PrefetchTargetIt; }; for (auto &MI : MBB) { @@ -2118,7 +2121,7 @@ void AsmPrinter::emitFunctionBody() { break; } default: - emitInstruction(&MI); + emitInstruction(&MI); auto CountInstruction = [&](const MachineInstr &MI) { // Skip Meta instructions inside bundles. @@ -2144,7 +2147,7 @@ void AsmPrinter::emitFunctionBody() { if (MI.isCall()) { if (MF->getTarget().Options.BBAddrMap) - OutStreamer->emitLabel(createCallsiteEndSymbol(MBB)); + OutStreamer->emitLabel(createCallsiteEndSymbol(MBB)); ++NumCalls; } diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index c4784a6039c09..9b54dd6803cf6 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -337,7 +337,8 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetCallsiteIndex)) return createProfileParseError(Twine("unsigned integer expected: '") + PrefetchTargetStr[1]); - FI->second.PrefetchTargets.push_back(BBPosition{*TargetBBID, static_cast(TargetCallsiteIndex)}); + FI->second.PrefetchTargets.push_back( + BBPosition{*TargetBBID, static_cast(TargetCallsiteIndex)}); continue; } default: diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index 7cb52302ac7db..91cb6e599215d 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -39,7 +39,9 @@ class InsertCodePrefetch : public MachineFunctionPass { 
initializeInsertCodePrefetchPass(*PassRegistry::getPassRegistry()); } - StringRef getPassName() const override { return "X86 Cide Prefetch Inserter Pass"; } + StringRef getPassName() const override { + return "X86 Cide Prefetch Inserter Pass"; + } void getAnalysisUsage(AnalysisUsage &AU) const override; @@ -55,15 +57,11 @@ class InsertCodePrefetch : public MachineFunctionPass { //===----------------------------------------------------------------------===// char InsertCodePrefetch::ID = 0; -INITIALIZE_PASS_BEGIN( - InsertCodePrefetch, DEBUG_TYPE, - "Reads prefetch", true, - false) +INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true, + false) INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass) -INITIALIZE_PASS_END( - InsertCodePrefetch, DEBUG_TYPE, - "Reads prefetch", true, - false) +INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true, + false) bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List && @@ -74,11 +72,12 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { getAnalysis() .getPrefetchTargetsForFunction(MF.getName()); DenseMap> PrefetchTargetsByBBID; - for (const auto &Target: PrefetchTargets) + for (const auto &Target : PrefetchTargets) PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex); - for (auto &MBB: MF) { + for (auto &MBB : MF) { auto R = PrefetchTargetsByBBID.find(*MBB.getBBID()); - if (R == PrefetchTargetsByBBID.end()) continue; + if (R == PrefetchTargetsByBBID.end()) + continue; MBB.setPrefetchTargetIndexes(R->second); } From d988a3c374d6f3212ccb4081a9321279d54ad92b Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 13 Nov 2025 03:46:13 +0000 Subject: [PATCH 07/38] Fix the prefetch test. 
--- llvm/test/CodeGen/X86/prefetch-symbols.ll | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/llvm/test/CodeGen/X86/prefetch-symbols.ll b/llvm/test/CodeGen/X86/prefetch-symbols.ll index 979db7942ff2c..3eb91dfdabd27 100644 --- a/llvm/test/CodeGen/X86/prefetch-symbols.ll +++ b/llvm/test/CodeGen/X86/prefetch-symbols.ll @@ -1,12 +1,14 @@ -;; Check that specifying the function in the basic block sections profile -;; without any other directives is a noop. +;; Check prefetch directives in basic block section profiles. ;; ;; Specify the bb sections profile: ; RUN: echo 'v1' > %t ; RUN: echo 'f _Z3foob' >> %t ; RUN: echo 't 0@0' >> %t +; RUN: echo 't 1@0' >> %t +; RUN: echo 't 1@1' >> %t +; RUN: echo 't 2@1' >> %t ;; -; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t | FileCheck +; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t | FileCheck %s define i32 @_Z3foob(i1 zeroext %0) nounwind { %2 = alloca i32, align 4 @@ -18,16 +20,27 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind { %7 = zext i1 %6 to i32 %8 = icmp sgt i32 %7, 0 br i1 %8, label %9, label %11 +; CHECK: _Z3foob: +; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_0_0 +; CHECK-NEXT: __llvm_prefetch_target__Z3foob_0_0: 9: ; preds = %1 %10 = call i32 @_Z3barv() store i32 %10, ptr %2, align 4 br label %13 +; CHECK: .globl __llvm_prefetch_target__Z3foob_1_0 +; CHECK-NEXT: __llvm_prefetch_target__Z3foob_1_0: +; CHECK-NEXT: callq _Z3barv@PLT +; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_1_1 +; CHECK-NEXT: __llvm_prefetch_target__Z3foob_1_1: 11: ; preds = %1 %12 = call i32 @_Z3bazv() store i32 %12, ptr %2, align 4 br label %13 +; CHECK: callq _Z3bazv@PLT +; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_2_1 +; CHECK-NEXT: __llvm_prefetch_target__Z3foob_2_1: 13: ; preds = %11, %9 %14 = load i32, ptr %2, align 4 @@ -36,7 +49,3 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind { declare 
i32 @_Z3barv() #1 declare i32 @_Z3bazv() #1 - - -; CHECK: _Z3foob -; CHECK: llvm_prefetch_target From 40084459b2edf0ba4d46f2c958856c33bca19d54 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 13 Nov 2025 03:48:53 +0000 Subject: [PATCH 08/38] Rename the test. --- ...{prefetch-symbols.ll => basic-block-sections-code-prefetch.ll} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename llvm/test/CodeGen/X86/{prefetch-symbols.ll => basic-block-sections-code-prefetch.ll} (100%) diff --git a/llvm/test/CodeGen/X86/prefetch-symbols.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll similarity index 100% rename from llvm/test/CodeGen/X86/prefetch-symbols.ll rename to llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll From e3b501f198f0640b7c43d73c81df74185532098e Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 13 Nov 2025 06:59:43 +0000 Subject: [PATCH 09/38] Remove unrelated changes. --- .../llvm/CodeGen/BasicBlockSectionsProfileReader.h | 12 ------------ llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 12 ------------ 2 files changed, 24 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 2b8ee578cd917..801588509d340 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -47,12 +47,6 @@ struct BBPosition { unsigned CallsiteIndex; }; -struct PrefetchHint { - BBPosition SitePosition; - StringRef TargetFunctionName; - BBPosition TargetPosition; -}; - // This represents the raw input profile for one function. struct FunctionPathAndClusterInfo { // BB Cluster information specified by `UniqueBBID`s. @@ -61,7 +55,6 @@ struct FunctionPathAndClusterInfo { // the edge a -> b (a is not cloned). The index of the path in this vector // determines the `UniqueBBID::CloneID` of the cloned blocks in that path. 
SmallVector> ClonePaths; - SmallVector PrefetchHints; SmallVector PrefetchTargets; // Node counts for each basic block. DenseMap NodeCounts; @@ -99,9 +92,6 @@ class BasicBlockSectionsProfileReader { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &SinkBBID) const; - SmallVector - getPrefetchHintsForFunction(StringRef FuncName) const; - SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; @@ -213,8 +203,6 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &DestBBID) const; - SmallVector - getPrefetchHintsForFunction(StringRef FuncName) const; SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 9b54dd6803cf6..5b12c85f7eeef 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -93,12 +93,6 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount( return EdgeIt->second; } -SmallVector -BasicBlockSectionsProfileReader::getPrefetchHintsForFunction( - StringRef FuncName) const { - return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints; -} - SmallVector BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction( StringRef FuncName) const { @@ -547,12 +541,6 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount( return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID); } -SmallVector -BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction( - StringRef FuncName) const { - return BBSPR.getPrefetchHintsForFunction(FuncName); -} - SmallVector BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction( StringRef FuncName) const { From 715f1b8504521ac749324db8d70c7a98d8ddb2b3 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 
13 Nov 2025 18:58:09 +0000 Subject: [PATCH 10/38] Add some comments. --- .../CodeGen/BasicBlockSectionsProfileReader.h | 19 ++++++++++---- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2 -- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 26 +++++++++---------- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 801588509d340..784bf8dd8f2a9 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -42,9 +42,14 @@ struct BBClusterInfo { unsigned PositionInCluster; }; -struct BBPosition { +// Assuming a block is split into subblocks across its callsites, this struct +// uniquely identifies the subblock in block `BBID` which starts from right after call number `SubblockIndex` (or +// the beginning of the block if `SubblockIndex` is zero) to the call number +// `SubblockIndex+1` (or the end of the block if there are are `SubblockIndex` +// calls in the basic block). +struct SubblockID { UniqueBBID BBID; - unsigned CallsiteIndex; + unsigned SubblockIndex; }; // This represents the raw input profile for one function. @@ -55,7 +60,9 @@ struct FunctionPathAndClusterInfo { // the edge a -> b (a is not cloned). The index of the path in this vector // determines the `UniqueBBID::CloneID` of the cloned blocks in that path. SmallVector> ClonePaths; - SmallVector PrefetchTargets; + // Code prefetch targets, specified by the subblock ID of which beginning must + // be targetted for prefetching. + SmallVector PrefetchTargets; // Node counts for each basic block. DenseMap NodeCounts; // Edge counts for each edge. 
@@ -92,7 +99,9 @@ class BasicBlockSectionsProfileReader { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &SinkBBID) const; - SmallVector + // Returns the prefetch targets (identified by their containing subblocks) for + // function `FuncName`. + SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; private: @@ -204,7 +213,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &DestBBID) const; - SmallVector + SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; // Initializes the FunctionNameToDIFilename map for the current module and diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 4368cd4d256c9..2d6f6687fe456 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -119,7 +119,6 @@ #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" -#include "llvm/Support/SMLoc.h" #include "llvm/Support/VCSRevision.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" @@ -1982,7 +1981,6 @@ void AsmPrinter::emitFunctionBody() { FunctionCallGraphInfo FuncCGInfo; const auto &CallSitesInfoMap = MF->getCallSitesInfo(); - for (auto &MBB : *MF) { // Print a label for the basic block. emitBasicBlockStart(MBB); diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index 91cb6e599215d..df3d63098390b 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -1,4 +1,4 @@ -//===-- InsertCodePrefetch.cpp ---=========-----------------------------===// +//===-- InsertCodePrefetch.cpp ---=========--------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. @@ -7,9 +7,14 @@ //===----------------------------------------------------------------------===// // /// \file -/// Prefetch insertion pass implementation. +/// Code Prefetch Insertion Pass. //===----------------------------------------------------------------------===// -/// Prefetch insertion pass. +/// This pass inserts code prefetch instructions according to the prefetch +/// directives in the basic block section profile. The target of a prefetch can +/// be the beginning of any dynamic basic block, that is the beginning of a +/// machine basic block, or immediately after a callsite. A global symbol will +/// be emitted at the position of the target so it can be addressed from the +/// prefetch instruction. //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallVector.h" @@ -20,15 +25,11 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/InitializePasses.h" -#include "llvm/MC/MCContext.h" -#include "llvm/Target/TargetMachine.h" using namespace llvm; -#define DEBUG_TYPE "prefetchinsertion" +#define DEBUG_TYPE "insert-code-prefetch" namespace { class InsertCodePrefetch : public MachineFunctionPass { @@ -40,13 +41,12 @@ class InsertCodePrefetch : public MachineFunctionPass { } StringRef getPassName() const override { - return "X86 Cide Prefetch Inserter Pass"; + return "Code Prefetch Inserter Pass"; } void getAnalysisUsage(AnalysisUsage &AU) const override; - /// Identify basic blocks that need separate sections and prepare to emit them - /// accordingly. + // Sets prefetch targets based on the bb section profile. 
bool runOnMachineFunction(MachineFunction &MF) override; }; @@ -57,10 +57,10 @@ class InsertCodePrefetch : public MachineFunctionPass { //===----------------------------------------------------------------------===// char InsertCodePrefetch::ID = 0; -INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true, +INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true, false) INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass) -INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true, +INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true, false) bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { From a1e1e00d73a6fc8c7038a10cf0577823a74a66d6 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 13 Nov 2025 18:58:22 +0000 Subject: [PATCH 11/38] clang-format. --- .../llvm/CodeGen/BasicBlockSectionsProfileReader.h | 8 ++++---- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 784bf8dd8f2a9..88f3e8b620bce 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -43,10 +43,10 @@ struct BBClusterInfo { }; // Assuming a block is split into subblocks across its callsites, this struct -// uniquely identifies the subblock in block `BBID` which starts from right after call number `SubblockIndex` (or -// the beginning of the block if `SubblockIndex` is zero) to the call number -// `SubblockIndex+1` (or the end of the block if there are are `SubblockIndex` -// calls in the basic block). 
+// uniquely identifies the subblock in block `BBID` which starts from right +// after call number `SubblockIndex` (or the beginning of the block if +// `SubblockIndex` is zero) to the call number `SubblockIndex+1` (or the end of +// the block if there are are `SubblockIndex` calls in the basic block). struct SubblockID { UniqueBBID BBID; unsigned SubblockIndex; diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index df3d63098390b..29afb46a317a8 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -57,11 +57,11 @@ class InsertCodePrefetch : public MachineFunctionPass { //===----------------------------------------------------------------------===// char InsertCodePrefetch::ID = 0; -INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true, - false) +INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", + true, false) INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass) -INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true, - false) +INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", + true, false) bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List && From 717e6fee27b902a14f11d1acb373188a8e17445e Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 13 Nov 2025 19:16:54 +0000 Subject: [PATCH 12/38] Add comments and rename functions. 
--- .../CodeGen/BasicBlockSectionsProfileReader.h | 2 +- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 15 ++++++++++----- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 13 +++++++++---- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 11 ++++++----- 4 files changed, 26 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 88f3e8b620bce..c2bc7559b9fb4 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -46,7 +46,7 @@ struct BBClusterInfo { // uniquely identifies the subblock in block `BBID` which starts from right // after call number `SubblockIndex` (or the beginning of the block if // `SubblockIndex` is zero) to the call number `SubblockIndex+1` (or the end of -// the block if there are are `SubblockIndex` calls in the basic block). +// the block if `SubblockIndex` is the last call in the block). struct SubblockID { UniqueBBID BBID; unsigned SubblockIndex; diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 4be008bbf4bf1..20427954d22e4 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -229,7 +229,12 @@ class MachineBasicBlock /// is only computed once and is cached. mutable MCSymbol *CachedMCSymbol = nullptr; - SmallVector PrefetchTargetIndexes; + /// Contains the subblock indices in this block that are targets of code prefetching. + /// The subblock indexed `i` specifies that region after the `i`th call (or the + /// beginning of the block if `i==0`) to before the`i+1`th callsite (or the + /// end of the block). The prefetch target is always the beginning of the + /// subblock. + SmallVector PrefetchTargetSubblockIndexes; /// Cached MCSymbol for this block (used if IsEHContTarget). 
mutable MCSymbol *CachedEHContMCSymbol = nullptr; @@ -712,12 +717,12 @@ class MachineBasicBlock std::optional getBBID() const { return BBID; } - const SmallVector &getPrefetchTargetIndexes() const { - return PrefetchTargetIndexes; + const SmallVector &getPrefetchTargetSubblockIndexes() const { + return PrefetchTargetSubblockIndexes; } - void setPrefetchTargetIndexes(const SmallVector &V) { - PrefetchTargetIndexes = V; + void setPrefetchTargetSubblockIndexes(const SmallVector &V) { + PrefetchTargetSubblockIndexes = V; } /// Returns the section ID of this basic block. diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 2d6f6687fe456..72cf557d51e03 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1986,17 +1986,20 @@ void AsmPrinter::emitFunctionBody() { emitBasicBlockStart(MBB); DenseMap MnemonicCounts; - SmallVector PrefetchTargets = MBB.getPrefetchTargetIndexes(); + SmallVector PrefetchTargets = MBB.getPrefetchTargetSubblockIndexes(); auto PrefetchTargetIt = PrefetchTargets.begin(); unsigned NumCalls = 0; + // Helper to emit a symbol for the prefetch target and proceed to the next + // one. auto EmitPrefetchTargetSymbolIfNeeded = [&]() { - if (PrefetchTargetIt == PrefetchTargets.end() || - NumCalls < *PrefetchTargetIt) - return; + if (PrefetchTargetIt == PrefetchTargets.end()) return; + if (NumCalls < *PrefetchTargetIt) return; MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) + Twine("_") + utostr(*PrefetchTargetIt)); + // If the function is weak-linkage it may be replaced by a strong version, + // in which case the prefetch targets should also be replaced. OutStreamer->emitSymbolAttribute( PrefetchTargetSymbol, MF->getFunction().isWeakForLinker() ? 
MCSA_Weak : MCSA_Global); @@ -2159,6 +2162,8 @@ void AsmPrinter::emitFunctionBody() { for (auto &Handler : Handlers) Handler->endInstruction(); } + // If the block ends with a call, we may need to emit a prefetch target + // at the end. EmitPrefetchTargetSymbolIfNeeded(); // We must emit temporary symbol for the end of this basic block, if either diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index 29afb46a317a8..e241ccbbee263 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -12,9 +12,9 @@ /// This pass inserts code prefetch instructions according to the prefetch /// directives in the basic block section profile. The target of a prefetch can /// be the beginning of any dynamic basic block, that is the beginning of a -/// machine basic block, or immediately after a callsite. A global symbol will -/// be emitted at the position of the target so it can be addressed from the -/// prefetch instruction. +/// machine basic block, or immediately after a callsite. A global symbol is +/// emitted at the position of the target so it can be addressed from the +/// prefetch instruction from any module. //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallVector.h" @@ -68,6 +68,8 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { "BB Sections list not enabled!"); if (hasInstrProfHashMismatch(MF)) return false; + // Set each block's prefetch targets so AsmPrinter can emit a special symbol + // there. 
SmallVector PrefetchTargets = getAnalysis() .getPrefetchTargetsForFunction(MF.getName()); @@ -78,9 +80,8 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { auto R = PrefetchTargetsByBBID.find(*MBB.getBBID()); if (R == PrefetchTargetsByBBID.end()) continue; - MBB.setPrefetchTargetIndexes(R->second); + MBB.setPrefetchTargetSubblockIndexes(R->second); } - return false; } From 3605b0dddd7d66df9cd5b31d9d535f61fc8729fe Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 13 Nov 2025 19:17:04 +0000 Subject: [PATCH 13/38] clang-format. --- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 10 +++++----- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 9 ++++++--- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 20427954d22e4..a13fcb2bb841d 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -229,11 +229,11 @@ class MachineBasicBlock /// is only computed once and is cached. mutable MCSymbol *CachedMCSymbol = nullptr; - /// Contains the subblock indices in this block that are targets of code prefetching. - /// The subblock indexed `i` specifies that region after the `i`th call (or the - /// beginning of the block if `i==0`) to before the`i+1`th callsite (or the - /// end of the block). The prefetch target is always the beginning of the - /// subblock. + /// Contains the subblock indices in this block that are targets of code + /// prefetching. The subblock indexed `i` specifies that region after the + /// `i`th call (or the beginning of the block if `i==0`) to before the`i+1`th + /// callsite (or the end of the block). The prefetch target is always the + /// beginning of the subblock. SmallVector PrefetchTargetSubblockIndexes; /// Cached MCSymbol for this block (used if IsEHContTarget). 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 72cf557d51e03..fb250c4b5308a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1986,14 +1986,17 @@ void AsmPrinter::emitFunctionBody() { emitBasicBlockStart(MBB); DenseMap MnemonicCounts; - SmallVector PrefetchTargets = MBB.getPrefetchTargetSubblockIndexes(); + SmallVector PrefetchTargets = + MBB.getPrefetchTargetSubblockIndexes(); auto PrefetchTargetIt = PrefetchTargets.begin(); unsigned NumCalls = 0; // Helper to emit a symbol for the prefetch target and proceed to the next // one. auto EmitPrefetchTargetSymbolIfNeeded = [&]() { - if (PrefetchTargetIt == PrefetchTargets.end()) return; - if (NumCalls < *PrefetchTargetIt) return; + if (PrefetchTargetIt == PrefetchTargets.end()) + return; + if (NumCalls < *PrefetchTargetIt) + return; MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) + Twine("_") + From 6408bd7070d47c10c92bae014e088e358f43ce99 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 13 Nov 2025 19:34:26 +0000 Subject: [PATCH 14/38] Add optimization remarks for when prefetch targets cannot be mapped. --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 8 ++++++++ llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 6 +++--- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 4 ++-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index fb250c4b5308a..97234f3859ca7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2168,6 +2168,14 @@ void AsmPrinter::emitFunctionBody() { // If the block ends with a call, we may need to emit a prefetch target // at the end. 
EmitPrefetchTargetSymbolIfNeeded(); + if (PrefetchTargetIt != PrefetchTargets.end()) { + MachineOptimizationRemarkMissed R( + "insert-code-prefetch", "MissingPrefetchTarget", + MF->getFunction().getSubprogram(), &MBB); + R << "failed to map " + << ore::NV("NumMissedTargets", PrefetchTargets.end() - PrefetchTargetIt) + << " prefetch targets"; + } // We must emit temporary symbol for the end of this basic block, if either // we have BBLabels enabled or if this basic blocks marks the end of a diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 5b12c85f7eeef..9319854f53289 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -93,7 +93,7 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount( return EdgeIt->second; } -SmallVector +SmallVector BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction( StringRef FuncName) const { return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)) @@ -332,7 +332,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { return createProfileParseError(Twine("unsigned integer expected: '") + PrefetchTargetStr[1]); FI->second.PrefetchTargets.push_back( - BBPosition{*TargetBBID, static_cast(TargetCallsiteIndex)}); + SubblockID{*TargetBBID, static_cast(TargetCallsiteIndex)}); continue; } default: @@ -541,7 +541,7 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount( return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID); } -SmallVector +SmallVector BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction( StringRef FuncName) const { return BBSPR.getPrefetchTargetsForFunction(FuncName); diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index e241ccbbee263..57037fd818479 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -70,12 +70,12 @@ bool 
InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { return false; // Set each block's prefetch targets so AsmPrinter can emit a special symbol // there. - SmallVector PrefetchTargets = + SmallVector PrefetchTargets = getAnalysis() .getPrefetchTargetsForFunction(MF.getName()); DenseMap> PrefetchTargetsByBBID; for (const auto &Target : PrefetchTargets) - PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex); + PrefetchTargetsByBBID[Target.BBID].push_back(Target.SubblockIndex); for (auto &MBB : MF) { auto R = PrefetchTargetsByBBID.find(*MBB.getBBID()); if (R == PrefetchTargetsByBBID.end()) From a06cb9d59cdc6292e2e3e4a3c6955b771f9f690a Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 13 Nov 2025 22:01:34 +0000 Subject: [PATCH 15/38] Expand test to weak symbols. --- .../X86/basic-block-sections-code-prefetch.ll | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll index 3eb91dfdabd27..35e25952aa2f8 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll @@ -7,6 +7,9 @@ ; RUN: echo 't 1@0' >> %t ; RUN: echo 't 1@1' >> %t ; RUN: echo 't 2@1' >> %t +; RUN: echo 'f _Z3barv' >> %t +; RUN: echo 't 0@0' >> %t +; RUN: echo 't 21@1' >> %t ;; ; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t | FileCheck %s @@ -47,5 +50,12 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind { ret i32 %14 } -declare i32 @_Z3barv() #1 +define weak i32 @_Z3barv() nounwind { + %1 = call i32 @_Z3bazv() + ret i32 %1 +; CHECK: _Z3barv: +; CHECK-NEXT: .weak __llvm_prefetch_target__Z3barv_0_0 +; CHECK-NEXT: __llvm_prefetch_target__Z3barv_0_0: +} + declare i32 @_Z3bazv() #1 From ceefc56d610004bebd5515a7bd88e6a6117aee48 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Mon, 17 Nov 2025 21:56:48 +0000 
Subject: [PATCH 16/38] Change prefetch directive format to use , instead of @ --- llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 7 ++++--- .../X86/basic-block-sections-code-prefetch.ll | 12 ++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 9319854f53289..3a37982387f59 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -320,10 +320,11 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { // past-the-end element. if (FI == ProgramPathAndClusterInfo.end()) continue; - assert(Values.size() == 1); SmallVector PrefetchTargetStr; - Values[0].split(PrefetchTargetStr, '@'); - assert(PrefetchTargetStr.size() == 2); + Values[0].split(PrefetchTargetStr, ','); + if (PrefetchTargetStr.size() != 2) + return createProfileParseError( + Twine("Prefetch target target expected: ") + Value); auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]); if (!TargetBBID) return TargetBBID.takeError(); diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll index 35e25952aa2f8..280bfef1c79b6 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll @@ -3,13 +3,13 @@ ;; Specify the bb sections profile: ; RUN: echo 'v1' > %t ; RUN: echo 'f _Z3foob' >> %t -; RUN: echo 't 0@0' >> %t -; RUN: echo 't 1@0' >> %t -; RUN: echo 't 1@1' >> %t -; RUN: echo 't 2@1' >> %t +; RUN: echo 't 0,0' >> %t +; RUN: echo 't 1,0' >> %t +; RUN: echo 't 1,1' >> %t +; RUN: echo 't 2,1' >> %t ; RUN: echo 'f _Z3barv' >> %t -; RUN: echo 't 0@0' >> %t -; RUN: echo 't 21@1' >> %t +; RUN: echo 't 0,0' >> %t +; RUN: echo 't 21,1' >> %t ;; ; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t | 
FileCheck %s From 639efd746fb00a06712ec7cb5afea30b233c1254 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Mon, 17 Nov 2025 22:05:27 +0000 Subject: [PATCH 17/38] Fix the error. --- llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 3a37982387f59..05b6c1c3917e5 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -324,7 +324,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { Values[0].split(PrefetchTargetStr, ','); if (PrefetchTargetStr.size() != 2) return createProfileParseError( - Twine("Prefetch target target expected: ") + Value); + Twine("Prefetch target target expected: ") + Values[0]); auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]); if (!TargetBBID) return TargetBBID.takeError(); From cc4e3333f3d27683817a12ce4b3987263fb82bb7 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 19 Nov 2025 06:42:17 +0000 Subject: [PATCH 18/38] Remove optimization remarks. --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 97234f3859ca7..fb250c4b5308a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2168,14 +2168,6 @@ void AsmPrinter::emitFunctionBody() { // If the block ends with a call, we may need to emit a prefetch target // at the end. 
EmitPrefetchTargetSymbolIfNeeded(); - if (PrefetchTargetIt != PrefetchTargets.end()) { - MachineOptimizationRemarkMissed R( - "insert-code-prefetch", "MissingPrefetchTarget", - MF->getFunction().getSubprogram(), &MBB); - R << "failed to map " - << ore::NV("NumMissedTargets", PrefetchTargets.end() - PrefetchTargetIt) - << " prefetch targets"; - } // We must emit temporary symbol for the end of this basic block, if either // we have BBLabels enabled or if this basic blocks marks the end of a From 6d8bdb19d705e04cae2169a08500ab52a5bfe6cf Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 19 Nov 2025 20:38:00 +0000 Subject: [PATCH 19/38] Refine and polish. --- .../CodeGen/BasicBlockSectionsProfileReader.h | 25 +++++------ llvm/include/llvm/CodeGen/MachineBasicBlock.h | 18 ++++---- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 40 ++++++++--------- .../BasicBlockSectionsProfileReader.cpp | 45 +++++++++++++++---- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 8 ++-- .../X86/basic-block-sections-code-prefetch.ll | 5 +++ 6 files changed, 84 insertions(+), 57 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index c2bc7559b9fb4..20e1b7ab68bbe 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -42,14 +42,11 @@ struct BBClusterInfo { unsigned PositionInCluster; }; -// Assuming a block is split into subblocks across its callsites, this struct -// uniquely identifies the subblock in block `BBID` which starts from right -// after call number `SubblockIndex` (or the beginning of the block if -// `SubblockIndex` is zero) to the call number `SubblockIndex+1` (or the end of -// the block if `SubblockIndex` is the last call in the block). 
-struct SubblockID { +// The prefetch symbol is emitted immediately after the call of the given index +// in block `BBID` (or at the beginning of the block if CallsiteIndex is -1). +struct CallsiteID { UniqueBBID BBID; - unsigned SubblockIndex; + int CallsiteIndex; }; // This represents the raw input profile for one function. @@ -60,9 +57,9 @@ struct FunctionPathAndClusterInfo { // the edge a -> b (a is not cloned). The index of the path in this vector // determines the `UniqueBBID::CloneID` of the cloned blocks in that path. SmallVector> ClonePaths; - // Code prefetch targets, specified by the subblock ID of which beginning must - // be targetted for prefetching. - SmallVector PrefetchTargets; + // Code prefetch targets, specified by the callsite ID immediately after + // which beginning must be targeted for prefetching. + SmallVector PrefetchTargets; // Node counts for each basic block. DenseMap NodeCounts; // Edge counts for each edge. @@ -99,9 +96,9 @@ class BasicBlockSectionsProfileReader { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &SinkBBID) const; - // Returns the prefetch targets (identified by their containing subblocks) for - // function `FuncName`. - SmallVector + // Returns the prefetch targets (identified by their containing callsite IDs) + // for function `FuncName`. 
+ SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; private: @@ -213,7 +210,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &DestBBID) const; - SmallVector + SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; // Initializes the FunctionNameToDIFilename map for the current module and diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index a13fcb2bb841d..6f48e36b55660 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -229,12 +229,10 @@ class MachineBasicBlock /// is only computed once and is cached. mutable MCSymbol *CachedMCSymbol = nullptr; - /// Contains the subblock indices in this block that are targets of code - /// prefetching. The subblock indexed `i` specifies that region after the - /// `i`th call (or the beginning of the block if `i==0`) to before the`i+1`th - /// callsite (or the end of the block). The prefetch target is always the - /// beginning of the subblock. - SmallVector PrefetchTargetSubblockIndexes; + /// Contains the callsite indices in this block that are targets of code + /// prefetching. The index `i` specifies the `i`th call, with `-1` + /// representing the beginning of the block. + SmallVector PrefetchTargetCallsiteIndexes; /// Cached MCSymbol for this block (used if IsEHContTarget). 
mutable MCSymbol *CachedEHContMCSymbol = nullptr; @@ -717,12 +715,12 @@ class MachineBasicBlock std::optional getBBID() const { return BBID; } - const SmallVector &getPrefetchTargetSubblockIndexes() const { - return PrefetchTargetSubblockIndexes; + const SmallVector &getPrefetchTargetCallsiteIndexes() const { + return PrefetchTargetCallsiteIndexes; } - void setPrefetchTargetSubblockIndexes(const SmallVector &V) { - PrefetchTargetSubblockIndexes = V; + void setPrefetchTargetCallsiteIndexes(const SmallVector &V) { + PrefetchTargetCallsiteIndexes = V; } /// Returns the section ID of this basic block. diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index fb250c4b5308a..1fb1bd51f6d31 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1986,32 +1986,31 @@ void AsmPrinter::emitFunctionBody() { emitBasicBlockStart(MBB); DenseMap MnemonicCounts; - SmallVector PrefetchTargets = - MBB.getPrefetchTargetSubblockIndexes(); + SmallVector PrefetchTargets = + MBB.getPrefetchTargetCallsiteIndexes(); auto PrefetchTargetIt = PrefetchTargets.begin(); - unsigned NumCalls = 0; + int CurrentCallsiteIndex = -1; // Helper to emit a symbol for the prefetch target and proceed to the next // one. auto EmitPrefetchTargetSymbolIfNeeded = [&]() { - if (PrefetchTargetIt == PrefetchTargets.end()) - return; - if (NumCalls < *PrefetchTargetIt) - return; - MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( - Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + - utostr(MBB.getBBID()->BaseID) + Twine("_") + - utostr(*PrefetchTargetIt)); - // If the function is weak-linkage it may be replaced by a strong version, - // in which case the prefetch targets should also be replaced. - OutStreamer->emitSymbolAttribute( - PrefetchTargetSymbol, - MF->getFunction().isWeakForLinker() ? 
MCSA_Weak : MCSA_Global); - OutStreamer->emitLabel(PrefetchTargetSymbol); - ++PrefetchTargetIt; + if (PrefetchTargetIt != PrefetchTargets.end() && + *PrefetchTargetIt == CurrentCallsiteIndex) { + MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( + Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + + utostr(MBB.getBBID()->BaseID) + Twine("_") + utostr(static_cast(*PrefetchTargetIt + 1))); + // If the function is weak-linkage it may be replaced by a strong + // version, in which case the prefetch targets should also be replaced. + OutStreamer->emitSymbolAttribute( + PrefetchTargetSymbol, + MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global); + OutStreamer->emitLabel(PrefetchTargetSymbol); + ++PrefetchTargetIt; + } }; for (auto &MI : MBB) { EmitPrefetchTargetSymbolIfNeeded(); + // Print the assembly for the instruction. if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() && !MI.isDebugInstr()) { @@ -2152,7 +2151,7 @@ void AsmPrinter::emitFunctionBody() { if (MI.isCall()) { if (MF->getTarget().Options.BBAddrMap) OutStreamer->emitLabel(createCallsiteEndSymbol(MBB)); - ++NumCalls; + CurrentCallsiteIndex++; } if (TM.Options.EmitCallGraphSection && MI.isCall()) @@ -2165,8 +2164,7 @@ void AsmPrinter::emitFunctionBody() { for (auto &Handler : Handlers) Handler->endInstruction(); } - // If the block ends with a call, we may need to emit a prefetch target - // at the end. + // Emit the last prefetch target in case the last instruction was a call. 
EmitPrefetchTargetSymbolIfNeeded(); // We must emit temporary symbol for the end of this basic block, if either diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 05b6c1c3917e5..0f440d6a53612 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -93,7 +93,7 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount( return EdgeIt->second; } -SmallVector +SmallVector BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction( StringRef FuncName) const { return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)) @@ -155,6 +155,35 @@ BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction( // +-->: 5 : // .... // **************************************************************************** +// This profile can also specify prefetch targets (starting with 't') which +// instruct the compiler to emit a prefetch symbol for the given target. +// A prefetch target is specified by a pair "<bbid>,<subblock_index>" where +// bbid specifies the target basic block and subblock_index is a zero-based +// index. Subblock 0 refers to the region at the beginning of the block up to +// the first callsite. Subblock `i > 0` refers to the region immediately after +// the `i`-th callsite up to the `i+1`-th callsite (or the end of the block). +// The prefetch target is always emitted at the beginning of the subblock. +// This is the beginning of the basic block for `i = 0` and immediately after +// the `i`-th call for every `i > 0`. +// +// Example: A basic block in function "foo" with BBID 10 and two call instructions (call_A, call_B). +// This block is conceptually split into subblocks, with the prefetch target +// symbol emitted at the beginning of each subblock. 
+// +// +----------------------------------+ +// | __llvm_prefetch_target_foo_10_0: | <- Subblock 0 (before call_A) +// | Instruction 1 | +// | Instruction 2 | +// | call_A (Callsite 0) | +// | __llvm_prefetch_target_foo_10_1: | <--- Subblock 1 (after call_A, +// | | before call_B) +// | Instruction 3 | +// | call_B (Callsite 1) | +// | __llvm_prefetch_target_foo_10_2: | <--- Subblock 2 (after call_B, +// | | before call_C) +// | Instruction 4 | +// +----------------------------------+ +// Error BasicBlockSectionsProfileReader::ReadV1Profile() { auto FI = ProgramPathAndClusterInfo.end(); @@ -315,7 +344,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { } continue; } - case 't': { // Prefetch target specifier. + case 't': { // Callsite target specifier. // Skip the profile when we the profile iterator (FI) refers to the // past-the-end element. if (FI == ProgramPathAndClusterInfo.end()) @@ -324,16 +353,16 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { Values[0].split(PrefetchTargetStr, ','); if (PrefetchTargetStr.size() != 2) return createProfileParseError( - Twine("Prefetch target target expected: ") + Values[0]); + Twine("Callsite target expected: ") + Values[0]); auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]); if (!TargetBBID) return TargetBBID.takeError(); - unsigned long long TargetCallsiteIndex; - if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetCallsiteIndex)) - return createProfileParseError(Twine("unsigned integer expected: '") + + long long CallsiteIndex; + if (getAsSignedInteger(PrefetchTargetStr[1], 10, CallsiteIndex)) + return createProfileParseError(Twine("signed integer expected: '") + PrefetchTargetStr[1]); FI->second.PrefetchTargets.push_back( - SubblockID{*TargetBBID, static_cast(TargetCallsiteIndex)}); + CallsiteID{*TargetBBID, static_cast(CallsiteIndex - 1)}); continue; } default: @@ -542,7 +571,7 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount( return BBSPR.getEdgeCount(FuncName, 
SrcBBID, SinkBBID); } -SmallVector +SmallVector BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction( StringRef FuncName) const { return BBSPR.getPrefetchTargetsForFunction(FuncName); diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index 57037fd818479..d4c25c22417b5 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -70,17 +70,17 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { return false; // Set each block's prefetch targets so AsmPrinter can emit a special symbol // there. - SmallVector PrefetchTargets = + SmallVector PrefetchTargets = getAnalysis() .getPrefetchTargetsForFunction(MF.getName()); - DenseMap> PrefetchTargetsByBBID; + DenseMap> PrefetchTargetsByBBID; for (const auto &Target : PrefetchTargets) - PrefetchTargetsByBBID[Target.BBID].push_back(Target.SubblockIndex); + PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex); for (auto &MBB : MF) { auto R = PrefetchTargetsByBBID.find(*MBB.getBBID()); if (R == PrefetchTargetsByBBID.end()) continue; - MBB.setPrefetchTargetSubblockIndexes(R->second); + MBB.setPrefetchTargetCallsiteIndexes(R->second); } return false; } diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll index 280bfef1c79b6..8e23a30e273b5 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll @@ -7,6 +7,7 @@ ; RUN: echo 't 1,0' >> %t ; RUN: echo 't 1,1' >> %t ; RUN: echo 't 2,1' >> %t +; RUN: echo 't 4,0' >> %t ; RUN: echo 'f _Z3barv' >> %t ; RUN: echo 't 0,0' >> %t ; RUN: echo 't 21,1' >> %t @@ -48,6 +49,10 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind { 13: ; preds = %11, %9 %14 = load i32, ptr %2, align 4 ret i32 %14 +; CHECK: .LBB0_3: +; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_4_0 +; CHECK-NEXT: 
__llvm_prefetch_target__Z3foob_4_0: + } define weak i32 @_Z3barv() nounwind { From d93a5ecfd7cf0603f6f1c30a6b37d487251f5c88 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 19 Nov 2025 20:38:13 +0000 Subject: [PATCH 20/38] clang-format. --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 8 ++++---- llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 11 ++++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 1fb1bd51f6d31..f87896c03536a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1986,18 +1986,18 @@ void AsmPrinter::emitFunctionBody() { emitBasicBlockStart(MBB); DenseMap MnemonicCounts; - SmallVector PrefetchTargets = - MBB.getPrefetchTargetCallsiteIndexes(); + SmallVector PrefetchTargets = MBB.getPrefetchTargetCallsiteIndexes(); auto PrefetchTargetIt = PrefetchTargets.begin(); int CurrentCallsiteIndex = -1; // Helper to emit a symbol for the prefetch target and proceed to the next // one. auto EmitPrefetchTargetSymbolIfNeeded = [&]() { if (PrefetchTargetIt != PrefetchTargets.end() && - *PrefetchTargetIt == CurrentCallsiteIndex) { + *PrefetchTargetIt == CurrentCallsiteIndex) { MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + - utostr(MBB.getBBID()->BaseID) + Twine("_") + utostr(static_cast(*PrefetchTargetIt + 1))); + utostr(MBB.getBBID()->BaseID) + Twine("_") + + utostr(static_cast(*PrefetchTargetIt + 1))); // If the function is weak-linkage it may be replaced by a strong // version, in which case the prefetch targets should also be replaced. 
OutStreamer->emitSymbolAttribute( diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 0f440d6a53612..708080d7bbf0a 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -166,9 +166,10 @@ BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction( // This is the beginning of the basic block for `i = 0` and immediately after // the `i`-th call for every `i > 0`. // -// Example: A basic block in function "foo" with BBID 10 and two call instructions (call_A, call_B). -// This block is conceptually split into subblocks, with the prefetch target -// symbol emitted at the beginning of each subblock. +// Example: A basic block in function "foo" with BBID 10 and two call +// instructions (call_A, call_B). This block is conceptually split into +// subblocks, with the prefetch target symbol emitted at the beginning of each +// subblock. // // +----------------------------------+ // | __llvm_prefetch_target_foo_10_0: | <- Subblock 0 (before call_A) @@ -352,8 +353,8 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { SmallVector PrefetchTargetStr; Values[0].split(PrefetchTargetStr, ','); if (PrefetchTargetStr.size() != 2) - return createProfileParseError( - Twine("Callsite target expected: ") + Values[0]); + return createProfileParseError(Twine("Callsite target expected: ") + + Values[0]); auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]); if (!TargetBBID) return TargetBBID.takeError(); From 7cb4f6be1f9a0dce4592e6db859fb84773bb5e06 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 19 Nov 2025 21:00:00 +0000 Subject: [PATCH 21/38] Change to using unsigned values for CallsiteIndex --- .../llvm/CodeGen/BasicBlockSectionsProfileReader.h | 7 ++++--- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 11 ++++++----- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 8 ++++---- 
llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 6 +++--- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 7 ++++++- 5 files changed, 23 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 20e1b7ab68bbe..161a810298d69 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -42,11 +42,12 @@ struct BBClusterInfo { unsigned PositionInCluster; }; -// The prefetch symbol is emitted immediately after the call of the given index -// in block `BBID` (or at the beginning of the block if CallsiteIndex is -1). +// The prefetch symbol is emitted immediately after the call of the given index, +// in block `BBID` (First call has an index of 1). Zero callsite index means the +// start of the block. struct CallsiteID { UniqueBBID BBID; - int CallsiteIndex; + unsigned CallsiteIndex; }; // This represents the raw input profile for one function. diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 6f48e36b55660..48248bd0461bc 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -230,9 +230,10 @@ class MachineBasicBlock mutable MCSymbol *CachedMCSymbol = nullptr; /// Contains the callsite indices in this block that are targets of code - /// prefetching. The index `i` specifies the `i`th call, with `-1` - /// representing the beginning of the block. - SmallVector PrefetchTargetCallsiteIndexes; + /// prefetching. The index `i` specifies the `i`th call, with zero + /// representing the beginning of the block and `1` representing the first call. + /// Must be in ascending order and without duplicates. + SmallVector PrefetchTargetCallsiteIndexes; /// Cached MCSymbol for this block (used if IsEHContTarget). 
mutable MCSymbol *CachedEHContMCSymbol = nullptr; @@ -715,11 +716,11 @@ class MachineBasicBlock std::optional getBBID() const { return BBID; } - const SmallVector &getPrefetchTargetCallsiteIndexes() const { + const SmallVector &getPrefetchTargetCallsiteIndexes() const { return PrefetchTargetCallsiteIndexes; } - void setPrefetchTargetCallsiteIndexes(const SmallVector &V) { + void setPrefetchTargetCallsiteIndexes(const SmallVector &V) { PrefetchTargetCallsiteIndexes = V; } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index f87896c03536a..20e3c63b163c0 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1986,14 +1986,14 @@ void AsmPrinter::emitFunctionBody() { emitBasicBlockStart(MBB); DenseMap MnemonicCounts; - SmallVector PrefetchTargets = MBB.getPrefetchTargetCallsiteIndexes(); + SmallVector PrefetchTargets = MBB.getPrefetchTargetCallsiteIndexes(); auto PrefetchTargetIt = PrefetchTargets.begin(); - int CurrentCallsiteIndex = -1; + unsigned LastCallsiteIndex = 0; // Helper to emit a symbol for the prefetch target and proceed to the next // one. 
auto EmitPrefetchTargetSymbolIfNeeded = [&]() { if (PrefetchTargetIt != PrefetchTargets.end() && - *PrefetchTargetIt == CurrentCallsiteIndex) { + *PrefetchTargetIt == LastCallsiteIndex) { MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) + Twine("_") + @@ -2151,7 +2151,7 @@ void AsmPrinter::emitFunctionBody() { if (MI.isCall()) { if (MF->getTarget().Options.BBAddrMap) OutStreamer->emitLabel(createCallsiteEndSymbol(MBB)); - CurrentCallsiteIndex++; + LastCallsiteIndex++; } if (TM.Options.EmitCallGraphSection && MI.isCall()) diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 708080d7bbf0a..8762f982f72ea 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -358,12 +358,12 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]); if (!TargetBBID) return TargetBBID.takeError(); - long long CallsiteIndex; - if (getAsSignedInteger(PrefetchTargetStr[1], 10, CallsiteIndex)) + unsigned long long CallsiteIndex; + if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, CallsiteIndex)) return createProfileParseError(Twine("signed integer expected: '") + PrefetchTargetStr[1]); FI->second.PrefetchTargets.push_back( - CallsiteID{*TargetBBID, static_cast(CallsiteIndex - 1)}); + CallsiteID{*TargetBBID, static_cast(CallsiteIndex)}); continue; } default: diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index d4c25c22417b5..5c3055f4ca1ea 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -73,9 +73,14 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { SmallVector PrefetchTargets = getAnalysis() .getPrefetchTargetsForFunction(MF.getName()); - DenseMap> 
PrefetchTargetsByBBID; + DenseMap> PrefetchTargetsByBBID; for (const auto &Target : PrefetchTargets) PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex); + // Sort and uniquify the callsite indices for every block. + for (auto &[K, V]: PrefetchTargetsByBBID) { + llvm::sort(V); + V.erase(llvm::unique(V), V.end()); + } for (auto &MBB : MF) { auto R = PrefetchTargetsByBBID.find(*MBB.getBBID()); if (R == PrefetchTargetsByBBID.end()) From 9fdf7d0b90dcba196bfd3e5de62d1b901a797939 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 19 Nov 2025 21:00:12 +0000 Subject: [PATCH 22/38] clang-format. --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 3 ++- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 20e3c63b163c0..d9b2450cb8a6f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1986,7 +1986,8 @@ void AsmPrinter::emitFunctionBody() { emitBasicBlockStart(MBB); DenseMap MnemonicCounts; - SmallVector PrefetchTargets = MBB.getPrefetchTargetCallsiteIndexes(); + SmallVector PrefetchTargets = + MBB.getPrefetchTargetCallsiteIndexes(); auto PrefetchTargetIt = PrefetchTargets.begin(); unsigned LastCallsiteIndex = 0; // Helper to emit a symbol for the prefetch target and proceed to the next diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index 5c3055f4ca1ea..44864cbc99c52 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -77,7 +77,7 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { for (const auto &Target : PrefetchTargets) PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex); // Sort and uniquify the callsite indices for every block. 
- for (auto &[K, V]: PrefetchTargetsByBBID) { + for (auto &[K, V] : PrefetchTargetsByBBID) { llvm::sort(V); V.erase(llvm::unique(V), V.end()); } From 0c17e45c8973ee66126f326ed85b7788319727a0 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 19 Nov 2025 21:04:07 +0000 Subject: [PATCH 23/38] Fix AsmPrinter. --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d9b2450cb8a6f..a48d333b538ec 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1998,7 +1998,7 @@ void AsmPrinter::emitFunctionBody() { MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) + Twine("_") + - utostr(static_cast(*PrefetchTargetIt + 1))); + utostr(static_cast(*PrefetchTargetIt))); // If the function is weak-linkage it may be replaced by a strong // version, in which case the prefetch targets should also be replaced. 
OutStreamer->emitSymbolAttribute( From 500b53670976838a277ac617013e8342ae98c65b Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 19 Nov 2025 21:47:28 +0000 Subject: [PATCH 24/38] use -O0 --- .../CodeGen/X86/basic-block-sections-code-prefetch.ll | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll index 8e23a30e273b5..e5778b4b77fc2 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll @@ -7,12 +7,12 @@ ; RUN: echo 't 1,0' >> %t ; RUN: echo 't 1,1' >> %t ; RUN: echo 't 2,1' >> %t -; RUN: echo 't 4,0' >> %t +; RUN: echo 't 3,0' >> %t ; RUN: echo 'f _Z3barv' >> %t ; RUN: echo 't 0,0' >> %t ; RUN: echo 't 21,1' >> %t ;; -; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t -O0 | FileCheck %s define i32 @_Z3foob(i1 zeroext %0) nounwind { %2 = alloca i32, align 4 @@ -50,9 +50,8 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind { %14 = load i32, ptr %2, align 4 ret i32 %14 ; CHECK: .LBB0_3: -; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_4_0 -; CHECK-NEXT: __llvm_prefetch_target__Z3foob_4_0: - +; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_3_0 +; CHECK-NEXT: __llvm_prefetch_target__Z3foob_3_0: } define weak i32 @_Z3barv() nounwind { From a265dbcb04918ca3d4b8fc64a2add242d446f9ce Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Sat, 8 Nov 2025 19:54:21 +0000 Subject: [PATCH 25/38] feat(AsmPrinter): Add support for emitting prefetch target symbols --- .../CodeGen/BasicBlockSectionsProfileReader.h | 35 +++++++++++++++++++ llvm/include/llvm/CodeGen/MachineBasicBlock.h | 14 ++++++++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 9 ++++- 
.../BasicBlockSectionsProfileReader.cpp | 14 ++++++++ llvm/lib/CodeGen/MachineBasicBlock.cpp | 13 +++++++ 5 files changed, 84 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 161a810298d69..2eac0fbef4694 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -50,6 +50,12 @@ struct CallsiteID { unsigned CallsiteIndex; }; +struct PrefetchHint { + SubblockID SitePosition; + StringRef TargetFunctionName; + osition TargetPosition; +}; + // This represents the raw input profile for one function. struct FunctionPathAndClusterInfo { // BB Cluster information specified by `UniqueBBID`s. @@ -61,6 +67,7 @@ struct FunctionPathAndClusterInfo { // Code prefetch targets, specified by the callsite ID immediately after // which beginning must be targetted for prefetching. SmallVector PrefetchTargets; + SmallVector PrefetchHints; // Node counts for each basic block. DenseMap NodeCounts; // Edge counts for each edge. @@ -71,6 +78,27 @@ struct FunctionPathAndClusterInfo { DenseMap BBHashes; }; +// Provides DenseMapInfo SubblockID. 
+template <> struct DenseMapInfo { + static inline SubblockID getEmptyKey() { + return {DenseMapInfo::getEmptyKey(), + DenseMapInfo::getEmptyKey()}; + } + static inline SubblockID getTombstoneKey() { + return SubblockID{DenseMapInfo::getTombstoneKey(), + DenseMapInfo::getTombstoneKey()}; + } + static unsigned getHashValue(const SubblockID &Val) { + std::pair PairVal = std::make_pair( + DenseMapInfo::getHashValue(Val.BBID), Val.BBOffset); + return DenseMapInfo>::getHashValue(PairVal); + } + static bool isEqual(const SubblockID &LHS, const SubblockID &RHS) { + return DenseMapInfo::isEqual(LHS.BBID, RHS.BBID) && + DenseMapInfo::isEqual(LHS.BBOffset, RHS.BBOffset); + } +}; + class BasicBlockSectionsProfileReader { public: friend class BasicBlockSectionsProfileReaderWrapperPass; @@ -102,6 +130,9 @@ class BasicBlockSectionsProfileReader { SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; + SmallVector + getPrefetchHintsForFunction(StringRef FuncName) const; + private: StringRef getAliasName(StringRef FuncName) const { auto R = FuncAliasMap.find(FuncName); @@ -210,6 +241,10 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &DestBBID) const; + SmallVector + getPrefetchHintsForFunction(StringRef FuncName) const; + + DenseSet getPrefetchTargetsForFunction(StringRef FuncName) const; SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 48248bd0461bc..2fcb877740a20 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -100,6 +100,12 @@ template <> struct DenseMapInfo { } }; +struct PrefetchTarget { + StringRef TargetFunction; + UniqueBBID TargetBBID; + unsigned TargetBBOffset; +}; + template <> struct ilist_traits { private: friend class MachineBasicBlock; // Set 
by the owning MachineBasicBlock. @@ -213,6 +219,8 @@ class MachineBasicBlock /// basic block sections and basic block labels. std::optional BBID; + SmallVector PrefetchTargets; + /// With basic block sections, this stores the Section ID of the basic block. MBBSectionID SectionID{0}; @@ -1289,6 +1297,12 @@ class MachineBasicBlock /// Return the MCSymbol for this basic block. LLVM_ABI MCSymbol *getSymbol() const; + MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const; + + const SmallVector& getCallInstSymbols() const { + return CallInstSymbols; + } + /// Return the Windows EH Continuation Symbol for this basic block. LLVM_ABI MCSymbol *getEHContSymbol() const; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index a48d333b538ec..68ecbee34f401 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -18,6 +18,7 @@ #include "WasmException.h" #include "WinCFGuard.h" #include "WinException.h" +#include "llvm/Support/SMLoc.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/BitmaskEnum.h" @@ -178,6 +179,11 @@ static cl::opt EmitJumpTableSizesSection( cl::desc("Emit a section containing jump table addresses and sizes"), cl::Hidden, cl::init(false)); +static cl::opt InsertNoopsForPrefetch( + "insert-noops-for-prefetch", + cl::desc("Whether to insert noops instead of prefetches."), cl::init(false), + cl::Hidden); + // This isn't turned on by default, since several of the scheduling models are // not completely accurate, and we don't want to be misleading. static cl::opt PrintLatency( @@ -1982,6 +1988,7 @@ void AsmPrinter::emitFunctionBody() { FunctionCallGraphInfo FuncCGInfo; const auto &CallSitesInfoMap = MF->getCallSitesInfo(); for (auto &MBB : *MF) { + int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0; // Print a label for the basic block. 
emitBasicBlockStart(MBB); DenseMap MnemonicCounts; @@ -2125,7 +2132,7 @@ void AsmPrinter::emitFunctionBody() { break; } default: - emitInstruction(&MI); + emitInstruction(&MI); auto CountInstruction = [&](const MachineInstr &MI) { // Skip Meta instructions inside bundles. diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 8762f982f72ea..d0f8dbb8c73d3 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -100,6 +100,14 @@ BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction( .PrefetchTargets; } +SmallVector +BasicBlockSectionsProfileReader::getPrefetchHintsForFunction( + StringRef FuncName) const { + return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints; +} + + + // Reads the version 1 basic block sections profile. Profile for each function // is encoded as follows: // m @@ -578,6 +586,12 @@ BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction( return BBSPR.getPrefetchTargetsForFunction(FuncName); } +SmallVector +BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction( + StringRef FuncName) const { + return BBSPR.getPrefetchHintsForFunction(FuncName); +} + BasicBlockSectionsProfileReader & BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() { return BBSPR; diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index ba0b025167307..19b218a2879dd 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -90,6 +90,19 @@ MCSymbol *MachineBasicBlock::getSymbol() const { return CachedMCSymbol; } +MCSymbol *MachineBasicBlock::getCallInstSymbol(unsigned CallInstNumber) const { + if (CallInstSymbols.size() <= CallInstNumber) { + const MachineFunction *MF = getParent(); + MCContext &Ctx = MF->getContext(); + CallInstSymbols.resize(CallInstNumber + 1); + 
CallInstSymbols[CallInstNumber] = Ctx.createBlockSymbol( + "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()) + "_" + + Twine(CallInstNumber), + /*AlwaysEmit=*/true); + } + return CallInstSymbols[CallInstNumber]; +} + MCSymbol *MachineBasicBlock::getEHContSymbol() const { if (!CachedEHContMCSymbol) { const MachineFunction *MF = getParent(); From fb647e6b4cf61122f3221c6e7aa7d5b3771e2da1 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Sat, 8 Nov 2025 19:54:21 +0000 Subject: [PATCH 26/38] feat(AsmPrinter): Add support for emitting prefetch target symbols --- .../BasicBlockSectionsProfileReader.cpp | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index d0f8dbb8c73d3..8a53496a834a4 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -353,7 +353,40 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { } continue; } - case 't': { // Callsite target specifier. + case 'i': { // Prefetch hint specifier. + // Skip the profile when we the profile iterator (FI) refers to the + // past-the-end element. 
+ if (FI == ProgramPathAndClusterInfo.end()) + continue; + assert(Values.size() == 2); + SmallVector PrefetchSiteStr; + Values[0].split(PrefetchSiteStr, ','); + assert(PrefetchSiteStr.size() == 2); + auto SiteBBID = parseUniqueBBID(PrefetchSiteStr[0]); + if (!SiteBBID) + return SiteBBID.takeError(); + unsigned long long SiteBBOffset; + if (getAsUnsignedInteger(PrefetchSiteStr[1], 10, SiteBBOffset)) + return createProfileParseError(Twine("unsigned integer expected: '") + + PrefetchSiteStr[1]); + + SmallVector PrefetchTargetStr; + Values[1].split(PrefetchTargetStr, ','); + assert(PrefetchTargetStr.size() == 3); + auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[1]); + if (!TargetBBID) + return TargetBBID.takeError(); + unsigned long long TargetBBOffset; + if (getAsUnsignedInteger(PrefetchTargetStr[2], 10, TargetBBOffset)) + return createProfileParseError(Twine("unsigned integer expected: '") + + PrefetchTargetStr[2]); + FI->second.PrefetchHints.push_back( + PrefetchHint{{*SiteBBID, static_cast(SiteBBOffset)}, + PrefetchTargetStr[0], + {*TargetBBID, static_cast(TargetBBOffset)}}); + continue; + } + case 't': { // Prefetch target specifier. // Skip the profile when we the profile iterator (FI) refers to the // past-the-end element. 
if (FI == ProgramPathAndClusterInfo.end()) From ea967e2e23fb19127a53b41f98ad3654ed661ca8 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Sun, 9 Nov 2025 05:45:18 +0000 Subject: [PATCH 27/38] feat(CodeGen): Add PrefetchInsertion pass --- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 2 +- llvm/include/llvm/CodeGen/MachineInstr.h | 3 +- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2 +- llvm/lib/CodeGen/BasicBlockSections.cpp | 3 +- llvm/lib/CodeGen/CodeGenPrepare.cpp | 1 + llvm/lib/Target/X86/PrefetchInsertion.cpp | 209 ++++++++++++++++++ llvm/lib/Target/X86/X86.h | 7 + llvm/lib/Target/X86/X86TargetMachine.cpp | 3 + 8 files changed, 226 insertions(+), 4 deletions(-) create mode 100644 llvm/lib/Target/X86/PrefetchInsertion.cpp diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 2fcb877740a20..c51c54c96024e 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -1299,7 +1299,7 @@ class MachineBasicBlock MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const; - const SmallVector& getCallInstSymbols() const { + const SmallVector &getCallInstSymbols() const { return CallInstSymbols; } diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index ca984459c365a..4b9a8370fa9c3 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -123,8 +123,9 @@ class MachineInstr NoUSWrap = 1 << 20, // Instruction supports geps // no unsigned signed wrap. SameSign = 1 << 21, // Both operands have the same sign. - InBounds = 1 << 22 // Pointer arithmetic remains inbounds. + InBounds = 1 << 22, // Pointer arithmetic remains inbounds. // Implies NoUSWrap. + Prefetch = 1 << 23, // Instruction is a prefetch. 
}; private: diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 68ecbee34f401..a3e90ace66b52 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -18,7 +18,6 @@ #include "WasmException.h" #include "WinCFGuard.h" #include "WinException.h" -#include "llvm/Support/SMLoc.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/BitmaskEnum.h" @@ -120,6 +119,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/VCSRevision.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 52e2909bec072..755abdbceaf4a 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -106,7 +106,8 @@ class BasicBlockSections : public MachineFunctionPass { public: static char ID; - BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = nullptr; + // BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = + // nullptr; BasicBlockSections() : MachineFunctionPass(ID) { initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry()); diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 587c1372b19cb..47c7bbea739ae 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/FloatingPointPredicateUtils.h" diff --git a/llvm/lib/Target/X86/PrefetchInsertion.cpp b/llvm/lib/Target/X86/PrefetchInsertion.cpp new file mode 100644 
index 0000000000000..720a38cb9b011 --- /dev/null +++ b/llvm/lib/Target/X86/PrefetchInsertion.cpp @@ -0,0 +1,209 @@ +//===-- PrefetchInsertion.cpp ---=========-----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Prefetch insertion pass implementation. +//===----------------------------------------------------------------------===// +/// Prefetch insertion pass. +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/X86MCTargetDesc.h" +#include "X86.h" +#include "X86InstrBuilder.h" +#include "X86InstrInfo.h" +#include "X86MachineFunctionInfo.h" +#include "X86Subtarget.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/BasicBlockSectionUtils.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/InitializePasses.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Target/TargetMachine.h" +#include + +using namespace llvm; +#define DEBUG_TYPE "prefetchinsertion" + +static cl::opt UseCodePrefetchInstruction( + "use-code-prefetch-instruction", + cl::desc("Whether to use the new prefetchit1 instruction."), cl::init(true), + cl::Hidden); +static cl::opt PrefetchNextAddress( + "prefetch-next-address", + cl::desc( + "Whether to prefetch the next address instead of the target address."), + cl::init(false), cl::Hidden); + +namespace {} // end anonymous namespace + +namespace llvm { 
+class PrefetchInsertion : public MachineFunctionPass { +public: + static char ID; + + BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = nullptr; + + PrefetchInsertion() : MachineFunctionPass(ID) { + initializePrefetchInsertionPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "Prefetch Insertion Pass"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// Identify basic blocks that need separate sections and prepare to emit them + /// accordingly. + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // namespace llvm + +char PrefetchInsertion::ID = 0; +INITIALIZE_PASS_BEGIN( + PrefetchInsertion, "prefetch-insertion", + "Applies path clonings for the -basic-block-sections=list option", false, + false) +INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass) +INITIALIZE_PASS_END( + PrefetchInsertion, "prefetch-insertion", + "Applies path clonings for the -basic-block-sections=list option", false, + false) + +bool PrefetchInsertion::runOnMachineFunction(MachineFunction &MF) { + assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List && + "BB Sections list not enabled!"); + if (hasInstrProfHashMismatch(MF)) + return false; + // errs() << "Running on " << MF.getName() << "\n"; + Function &F = MF.getFunction(); + auto PtrTy = PointerType::getUnqual(F.getParent()->getContext()); + DenseSet PrefetchTargets = + getAnalysis() + .getPrefetchTargetsForFunction(MF.getName()); + // errs() << "Targets: Function: " << F.getName() << " " + // << PrefetchTargets.size() << "\n"; + DenseMap> PrefetchTargetsByBBID; + for (const auto &P : PrefetchTargets) + PrefetchTargetsByBBID[P.BBID].push_back(P.BBOffset); + for (auto &[BBID, V] : PrefetchTargetsByBBID) + llvm::sort(V); + for (auto &BB : MF) + BB.setPrefetchTargets(PrefetchTargetsByBBID[*BB.getBBID()]); + + for (const BBPosition &P : PrefetchTargets) { + SmallString<128> 
PrefetchTargetName("__llvm_prefetch_target_"); + PrefetchTargetName += F.getName(); + PrefetchTargetName += "_"; + PrefetchTargetName += utostr(P.BBID.BaseID); + PrefetchTargetName += "_"; + PrefetchTargetName += utostr(P.BBOffset); + F.getParent()->getOrInsertGlobal(PrefetchTargetName, PtrTy); + } + + SmallVector PrefetchHints = + getAnalysis() + .getPrefetchHintsForFunction(MF.getName()); + // errs() << "Hints: Function: " << F.getName() << " " << PrefetchHints.size() + // << "\n"; + for (const PrefetchHint &H : PrefetchHints) { + SmallString<128> PrefetchTargetName("__llvm_prefetch_target_"); + PrefetchTargetName += H.TargetFunctionName; + PrefetchTargetName += "_"; + PrefetchTargetName += utostr(H.TargetPosition.BBID.BaseID); + PrefetchTargetName += "_"; + PrefetchTargetName += utostr(H.TargetPosition.BBOffset); + F.getParent()->getOrInsertGlobal(PrefetchTargetName, PtrTy); + } + + DenseMap>> + PrefetchHintsByBBID; + for (const auto &H : PrefetchHints) { + PrefetchHintsByBBID[H.SitePosition.BBID][H.SitePosition.BBOffset].push_back( + PrefetchTarget{H.TargetFunctionName, H.TargetPosition.BBID, + H.TargetPosition.BBOffset}); + } + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + for (auto &BB : MF) { + auto It = PrefetchHintsByBBID.find(*BB.getBBID()); + if (It == PrefetchHintsByBBID.end()) + continue; + auto BBPrefetchHintIt = It->second.begin(); + unsigned NumInsts = 0; + auto E = BB.getFirstTerminator(); + unsigned NumCallsites = 0; + for (auto I = BB.instr_begin();;) { + auto Current = I; + if (NumCallsites >= BBPrefetchHintIt->first || Current == E) { + for (const auto &PrefetchTarget : BBPrefetchHintIt->second) { + SmallString<128> PrefetchTargetName("__llvm_prefetch_target_"); + PrefetchTargetName += PrefetchTarget.TargetFunction; + PrefetchTargetName += "_"; + PrefetchTargetName += utostr(PrefetchTarget.TargetBBID.BaseID); + PrefetchTargetName += "_"; + PrefetchTargetName += utostr(PrefetchTarget.TargetBBOffset); + auto *GV = + 
MF.getFunction().getParent()->getNamedValue(PrefetchTargetName); + // errs() << "Inserting prefetch for " << GV->getName() << " at " + // << MF.getName() << " " << BB.getName() << " " << NumInsts + // << "\n"; + MachineInstr *PFetch = MF.CreateMachineInstr( + UseCodePrefetchInstruction ? TII->get(X86::PREFETCHIT1) + : TII->get(X86::PREFETCHT1), + Current != BB.instr_end() ? Current->getDebugLoc() : DebugLoc(), + true); + PFetch->setFlag(MachineInstr::Prefetch); + MachineInstrBuilder MIB(MF, PFetch); + if (!PrefetchNextAddress) { + MIB.addMemOperand(MF.getMachineMemOperand( + MachinePointerInfo(GV), MachineMemOperand::MOLoad, /*s=*/8, + /*base_alignment=*/llvm::Align(1))); + } + MIB.addReg(X86::RIP).addImm(1).addReg(X86::NoRegister); + if (PrefetchNextAddress) + MIB.addImm(0); + else + MIB.addGlobalAddress(GV); + MIB.addReg(X86::NoRegister); + BB.insert(Current, PFetch); + } + ++BBPrefetchHintIt; + if (BBPrefetchHintIt == PrefetchHintsByBBID[*BB.getBBID()].end()) + break; + } + if (Current != E) { + // Print the assembly for the instruction. + if (!Current->isPosition() && !Current->isImplicitDef() && + !Current->isKill() && !Current->isDebugInstr()) { + ++NumInsts; + } + if (Current->isCall()) + ++NumCallsites; + ++I; + } + } + } + return true; +} + +void PrefetchInsertion::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +FunctionPass *llvm::createPrefetchInsertionPass() { + return new PrefetchInsertion(); +} diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 97848bec7127e..03706aaaab237 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -166,6 +166,13 @@ FunctionPass *createX86IndirectThunksPass(); /// This pass replaces ret instructions with jmp's to __x86_return thunk. 
FunctionPass *createX86ReturnThunksPass(); +/// This pass ensures instructions featuring a memory operand +/// have distinctive (with respect to each other) +FunctionPass *createX86DiscriminateMemOpsPass(); + +/// This pass applies profiling information to insert cache prefetches. +FunctionPass *createX86InsertPrefetchPass(); + /// This pass insert wait instruction after X87 instructions which could raise /// fp exceptions when strict-fp enabled. FunctionPass *createX86InsertX87waitPass(); diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index 713df63479987..60cdde37b5069 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -624,6 +624,9 @@ void X86PassConfig::addPreEmitPass2() { // after all real instructions have been added to the epilog. if (TT.isOSWindows() && TT.isX86_64()) addPass(createX86WinEHUnwindV2Pass()); + + if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) + addPass(createPrefetchInsertionPass()); } bool X86PassConfig::addPostFastRegAllocRewrite() { From ed5d4616641bc55f6ad7338038dbdc3e0f40fa4a Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Sat, 15 Nov 2025 20:09:15 +0000 Subject: [PATCH 28/38] Implement inserting prefetches into the specified positions. 
--- .../CodeGen/BasicBlockSectionsProfileReader.h | 33 +-- .../include/llvm/CodeGen/InsertCodePrefetch.h | 23 ++ llvm/include/llvm/CodeGen/MachineBasicBlock.h | 6 - llvm/include/llvm/CodeGen/TargetInstrInfo.h | 9 + llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2 +- .../BasicBlockSectionsProfileReader.cpp | 12 +- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 50 ++++- llvm/lib/CodeGen/MachineBasicBlock.cpp | 13 -- llvm/lib/Target/X86/PrefetchInsertion.cpp | 209 ------------------ llvm/lib/Target/X86/X86InstrInfo.cpp | 19 ++ llvm/lib/Target/X86/X86InstrInfo.h | 5 + llvm/lib/Target/X86/X86TargetMachine.cpp | 3 - 12 files changed, 119 insertions(+), 265 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/InsertCodePrefetch.h delete mode 100644 llvm/lib/Target/X86/PrefetchInsertion.cpp diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 2eac0fbef4694..cb84c703508ab 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -50,10 +50,12 @@ struct CallsiteID { unsigned CallsiteIndex; }; +// This represents a prefetch hint to be injected at site `SiteID`, targetting +// `TargetID` in function `TargetFunction`. struct PrefetchHint { - SubblockID SitePosition; - StringRef TargetFunctionName; - osition TargetPosition; + SubblockID SiteID; + StringRef TargetFunction; + SubblockID TargetID; }; // This represents the raw input profile for one function. @@ -78,27 +80,6 @@ struct FunctionPathAndClusterInfo { DenseMap BBHashes; }; -// Provides DenseMapInfo SubblockID. 
-template <> struct DenseMapInfo { - static inline SubblockID getEmptyKey() { - return {DenseMapInfo::getEmptyKey(), - DenseMapInfo::getEmptyKey()}; - } - static inline SubblockID getTombstoneKey() { - return SubblockID{DenseMapInfo::getTombstoneKey(), - DenseMapInfo::getTombstoneKey()}; - } - static unsigned getHashValue(const SubblockID &Val) { - std::pair PairVal = std::make_pair( - DenseMapInfo::getHashValue(Val.BBID), Val.BBOffset); - return DenseMapInfo>::getHashValue(PairVal); - } - static bool isEqual(const SubblockID &LHS, const SubblockID &RHS) { - return DenseMapInfo::isEqual(LHS.BBID, RHS.BBID) && - DenseMapInfo::isEqual(LHS.BBOffset, RHS.BBOffset); - } -}; - class BasicBlockSectionsProfileReader { public: friend class BasicBlockSectionsProfileReaderWrapperPass; @@ -130,6 +111,7 @@ class BasicBlockSectionsProfileReader { SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; + // Returns the prefetch hints to be injected in function `FuncName`. SmallVector getPrefetchHintsForFunction(StringRef FuncName) const; @@ -241,11 +223,10 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &DestBBID) const; + SmallVector getPrefetchHintsForFunction(StringRef FuncName) const; - DenseSet getPrefetchTargetsForFunction(StringRef FuncName) const; - SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; diff --git a/llvm/include/llvm/CodeGen/InsertCodePrefetch.h b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h new file mode 100644 index 0000000000000..b212a025f1d9f --- /dev/null +++ b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h @@ -0,0 +1,23 @@ +//===- BasicBlockSectionUtils.h - Utilities for basic block sections --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_INSERTCODEPREFETCH_H +#define LLVM_CODEGEN_INSERTCODEPREFETCH_H + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/UniqueBBID.h" +#include "llvm/Support/CommandLine.h" + +namespace llvm { + +SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName, const UniqueBBID &BBID, unsigned SubblockIndex); + +} // end namespace llvm + +#endif // LLVM_CODEGEN_INSERTCODEPREFETCH_H diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index c51c54c96024e..d1618f8b1b206 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -1297,12 +1297,6 @@ class MachineBasicBlock /// Return the MCSymbol for this basic block. LLVM_ABI MCSymbol *getSymbol() const; - MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const; - - const SmallVector &getCallInstSymbols() const { - return CallInstSymbols; - } - /// Return the Windows EH Continuation Symbol for this basic block. LLVM_ABI MCSymbol *getEHContSymbol() const; diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 18142c2c0adf3..b147362b5c615 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -2381,6 +2381,15 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo { llvm_unreachable("unknown number of operands necessary"); } + /// Inserts a code prefetch instruction before `InsertBefore` in block `MBB` + /// targetting `GV`. 
+ virtual bool insertCodePrefetchInstr(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + const GlobalValue *GV) const { + return false; + } + + private: mutable std::unique_ptr Formatter; unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index a3e90ace66b52..58742ae3f3603 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -82,6 +82,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMRemarkStreamer.h" +#include "llvm/CodeGen/InsertCodePrefetch.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" @@ -1988,7 +1989,6 @@ void AsmPrinter::emitFunctionBody() { FunctionCallGraphInfo FuncCGInfo; const auto &CallSitesInfoMap = MF->getCallSitesInfo(); for (auto &MBB : *MF) { - int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0; // Print a label for the basic block. 
emitBasicBlockStart(MBB); DenseMap MnemonicCounts; diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 8a53496a834a4..e0e7f89a2a87c 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -365,8 +365,8 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { auto SiteBBID = parseUniqueBBID(PrefetchSiteStr[0]); if (!SiteBBID) return SiteBBID.takeError(); - unsigned long long SiteBBOffset; - if (getAsUnsignedInteger(PrefetchSiteStr[1], 10, SiteBBOffset)) + unsigned long long SiteSubblockIndex; + if (getAsUnsignedInteger(PrefetchSiteStr[1], 10, SiteSubblockIndex)) return createProfileParseError(Twine("unsigned integer expected: '") + PrefetchSiteStr[1]); @@ -376,14 +376,14 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[1]); if (!TargetBBID) return TargetBBID.takeError(); - unsigned long long TargetBBOffset; - if (getAsUnsignedInteger(PrefetchTargetStr[2], 10, TargetBBOffset)) + unsigned long long TargetSubblockIndex; + if (getAsUnsignedInteger(PrefetchTargetStr[2], 10, TargetSubblockIndex)) return createProfileParseError(Twine("unsigned integer expected: '") + PrefetchTargetStr[2]); FI->second.PrefetchHints.push_back( - PrefetchHint{{*SiteBBID, static_cast(SiteBBOffset)}, + PrefetchHint{SubblockID{*SiteBBID, static_cast(SiteSubblockIndex)}, PrefetchTargetStr[0], - {*TargetBBID, static_cast(TargetBBOffset)}}); + SubblockID{*TargetBBID, static_cast(TargetSubblockIndex)}}); continue; } case 't': { // Prefetch target specifier. diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index 44864cbc99c52..3651ac325897b 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -17,6 +17,8 @@ /// prefetch instruction from any module. 
//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/InsertCodePrefetch.h" + #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" @@ -26,11 +28,24 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/InitializePasses.h" using namespace llvm; #define DEBUG_TYPE "insert-code-prefetch" +namespace llvm { +SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName, const UniqueBBID &BBID, unsigned SubblockIndex) { + SmallString<128> R("__llvm_prefetch_target_"); + R += FunctionName; + R += "_"; + R += utostr(BBID.BaseID); + R += "_"; + R += utostr(SubblockIndex); + return R; +} +} // namespace llvm + namespace { class InsertCodePrefetch : public MachineFunctionPass { public: @@ -87,7 +102,40 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { continue; MBB.setPrefetchTargetCallsiteIndexes(R->second); } - return false; + SmallVector PrefetchHints = + getAnalysis() + .getPrefetchHintsForFunction(MF.getName()); + DenseMap> + PrefetchHintsBySiteBBID; + for (const auto &H : PrefetchHints) + PrefetchHintsBySiteBBID[H.SiteID.BBID].push_back(H); + for (auto &[SiteBBID, H]: PrefetchHintsBySiteBBID) { + llvm::sort(H, [](const PrefetchHint &H1, const PrefetchHint &H2) { + return H1.SiteID.SubblockIndex < H2.SiteID.SubblockIndex; + }); + } + auto PtrTy = PointerType::getUnqual(MF.getFunction().getParent()->getContext()); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + for (auto &BB : MF) { + auto It = PrefetchHintsBySiteBBID.find(*BB.getBBID()); + if (It == PrefetchHintsBySiteBBID.end()) + continue; + const auto &PrefetchHints = It->second; + unsigned NumCallsInBB = 0; + auto InstrIt = BB.begin(); + for(auto HintIt = PrefetchHints.begin() ; HintIt != PrefetchHints.end();) { + auto NextInstrIt = InstrIt == 
BB.end() ? BB.end() : std::next(InstrIt); + while (NumCallsInBB >= HintIt->SiteID.SubblockIndex) { + auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(getPrefetchTargetSymbolName(HintIt->TargetFunction, HintIt->TargetID.BBID, HintIt->TargetID.SubblockIndex), PtrTy); + TII->insertCodePrefetchInstr(BB, NextInstrIt, GV); + ++HintIt; + } + if (InstrIt == BB.end()) break; + if (InstrIt->isCall()) ++NumCallsInBB; + InstrIt = NextInstrIt; + } + } + return true; } void InsertCodePrefetch::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 19b218a2879dd..ba0b025167307 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -90,19 +90,6 @@ MCSymbol *MachineBasicBlock::getSymbol() const { return CachedMCSymbol; } -MCSymbol *MachineBasicBlock::getCallInstSymbol(unsigned CallInstNumber) const { - if (CallInstSymbols.size() <= CallInstNumber) { - const MachineFunction *MF = getParent(); - MCContext &Ctx = MF->getContext(); - CallInstSymbols.resize(CallInstNumber + 1); - CallInstSymbols[CallInstNumber] = Ctx.createBlockSymbol( - "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()) + "_" + - Twine(CallInstNumber), - /*AlwaysEmit=*/true); - } - return CallInstSymbols[CallInstNumber]; -} - MCSymbol *MachineBasicBlock::getEHContSymbol() const { if (!CachedEHContMCSymbol) { const MachineFunction *MF = getParent(); diff --git a/llvm/lib/Target/X86/PrefetchInsertion.cpp b/llvm/lib/Target/X86/PrefetchInsertion.cpp deleted file mode 100644 index 720a38cb9b011..0000000000000 --- a/llvm/lib/Target/X86/PrefetchInsertion.cpp +++ /dev/null @@ -1,209 +0,0 @@ -//===-- PrefetchInsertion.cpp ---=========-----------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file -/// Prefetch insertion pass implementation. -//===----------------------------------------------------------------------===// -/// Prefetch insertion pass. -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/X86MCTargetDesc.h" -#include "X86.h" -#include "X86InstrBuilder.h" -#include "X86InstrInfo.h" -#include "X86MachineFunctionInfo.h" -#include "X86Subtarget.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/CodeGen/BasicBlockSectionUtils.h" -#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/InitializePasses.h" -#include "llvm/MC/MCContext.h" -#include "llvm/Target/TargetMachine.h" -#include - -using namespace llvm; -#define DEBUG_TYPE "prefetchinsertion" - -static cl::opt UseCodePrefetchInstruction( - "use-code-prefetch-instruction", - cl::desc("Whether to use the new prefetchit1 instruction."), cl::init(true), - cl::Hidden); -static cl::opt PrefetchNextAddress( - "prefetch-next-address", - cl::desc( - "Whether to prefetch the next address instead of the target address."), - cl::init(false), cl::Hidden); - -namespace {} // end anonymous namespace - -namespace llvm { -class PrefetchInsertion : public MachineFunctionPass { -public: - static char ID; - - BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = nullptr; - - PrefetchInsertion() : MachineFunctionPass(ID) { - initializePrefetchInsertionPass(*PassRegistry::getPassRegistry()); - } - - StringRef getPassName() const override { return 
"Prefetch Insertion Pass"; } - - void getAnalysisUsage(AnalysisUsage &AU) const override; - - /// Identify basic blocks that need separate sections and prepare to emit them - /// accordingly. - bool runOnMachineFunction(MachineFunction &MF) override; -}; - -} // namespace llvm - -char PrefetchInsertion::ID = 0; -INITIALIZE_PASS_BEGIN( - PrefetchInsertion, "prefetch-insertion", - "Applies path clonings for the -basic-block-sections=list option", false, - false) -INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass) -INITIALIZE_PASS_END( - PrefetchInsertion, "prefetch-insertion", - "Applies path clonings for the -basic-block-sections=list option", false, - false) - -bool PrefetchInsertion::runOnMachineFunction(MachineFunction &MF) { - assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List && - "BB Sections list not enabled!"); - if (hasInstrProfHashMismatch(MF)) - return false; - // errs() << "Running on " << MF.getName() << "\n"; - Function &F = MF.getFunction(); - auto PtrTy = PointerType::getUnqual(F.getParent()->getContext()); - DenseSet PrefetchTargets = - getAnalysis() - .getPrefetchTargetsForFunction(MF.getName()); - // errs() << "Targets: Function: " << F.getName() << " " - // << PrefetchTargets.size() << "\n"; - DenseMap> PrefetchTargetsByBBID; - for (const auto &P : PrefetchTargets) - PrefetchTargetsByBBID[P.BBID].push_back(P.BBOffset); - for (auto &[BBID, V] : PrefetchTargetsByBBID) - llvm::sort(V); - for (auto &BB : MF) - BB.setPrefetchTargets(PrefetchTargetsByBBID[*BB.getBBID()]); - - for (const BBPosition &P : PrefetchTargets) { - SmallString<128> PrefetchTargetName("__llvm_prefetch_target_"); - PrefetchTargetName += F.getName(); - PrefetchTargetName += "_"; - PrefetchTargetName += utostr(P.BBID.BaseID); - PrefetchTargetName += "_"; - PrefetchTargetName += utostr(P.BBOffset); - F.getParent()->getOrInsertGlobal(PrefetchTargetName, PtrTy); - } - - SmallVector PrefetchHints = - getAnalysis() - 
.getPrefetchHintsForFunction(MF.getName()); - // errs() << "Hints: Function: " << F.getName() << " " << PrefetchHints.size() - // << "\n"; - for (const PrefetchHint &H : PrefetchHints) { - SmallString<128> PrefetchTargetName("__llvm_prefetch_target_"); - PrefetchTargetName += H.TargetFunctionName; - PrefetchTargetName += "_"; - PrefetchTargetName += utostr(H.TargetPosition.BBID.BaseID); - PrefetchTargetName += "_"; - PrefetchTargetName += utostr(H.TargetPosition.BBOffset); - F.getParent()->getOrInsertGlobal(PrefetchTargetName, PtrTy); - } - - DenseMap>> - PrefetchHintsByBBID; - for (const auto &H : PrefetchHints) { - PrefetchHintsByBBID[H.SitePosition.BBID][H.SitePosition.BBOffset].push_back( - PrefetchTarget{H.TargetFunctionName, H.TargetPosition.BBID, - H.TargetPosition.BBOffset}); - } - const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - for (auto &BB : MF) { - auto It = PrefetchHintsByBBID.find(*BB.getBBID()); - if (It == PrefetchHintsByBBID.end()) - continue; - auto BBPrefetchHintIt = It->second.begin(); - unsigned NumInsts = 0; - auto E = BB.getFirstTerminator(); - unsigned NumCallsites = 0; - for (auto I = BB.instr_begin();;) { - auto Current = I; - if (NumCallsites >= BBPrefetchHintIt->first || Current == E) { - for (const auto &PrefetchTarget : BBPrefetchHintIt->second) { - SmallString<128> PrefetchTargetName("__llvm_prefetch_target_"); - PrefetchTargetName += PrefetchTarget.TargetFunction; - PrefetchTargetName += "_"; - PrefetchTargetName += utostr(PrefetchTarget.TargetBBID.BaseID); - PrefetchTargetName += "_"; - PrefetchTargetName += utostr(PrefetchTarget.TargetBBOffset); - auto *GV = - MF.getFunction().getParent()->getNamedValue(PrefetchTargetName); - // errs() << "Inserting prefetch for " << GV->getName() << " at " - // << MF.getName() << " " << BB.getName() << " " << NumInsts - // << "\n"; - MachineInstr *PFetch = MF.CreateMachineInstr( - UseCodePrefetchInstruction ? 
TII->get(X86::PREFETCHIT1) - : TII->get(X86::PREFETCHT1), - Current != BB.instr_end() ? Current->getDebugLoc() : DebugLoc(), - true); - PFetch->setFlag(MachineInstr::Prefetch); - MachineInstrBuilder MIB(MF, PFetch); - if (!PrefetchNextAddress) { - MIB.addMemOperand(MF.getMachineMemOperand( - MachinePointerInfo(GV), MachineMemOperand::MOLoad, /*s=*/8, - /*base_alignment=*/llvm::Align(1))); - } - MIB.addReg(X86::RIP).addImm(1).addReg(X86::NoRegister); - if (PrefetchNextAddress) - MIB.addImm(0); - else - MIB.addGlobalAddress(GV); - MIB.addReg(X86::NoRegister); - BB.insert(Current, PFetch); - } - ++BBPrefetchHintIt; - if (BBPrefetchHintIt == PrefetchHintsByBBID[*BB.getBBID()].end()) - break; - } - if (Current != E) { - // Print the assembly for the instruction. - if (!Current->isPosition() && !Current->isImplicitDef() && - !Current->isKill() && !Current->isDebugInstr()) { - ++NumInsts; - } - if (Current->isCall()) - ++NumCallsites; - ++I; - } - } - } - return true; -} - -void PrefetchInsertion::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired(); - MachineFunctionPass::getAnalysisUsage(AU); -} - -FunctionPass *llvm::createPrefetchInsertionPass() { - return new PrefetchInsertion(); -} diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index cb0208a4a5f32..b49ef06478f9b 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -10978,5 +10978,24 @@ void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl &Ops, M.getFullAddress(Ops); } +bool X86InstrInfo::insertCodePrefetchInstr(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + const GlobalValue *GV) const { + MachineFunction &MF = *MBB.getParent(); + MachineInstr *PrefetchInstr = MF.CreateMachineInstr(get(X86::PREFETCHIT1), + InsertBefore == MBB.instr_end() ? 
MBB.findPrevDebugLoc(InsertBefore) : InsertBefore->getDebugLoc(), + true); + MachineInstrBuilder MIB(MF, PrefetchInstr); + MIB.addMemOperand(MF.getMachineMemOperand( + MachinePointerInfo(GV), MachineMemOperand::MOLoad, /*s=*/8, + /*base_alignment=*/llvm::Align(1))); + MIB.addReg(X86::RIP).addImm(1).addReg(X86::NoRegister); + MIB.addGlobalAddress(GV); + MIB.addReg(X86::NoRegister); + MBB.insert(InsertBefore, PrefetchInstr); + return true; +} + + #define GET_INSTRINFO_HELPERS #include "X86GenInstrInfo.inc" diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index a547fcd421411..fb77ac96ceaad 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -767,6 +767,11 @@ class X86InstrInfo final : public X86GenInstrInfo { /// \returns the index of operand that is commuted with \p Idx1. If the method /// fails to commute the operands, it will return \p Idx1. unsigned commuteOperandsForFold(MachineInstr &MI, unsigned Idx1) const; + + + bool insertCodePrefetchInstr(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + const GlobalValue *GV) const override; }; } // namespace llvm diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index 60cdde37b5069..713df63479987 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -624,9 +624,6 @@ void X86PassConfig::addPreEmitPass2() { // after all real instructions have been added to the epilog. if (TT.isOSWindows() && TT.isX86_64()) addPass(createX86WinEHUnwindV2Pass()); - - if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) - addPass(createPrefetchInsertionPass()); } bool X86PassConfig::addPostFastRegAllocRewrite() { From e9a2af59d45e5138ad8f390934fc146bf0776823 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Sat, 15 Nov 2025 20:09:27 +0000 Subject: [PATCH 29/38] clang-format. 
--- .../include/llvm/CodeGen/InsertCodePrefetch.h | 6 ++- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 1 - llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2 +- .../BasicBlockSectionsProfileReader.cpp | 8 ++-- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 40 +++++++++++-------- llvm/lib/Target/X86/X86InstrInfo.cpp | 33 +++++++-------- llvm/lib/Target/X86/X86InstrInfo.h | 5 +-- 7 files changed, 52 insertions(+), 43 deletions(-) diff --git a/llvm/include/llvm/CodeGen/InsertCodePrefetch.h b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h index b212a025f1d9f..99241248862d3 100644 --- a/llvm/include/llvm/CodeGen/InsertCodePrefetch.h +++ b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h @@ -11,12 +11,14 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" -#include "llvm/Support/UniqueBBID.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/UniqueBBID.h" namespace llvm { -SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName, const UniqueBBID &BBID, unsigned SubblockIndex); +SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName, + const UniqueBBID &BBID, + unsigned SubblockIndex); } // end namespace llvm diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index b147362b5c615..118b0b8ec7f82 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -2389,7 +2389,6 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo { return false; } - private: mutable std::unique_ptr Formatter; unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 58742ae3f3603..e3db952994216 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -39,6 +39,7 @@ #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" +#include 
"llvm/CodeGen/InsertCodePrefetch.h" #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockHashInfo.h" @@ -82,7 +83,6 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMRemarkStreamer.h" -#include "llvm/CodeGen/InsertCodePrefetch.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index e0e7f89a2a87c..dc2befc0f4612 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -380,10 +380,10 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { if (getAsUnsignedInteger(PrefetchTargetStr[2], 10, TargetSubblockIndex)) return createProfileParseError(Twine("unsigned integer expected: '") + PrefetchTargetStr[2]); - FI->second.PrefetchHints.push_back( - PrefetchHint{SubblockID{*SiteBBID, static_cast(SiteSubblockIndex)}, - PrefetchTargetStr[0], - SubblockID{*TargetBBID, static_cast(TargetSubblockIndex)}}); + FI->second.PrefetchHints.push_back(PrefetchHint{ + SubblockID{*SiteBBID, static_cast(SiteSubblockIndex)}, + PrefetchTargetStr[0], + SubblockID{*TargetBBID, static_cast(TargetSubblockIndex)}}); continue; } case 't': { // Prefetch target specifier. 
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index 3651ac325897b..9f27f544c3f9b 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -35,14 +35,16 @@ using namespace llvm; #define DEBUG_TYPE "insert-code-prefetch" namespace llvm { -SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName, const UniqueBBID &BBID, unsigned SubblockIndex) { - SmallString<128> R("__llvm_prefetch_target_"); - R += FunctionName; - R += "_"; - R += utostr(BBID.BaseID); - R += "_"; - R += utostr(SubblockIndex); - return R; +SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName, + const UniqueBBID &BBID, + unsigned SubblockIndex) { + SmallString<128> R("__llvm_prefetch_target_"); + R += FunctionName; + R += "_"; + R += utostr(BBID.BaseID); + R += "_"; + R += utostr(SubblockIndex); + return R; } } // namespace llvm @@ -105,16 +107,16 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { SmallVector PrefetchHints = getAnalysis() .getPrefetchHintsForFunction(MF.getName()); - DenseMap> - PrefetchHintsBySiteBBID; + DenseMap> PrefetchHintsBySiteBBID; for (const auto &H : PrefetchHints) PrefetchHintsBySiteBBID[H.SiteID.BBID].push_back(H); - for (auto &[SiteBBID, H]: PrefetchHintsBySiteBBID) { + for (auto &[SiteBBID, H] : PrefetchHintsBySiteBBID) { llvm::sort(H, [](const PrefetchHint &H1, const PrefetchHint &H2) { return H1.SiteID.SubblockIndex < H2.SiteID.SubblockIndex; }); } - auto PtrTy = PointerType::getUnqual(MF.getFunction().getParent()->getContext()); + auto PtrTy = + PointerType::getUnqual(MF.getFunction().getParent()->getContext()); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); for (auto &BB : MF) { auto It = PrefetchHintsBySiteBBID.find(*BB.getBBID()); @@ -123,15 +125,21 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { const auto &PrefetchHints = It->second; unsigned NumCallsInBB = 0; auto InstrIt = 
BB.begin(); - for(auto HintIt = PrefetchHints.begin() ; HintIt != PrefetchHints.end();) { + for (auto HintIt = PrefetchHints.begin(); HintIt != PrefetchHints.end();) { auto NextInstrIt = InstrIt == BB.end() ? BB.end() : std::next(InstrIt); while (NumCallsInBB >= HintIt->SiteID.SubblockIndex) { - auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(getPrefetchTargetSymbolName(HintIt->TargetFunction, HintIt->TargetID.BBID, HintIt->TargetID.SubblockIndex), PtrTy); + auto *GV = MF.getFunction().getParent()->getOrInsertGlobal( + getPrefetchTargetSymbolName(HintIt->TargetFunction, + HintIt->TargetID.BBID, + HintIt->TargetID.SubblockIndex), + PtrTy); TII->insertCodePrefetchInstr(BB, NextInstrIt, GV); ++HintIt; } - if (InstrIt == BB.end()) break; - if (InstrIt->isCall()) ++NumCallsInBB; + if (InstrIt == BB.end()) + break; + if (InstrIt->isCall()) + ++NumCallsInBB; InstrIt = NextInstrIt; } } diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index b49ef06478f9b..6556e16241557 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -10978,24 +10978,25 @@ void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl &Ops, M.getFullAddress(Ops); } -bool X86InstrInfo::insertCodePrefetchInstr(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertBefore, - const GlobalValue *GV) const { +bool X86InstrInfo::insertCodePrefetchInstr( + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, + const GlobalValue *GV) const { MachineFunction &MF = *MBB.getParent(); - MachineInstr *PrefetchInstr = MF.CreateMachineInstr(get(X86::PREFETCHIT1), - InsertBefore == MBB.instr_end() ? 
MBB.findPrevDebugLoc(InsertBefore) : InsertBefore->getDebugLoc(), - true); - MachineInstrBuilder MIB(MF, PrefetchInstr); - MIB.addMemOperand(MF.getMachineMemOperand( - MachinePointerInfo(GV), MachineMemOperand::MOLoad, /*s=*/8, - /*base_alignment=*/llvm::Align(1))); - MIB.addReg(X86::RIP).addImm(1).addReg(X86::NoRegister); - MIB.addGlobalAddress(GV); - MIB.addReg(X86::NoRegister); - MBB.insert(InsertBefore, PrefetchInstr); - return true; + MachineInstr *PrefetchInstr = MF.CreateMachineInstr( + get(X86::PREFETCHIT1), + InsertBefore == MBB.instr_end() ? MBB.findPrevDebugLoc(InsertBefore) + : InsertBefore->getDebugLoc(), + true); + MachineInstrBuilder MIB(MF, PrefetchInstr); + MIB.addMemOperand(MF.getMachineMemOperand(MachinePointerInfo(GV), + MachineMemOperand::MOLoad, /*s=*/8, + /*base_alignment=*/llvm::Align(1))); + MIB.addReg(X86::RIP).addImm(1).addReg(X86::NoRegister); + MIB.addGlobalAddress(GV); + MIB.addReg(X86::NoRegister); + MBB.insert(InsertBefore, PrefetchInstr); + return true; } - #define GET_INSTRINFO_HELPERS #include "X86GenInstrInfo.inc" diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index fb77ac96ceaad..2fe67c56e1bcd 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -768,10 +768,9 @@ class X86InstrInfo final : public X86GenInstrInfo { /// fails to commute the operands, it will return \p Idx1. unsigned commuteOperandsForFold(MachineInstr &MI, unsigned Idx1) const; - bool insertCodePrefetchInstr(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertBefore, - const GlobalValue *GV) const override; + MachineBasicBlock::iterator InsertBefore, + const GlobalValue *GV) const override; }; } // namespace llvm From b86fc143b238fcf6f8a4125d56aea17ace84aba7 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Mon, 17 Nov 2025 20:37:25 +0000 Subject: [PATCH 30/38] Expand the test case to prefetch hints. 
--- .../BasicBlockSectionsProfileReader.cpp | 15 +++++++++----- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 8 ++++---- .../X86/basic-block-sections-code-prefetch.ll | 20 +++++++++++++++++-- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index dc2befc0f4612..b17cccdad6467 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -358,10 +358,12 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { // past-the-end element. if (FI == ProgramPathAndClusterInfo.end()) continue; - assert(Values.size() == 2); + if (Values.size() != 2) + return createProfileParseError(Twine("Prefetch hint expected: "+ S)); SmallVector PrefetchSiteStr; Values[0].split(PrefetchSiteStr, ','); - assert(PrefetchSiteStr.size() == 2); + if (PrefetchSiteStr.size() != 2) + return createProfileParseError(Twine("Prefetch site expected: ") + Values[0]); auto SiteBBID = parseUniqueBBID(PrefetchSiteStr[0]); if (!SiteBBID) return SiteBBID.takeError(); @@ -372,7 +374,8 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { SmallVector PrefetchTargetStr; Values[1].split(PrefetchTargetStr, ','); - assert(PrefetchTargetStr.size() == 3); + if (PrefetchTargetStr.size() != 3) + return createProfileParseError(Twine("Prefetch target target expected: ") + Values[1]); auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[1]); if (!TargetBBID) return TargetBBID.takeError(); @@ -392,10 +395,12 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { if (FI == ProgramPathAndClusterInfo.end()) continue; SmallVector PrefetchTargetStr; + if (Values.size() != 1) + return createProfileParseError(Twine("Prefetch target expected: ")+ S); + SmallVector PrefetchTargetStr; Values[0].split(PrefetchTargetStr, ','); if (PrefetchTargetStr.size() != 2) - return createProfileParseError(Twine("Callsite target expected: ") + - 
Values[0]); + return createProfileParseError(Twine("Prefetch target expected: ")+ Values[0]); auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]); if (!TargetBBID) return TargetBBID.takeError(); diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index 9f27f544c3f9b..0dd697056a496 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -110,8 +110,8 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { DenseMap> PrefetchHintsBySiteBBID; for (const auto &H : PrefetchHints) PrefetchHintsBySiteBBID[H.SiteID.BBID].push_back(H); - for (auto &[SiteBBID, H] : PrefetchHintsBySiteBBID) { - llvm::sort(H, [](const PrefetchHint &H1, const PrefetchHint &H2) { + for (auto &[SiteBBID, Hints] : PrefetchHintsBySiteBBID) { + llvm::sort(Hints, [](const PrefetchHint &H1, const PrefetchHint &H2) { return H1.SiteID.SubblockIndex < H2.SiteID.SubblockIndex; }); } @@ -127,13 +127,13 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { auto InstrIt = BB.begin(); for (auto HintIt = PrefetchHints.begin(); HintIt != PrefetchHints.end();) { auto NextInstrIt = InstrIt == BB.end() ? 
BB.end() : std::next(InstrIt); - while (NumCallsInBB >= HintIt->SiteID.SubblockIndex) { + while (HintIt != PrefetchHints.end() && NumCallsInBB >= HintIt->SiteID.SubblockIndex) { auto *GV = MF.getFunction().getParent()->getOrInsertGlobal( getPrefetchTargetSymbolName(HintIt->TargetFunction, HintIt->TargetID.BBID, HintIt->TargetID.SubblockIndex), PtrTy); - TII->insertCodePrefetchInstr(BB, NextInstrIt, GV); + TII->insertCodePrefetchInstr(BB, InstrIt, GV); ++HintIt; } if (InstrIt == BB.end()) diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll index e5778b4b77fc2..e030b104bde76 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll @@ -7,12 +7,19 @@ ; RUN: echo 't 1,0' >> %t ; RUN: echo 't 1,1' >> %t ; RUN: echo 't 2,1' >> %t +<<<<<<< HEAD ; RUN: echo 't 3,0' >> %t +======= +; RUN: echo 't 4,0' >> %t +; RUN: echo 'i 3@0 _Z3barv@0@0' >> %t +; RUN: echo 'i 2@1 _Z3foob@1@0' >> %t +>>>>>>> d2ddce6b2050 (Expand the test case to prefetch hints.) 
; RUN: echo 'f _Z3barv' >> %t ; RUN: echo 't 0,0' >> %t ; RUN: echo 't 21,1' >> %t +; RUN: echo 'i 0@1 _Z3foob@0@0' >> %t ;; -; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t -O0 | FileCheck %s +; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t | FileCheck %s define i32 @_Z3foob(i1 zeroext %0) nounwind { %2 = alloca i32, align 4 @@ -45,6 +52,7 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind { ; CHECK: callq _Z3bazv@PLT ; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_2_1 ; CHECK-NEXT: __llvm_prefetch_target__Z3foob_2_1: +; CHECK-NEXT: prefetchit1 __llvm_prefetch_target__Z3foob_1_0(%rip) 13: ; preds = %11, %9 %14 = load i32, ptr %2, align 4 @@ -52,14 +60,22 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind { ; CHECK: .LBB0_3: ; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_3_0 ; CHECK-NEXT: __llvm_prefetch_target__Z3foob_3_0: +; CHECK-NEXT: prefetchit1 __llvm_prefetch_target__Z3barv_0_0(%rip) +; CHECK: retq + +>>>>>>> d2ddce6b2050 (Expand the test case to prefetch hints.) } define weak i32 @_Z3barv() nounwind { %1 = call i32 @_Z3bazv() - ret i32 %1 + br label %2 ; CHECK: _Z3barv: ; CHECK-NEXT: .weak __llvm_prefetch_target__Z3barv_0_0 ; CHECK-NEXT: __llvm_prefetch_target__Z3barv_0_0: +; CHECK: callq _Z3bazv@PLT +; CHECK-NEXT: prefetchit1 __llvm_prefetch_target__Z3foob_0_0(%rip) +2: + ret i32 %1 } declare i32 @_Z3bazv() #1 From d15a92ecba5c32151f351897d3627c0759f50edb Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Mon, 17 Nov 2025 20:37:36 +0000 Subject: [PATCH 31/38] clang-format. 
--- .../lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 13 ++++++++----- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 3 ++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index b17cccdad6467..c88d6b18d82f5 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -359,11 +359,12 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { if (FI == ProgramPathAndClusterInfo.end()) continue; if (Values.size() != 2) - return createProfileParseError(Twine("Prefetch hint expected: "+ S)); + return createProfileParseError(Twine("Prefetch hint expected: " + S)); SmallVector PrefetchSiteStr; Values[0].split(PrefetchSiteStr, ','); if (PrefetchSiteStr.size() != 2) - return createProfileParseError(Twine("Prefetch site expected: ") + Values[0]); + return createProfileParseError(Twine("Prefetch site expected: ") + + Values[0]); auto SiteBBID = parseUniqueBBID(PrefetchSiteStr[0]); if (!SiteBBID) return SiteBBID.takeError(); @@ -375,7 +376,8 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { SmallVector PrefetchTargetStr; Values[1].split(PrefetchTargetStr, ','); if (PrefetchTargetStr.size() != 3) - return createProfileParseError(Twine("Prefetch target target expected: ") + Values[1]); + return createProfileParseError( + Twine("Prefetch target target expected: ") + Values[1]); auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[1]); if (!TargetBBID) return TargetBBID.takeError(); @@ -396,11 +398,12 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { continue; SmallVector PrefetchTargetStr; if (Values.size() != 1) - return createProfileParseError(Twine("Prefetch target expected: ")+ S); + return createProfileParseError(Twine("Prefetch target expected: ") + S); SmallVector PrefetchTargetStr; Values[0].split(PrefetchTargetStr, ','); if (PrefetchTargetStr.size() != 2) - return 
createProfileParseError(Twine("Prefetch target expected: ")+ Values[0]); + return createProfileParseError(Twine("Prefetch target expected: ") + + Values[0]); auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]); if (!TargetBBID) return TargetBBID.takeError(); diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index 0dd697056a496..d0bd3eda6d05c 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -127,7 +127,8 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { auto InstrIt = BB.begin(); for (auto HintIt = PrefetchHints.begin(); HintIt != PrefetchHints.end();) { auto NextInstrIt = InstrIt == BB.end() ? BB.end() : std::next(InstrIt); - while (HintIt != PrefetchHints.end() && NumCallsInBB >= HintIt->SiteID.SubblockIndex) { + while (HintIt != PrefetchHints.end() && + NumCallsInBB >= HintIt->SiteID.SubblockIndex) { auto *GV = MF.getFunction().getParent()->getOrInsertGlobal( getPrefetchTargetSymbolName(HintIt->TargetFunction, HintIt->TargetID.BBID, From 1162d695ca8b4b7a75e547b4edbdded55a1f43b6 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 19 Nov 2025 06:39:40 +0000 Subject: [PATCH 32/38] Remove some unwanted changes. 
--- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 6 ------ llvm/include/llvm/CodeGen/MachineInstr.h | 3 +-- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 8 +------- 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index d1618f8b1b206..be2fe2b3ef80b 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -100,12 +100,6 @@ template <> struct DenseMapInfo { } }; -struct PrefetchTarget { - StringRef TargetFunction; - UniqueBBID TargetBBID; - unsigned TargetBBOffset; -}; - template <> struct ilist_traits { private: friend class MachineBasicBlock; // Set by the owning MachineBasicBlock. diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index 4b9a8370fa9c3..ca984459c365a 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -123,9 +123,8 @@ class MachineInstr NoUSWrap = 1 << 20, // Instruction supports geps // no unsigned signed wrap. SameSign = 1 << 21, // Both operands have the same sign. - InBounds = 1 << 22, // Pointer arithmetic remains inbounds. + InBounds = 1 << 22 // Pointer arithmetic remains inbounds. // Implies NoUSWrap. - Prefetch = 1 << 23, // Instruction is a prefetch. 
}; private: diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index e3db952994216..79d4ff8fef27b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -120,7 +120,6 @@ #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" -#include "llvm/Support/SMLoc.h" #include "llvm/Support/VCSRevision.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" @@ -180,11 +179,6 @@ static cl::opt EmitJumpTableSizesSection( cl::desc("Emit a section containing jump table addresses and sizes"), cl::Hidden, cl::init(false)); -static cl::opt InsertNoopsForPrefetch( - "insert-noops-for-prefetch", - cl::desc("Whether to insert noops instead of prefetches."), cl::init(false), - cl::Hidden); - // This isn't turned on by default, since several of the scheduling models are // not completely accurate, and we don't want to be misleading. static cl::opt PrintLatency( @@ -2132,7 +2126,7 @@ void AsmPrinter::emitFunctionBody() { break; } default: - emitInstruction(&MI); + emitInstruction(&MI); auto CountInstruction = [&](const MachineInstr &MI) { // Skip Meta instructions inside bundles. From ffa1762644c996734062a289eedf9c1324f6f4b0 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 19 Nov 2025 21:44:05 +0000 Subject: [PATCH 33/38] Fix references. 
--- .../CodeGen/BasicBlockSectionsProfileReader.h | 4 ++-- .../BasicBlockSectionsProfileReader.cpp | 23 +++++++++---------- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 10 ++++---- .../X86/basic-block-sections-code-prefetch.ll | 12 +++------- 4 files changed, 21 insertions(+), 28 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index cb84c703508ab..891f50c9696ad 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -53,9 +53,9 @@ struct CallsiteID { // This represents a prefetch hint to be injected at site `SiteID`, targetting // `TargetID` in function `TargetFunction`. struct PrefetchHint { - SubblockID SiteID; + CallsiteID SiteID; StringRef TargetFunction; - SubblockID TargetID; + CallsiteID TargetID; }; // This represents the raw input profile for one function. diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index c88d6b18d82f5..9e20c6578fbc0 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -167,8 +167,8 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction( // instruct the compiler to emit a prefetch symbol for the given target. // A prefetch target is specified by a pair "," where // bbid specifies the target basic block and subblock_index is a zero-based -// index. Subblock 0 refers to the region at the beginning of the block up to -// the first callsite. Subblock `i > 0` refers to the region immediately after +// index. Callsite 0 refers to the region at the beginning of the block up to +// the first callsite. Callsite `i > 0` refers to the region immediately after // the `i`-th callsite up to the `i+1`-th callsite (or the end of the block). // The prefetch target is always emitted at the beginning of the subblock. 
// This is the beginning of the basic block for `i = 0` and immediately after @@ -180,15 +180,15 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction( // subblock. // // +----------------------------------+ -// | __llvm_prefetch_target_foo_10_0: | <- Subblock 0 (before call_A) +// | __llvm_prefetch_target_foo_10_0: | <- Callsite 0 (before call_A) // | Instruction 1 | // | Instruction 2 | // | call_A (Callsite 0) | -// | __llvm_prefetch_target_foo_10_1: | <--- Subblock 1 (after call_A, +// | __llvm_prefetch_target_foo_10_1: | <--- Callsite 1 (after call_A, // | | before call_B) // | Instruction 3 | // | call_B (Callsite 1) | -// | __llvm_prefetch_target_foo_10_2: | <--- Subblock 2 (after call_B, +// | __llvm_prefetch_target_foo_10_2: | <--- Callsite 2 (after call_B, // | | before call_C) // | Instruction 4 | // +----------------------------------+ @@ -368,8 +368,8 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { auto SiteBBID = parseUniqueBBID(PrefetchSiteStr[0]); if (!SiteBBID) return SiteBBID.takeError(); - unsigned long long SiteSubblockIndex; - if (getAsUnsignedInteger(PrefetchSiteStr[1], 10, SiteSubblockIndex)) + unsigned long long SiteCallsiteIndex; + if (getAsUnsignedInteger(PrefetchSiteStr[1], 10, SiteCallsiteIndex)) return createProfileParseError(Twine("unsigned integer expected: '") + PrefetchSiteStr[1]); @@ -381,14 +381,14 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[1]); if (!TargetBBID) return TargetBBID.takeError(); - unsigned long long TargetSubblockIndex; - if (getAsUnsignedInteger(PrefetchTargetStr[2], 10, TargetSubblockIndex)) + unsigned long long TargetCallsiteIndex; + if (getAsUnsignedInteger(PrefetchTargetStr[2], 10, TargetCallsiteIndex)) return createProfileParseError(Twine("unsigned integer expected: '") + PrefetchTargetStr[2]); FI->second.PrefetchHints.push_back(PrefetchHint{ - SubblockID{*SiteBBID, static_cast(SiteSubblockIndex)}, + CallsiteID{*SiteBBID, 
static_cast(SiteCallsiteIndex)}, PrefetchTargetStr[0], - SubblockID{*TargetBBID, static_cast(TargetSubblockIndex)}}); + CallsiteID{*TargetBBID, static_cast(TargetCallsiteIndex)}}); continue; } case 't': { // Prefetch target specifier. @@ -399,7 +399,6 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { SmallVector PrefetchTargetStr; if (Values.size() != 1) return createProfileParseError(Twine("Prefetch target expected: ") + S); - SmallVector PrefetchTargetStr; Values[0].split(PrefetchTargetStr, ','); if (PrefetchTargetStr.size() != 2) return createProfileParseError(Twine("Prefetch target expected: ") + diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index d0bd3eda6d05c..44295b66a1c86 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -37,13 +37,13 @@ using namespace llvm; namespace llvm { SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName, const UniqueBBID &BBID, - unsigned SubblockIndex) { + unsigned CallsiteIndex) { SmallString<128> R("__llvm_prefetch_target_"); R += FunctionName; R += "_"; R += utostr(BBID.BaseID); R += "_"; - R += utostr(SubblockIndex); + R += utostr(CallsiteIndex); return R; } } // namespace llvm @@ -112,7 +112,7 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { PrefetchHintsBySiteBBID[H.SiteID.BBID].push_back(H); for (auto &[SiteBBID, Hints] : PrefetchHintsBySiteBBID) { llvm::sort(Hints, [](const PrefetchHint &H1, const PrefetchHint &H2) { - return H1.SiteID.SubblockIndex < H2.SiteID.SubblockIndex; + return H1.SiteID.CallsiteIndex < H2.SiteID.CallsiteIndex; }); } auto PtrTy = @@ -128,11 +128,11 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { for (auto HintIt = PrefetchHints.begin(); HintIt != PrefetchHints.end();) { auto NextInstrIt = InstrIt == BB.end() ? 
BB.end() : std::next(InstrIt); while (HintIt != PrefetchHints.end() && - NumCallsInBB >= HintIt->SiteID.SubblockIndex) { + NumCallsInBB >= HintIt->SiteID.CallsiteIndex) { auto *GV = MF.getFunction().getParent()->getOrInsertGlobal( getPrefetchTargetSymbolName(HintIt->TargetFunction, HintIt->TargetID.BBID, - HintIt->TargetID.SubblockIndex), + HintIt->TargetID.CallsiteIndex), PtrTy); TII->insertCodePrefetchInstr(BB, InstrIt, GV); ++HintIt; diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll index e030b104bde76..81fdccbbf73af 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll @@ -7,17 +7,13 @@ ; RUN: echo 't 1,0' >> %t ; RUN: echo 't 1,1' >> %t ; RUN: echo 't 2,1' >> %t -<<<<<<< HEAD ; RUN: echo 't 3,0' >> %t -======= -; RUN: echo 't 4,0' >> %t -; RUN: echo 'i 3@0 _Z3barv@0@0' >> %t -; RUN: echo 'i 2@1 _Z3foob@1@0' >> %t ->>>>>>> d2ddce6b2050 (Expand the test case to prefetch hints.) +; RUN: echo 'i 3,0 _Z3barv,0,0' >> %t +; RUN: echo 'i 2,1 _Z3foob,1,0' >> %t ; RUN: echo 'f _Z3barv' >> %t ; RUN: echo 't 0,0' >> %t ; RUN: echo 't 21,1' >> %t -; RUN: echo 'i 0@1 _Z3foob@0@0' >> %t +; RUN: echo 'i 0,1 _Z3foob,0,0' >> %t ;; ; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t | FileCheck %s @@ -62,8 +58,6 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind { ; CHECK-NEXT: __llvm_prefetch_target__Z3foob_3_0: ; CHECK-NEXT: prefetchit1 __llvm_prefetch_target__Z3barv_0_0(%rip) ; CHECK: retq - ->>>>>>> d2ddce6b2050 (Expand the test case to prefetch hints.) } define weak i32 @_Z3barv() nounwind { From e1fc72794fd37e6752fd104cd1ac5e2314c6908b Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 19 Nov 2025 21:44:18 +0000 Subject: [PATCH 34/38] clang-format. 
--- llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 9e20c6578fbc0..5a6f38006ded3 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -106,8 +106,6 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction( return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints; } - - // Reads the version 1 basic block sections profile. Profile for each function // is encoded as follows: // m From 829bbe2ec644881524ddaca16cff88430a2a3d3c Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 19 Nov 2025 22:19:13 +0000 Subject: [PATCH 35/38] Explain the prefetch hint. --- .../BasicBlockSectionsProfileReader.cpp | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 5a6f38006ded3..f33d801e3a03d 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -162,7 +162,10 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction( // .... // **************************************************************************** // This profile can also specify prefetch targets (starting with 't') which -// instruct the compiler to emit a prefetch symbol for the given target. +// instruct the compiler to emit a prefetch symbol for the given target and +// prefetch hints (start with 'i') which instruct the compiler to insert a +// prefetch hint instruction at the given site for the given target. +// // A prefetch target is specified by a pair "," where // bbid specifies the target basic block and subblock_index is a zero-based // index. 
Callsite 0 refers to the region at the beginning of the block up to @@ -172,6 +175,11 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction( // This is the beginning of the basic block for `i = 0` and immediately after // the `i`-th call for every `i > 0`. // +/ +// A prefetch int is specified by a pair "site target", where site is specified +// as a pair "," similar to prefetch targets, and target +// is specified as a triple ",,". +// // Example: A basic block in function "foo" with BBID 10 and two call // instructions (call_A, call_B). This block is conceptually split into // subblocks, with the prefetch target symbol emitted at the beginning of each @@ -191,6 +199,16 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction( // | Instruction 4 | // +----------------------------------+ // +// A prefetch hint specified in function "bar" as "120,1 foo,10,2" results in a +// a hint inserted after the first call in block #120 of bar: +// B +// +----------------------------------------------------+ +// | Instruction 1 | +// | call_C (Callsite 1) | +// | code_prefetch __llvm_prfetch_target_foo_10 | +// | Instruction 2 | +// +----------------------------------------------------+ +// Error BasicBlockSectionsProfileReader::ReadV1Profile() { auto FI = ProgramPathAndClusterInfo.end(); From c5b9a12dfde033336468718f0c14fec2d6f27f56 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 19 Nov 2025 22:24:11 +0000 Subject: [PATCH 36/38] Fix it. 
--- .../BasicBlockSectionsProfileReader.cpp | 68 +++++++++---------- llvm/lib/Target/X86/X86.h | 7 -- 2 files changed, 34 insertions(+), 41 deletions(-) diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index f33d801e3a03d..e7b33f85bb7fe 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -175,40 +175,40 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction( // This is the beginning of the basic block for `i = 0` and immediately after // the `i`-th call for every `i > 0`. // -/ -// A prefetch int is specified by a pair "site target", where site is specified -// as a pair "," similar to prefetch targets, and target -// is specified as a triple ",,". -// -// Example: A basic block in function "foo" with BBID 10 and two call -// instructions (call_A, call_B). This block is conceptually split into -// subblocks, with the prefetch target symbol emitted at the beginning of each -// subblock. 
-// -// +----------------------------------+ -// | __llvm_prefetch_target_foo_10_0: | <- Callsite 0 (before call_A) -// | Instruction 1 | -// | Instruction 2 | -// | call_A (Callsite 0) | -// | __llvm_prefetch_target_foo_10_1: | <--- Callsite 1 (after call_A, -// | | before call_B) -// | Instruction 3 | -// | call_B (Callsite 1) | -// | __llvm_prefetch_target_foo_10_2: | <--- Callsite 2 (after call_B, -// | | before call_C) -// | Instruction 4 | -// +----------------------------------+ -// -// A prefetch hint specified in function "bar" as "120,1 foo,10,2" results in a -// a hint inserted after the first call in block #120 of bar: -// B -// +----------------------------------------------------+ -// | Instruction 1 | -// | call_C (Callsite 1) | -// | code_prefetch __llvm_prfetch_target_foo_10 | -// | Instruction 2 | -// +----------------------------------------------------+ -// +// A prefetch int is specified by a pair "site target", where site is +// specified as a pair "," similar to prefetch +// targets, and target is specified as a triple +// ",,". + // + // Example: A basic block in function "foo" with BBID 10 and two call + // instructions (call_A, call_B). This block is conceptually split into + // subblocks, with the prefetch target symbol emitted at the beginning of + // each subblock. 
+ // + // +----------------------------------+ + // | __llvm_prefetch_target_foo_10_0: | <- Callsite 0 (before call_A) + // | Instruction 1 | + // | Instruction 2 | + // | call_A (Callsite 0) | + // | __llvm_prefetch_target_foo_10_1: | <--- Callsite 1 (after call_A, + // | | before call_B) + // | Instruction 3 | + // | call_B (Callsite 1) | + // | __llvm_prefetch_target_foo_10_2: | <--- Callsite 2 (after call_B, + // | | before call_C) + // | Instruction 4 | + // +----------------------------------+ + // + // A prefetch hint specified in function "bar" as "120,1 foo,10,2" results + // in a a hint inserted after the first call in block #120 of bar: + // B + // +----------------------------------------------------+ + // | Instruction 1 | + // | call_C (Callsite 1) | + // | code_prefetch __llvm_prfetch_target_foo_10 | + // | Instruction 2 | + // +----------------------------------------------------+ + // Error BasicBlockSectionsProfileReader::ReadV1Profile() { auto FI = ProgramPathAndClusterInfo.end(); diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 03706aaaab237..97848bec7127e 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -166,13 +166,6 @@ FunctionPass *createX86IndirectThunksPass(); /// This pass replaces ret instructions with jmp's to __x86_return thunk. FunctionPass *createX86ReturnThunksPass(); -/// This pass ensures instructions featuring a memory operand -/// have distinctive (with respect to each other) -FunctionPass *createX86DiscriminateMemOpsPass(); - -/// This pass applies profiling information to insert cache prefetches. -FunctionPass *createX86InsertPrefetchPass(); - /// This pass insert wait instruction after X87 instructions which could raise /// fp exceptions when strict-fp enabled. FunctionPass *createX86InsertX87waitPass(); From 8e9665e66b0068eb5d858652eca17a36479ddd64 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 19 Nov 2025 22:24:22 +0000 Subject: [PATCH 37/38] clang-format. 
--- .../BasicBlockSectionsProfileReader.cpp | 60 +++++++++---------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index e7b33f85bb7fe..223831bb94805 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -179,36 +179,36 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction( // specified as a pair "," similar to prefetch // targets, and target is specified as a triple // ",,". - // - // Example: A basic block in function "foo" with BBID 10 and two call - // instructions (call_A, call_B). This block is conceptually split into - // subblocks, with the prefetch target symbol emitted at the beginning of - // each subblock. - // - // +----------------------------------+ - // | __llvm_prefetch_target_foo_10_0: | <- Callsite 0 (before call_A) - // | Instruction 1 | - // | Instruction 2 | - // | call_A (Callsite 0) | - // | __llvm_prefetch_target_foo_10_1: | <--- Callsite 1 (after call_A, - // | | before call_B) - // | Instruction 3 | - // | call_B (Callsite 1) | - // | __llvm_prefetch_target_foo_10_2: | <--- Callsite 2 (after call_B, - // | | before call_C) - // | Instruction 4 | - // +----------------------------------+ - // - // A prefetch hint specified in function "bar" as "120,1 foo,10,2" results - // in a a hint inserted after the first call in block #120 of bar: - // B - // +----------------------------------------------------+ - // | Instruction 1 | - // | call_C (Callsite 1) | - // | code_prefetch __llvm_prfetch_target_foo_10 | - // | Instruction 2 | - // +----------------------------------------------------+ - // +// +// Example: A basic block in function "foo" with BBID 10 and two call +// instructions (call_A, call_B). This block is conceptually split into +// subblocks, with the prefetch target symbol emitted at the beginning of +// each subblock. 
+// +// +----------------------------------+ +// | __llvm_prefetch_target_foo_10_0: | <- Callsite 0 (before call_A) +// | Instruction 1 | +// | Instruction 2 | +// | call_A (Callsite 0) | +// | __llvm_prefetch_target_foo_10_1: | <--- Callsite 1 (after call_A, +// | | before call_B) +// | Instruction 3 | +// | call_B (Callsite 1) | +// | __llvm_prefetch_target_foo_10_2: | <--- Callsite 2 (after call_B, +// | | before call_C) +// | Instruction 4 | +// +----------------------------------+ +// +// A prefetch hint specified in function "bar" as "120,1 foo,10,2" results +// in a hint inserted after the first call in block #120 of bar: +// B +// +----------------------------------------------------+ +// | Instruction 1 | +// | call_C (Callsite 1) | +// | code_prefetch __llvm_prefetch_target_foo_10_2 | +// | Instruction 2 | +// +----------------------------------------------------+ +// Error BasicBlockSectionsProfileReader::ReadV1Profile() { auto FI = ProgramPathAndClusterInfo.end(); From f235b8cc332555ad2382402be6191df8a6df4234 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 19 Nov 2025 22:57:51 +0000 Subject: [PATCH 38/38] Add some more comments. --- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index 44295b66a1c86..68a500c545651 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -14,7 +14,9 @@ /// be the beginning of any dynamic basic block, that is the beginning of a /// machine basic block, or immediately after a callsite. A global symbol is /// emitted at the position of the target so it can be addressed from the -/// prefetch instruction from any module. +/// prefetch instruction from any module. In order to insert prefetch hints, +/// `TargetInstrInfo::insertCodePrefetchInstr` must be implemented by the +/// target.
//===----------------------------------------------------------------------===// #include "llvm/CodeGen/InsertCodePrefetch.h" @@ -110,6 +112,8 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { DenseMap> PrefetchHintsBySiteBBID; for (const auto &H : PrefetchHints) PrefetchHintsBySiteBBID[H.SiteID.BBID].push_back(H); + // Sort prefetch hints by their callsite index so we can insert them in one + // pass over the block's instructions. for (auto &[SiteBBID, Hints] : PrefetchHintsBySiteBBID) { llvm::sort(Hints, [](const PrefetchHint &H1, const PrefetchHint &H2) { return H1.SiteID.CallsiteIndex < H2.SiteID.CallsiteIndex; @@ -127,6 +131,8 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { auto InstrIt = BB.begin(); for (auto HintIt = PrefetchHints.begin(); HintIt != PrefetchHints.end();) { auto NextInstrIt = InstrIt == BB.end() ? BB.end() : std::next(InstrIt); + // Insert all the prefetch hints which must be placed after this call (or + // at the beginning of the block if `NumCallsInBB` is zero). while (HintIt != PrefetchHints.end() && NumCallsInBB >= HintIt->SiteID.CallsiteIndex) { auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(