diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index ee1f28377f7e4..161a810298d69 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -42,6 +42,14 @@ struct BBClusterInfo {
   unsigned PositionInCluster;
 };
 
+// The prefetch symbol is emitted immediately after the call of the given index,
+// in block `BBID` (First call has an index of 1). Zero callsite index means the
+// start of the block.
+struct CallsiteID {
+  UniqueBBID BBID;
+  unsigned CallsiteIndex;
+};
+
 // This represents the raw input profile for one function.
 struct FunctionPathAndClusterInfo {
   // BB Cluster information specified by `UniqueBBID`s.
@@ -50,9 +58,12 @@ struct FunctionPathAndClusterInfo {
   // the edge a -> b (a is not cloned). The index of the path in this vector
   // determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
   SmallVector<SmallVector<unsigned>> ClonePaths;
+  // Code prefetch targets. Each target is identified by the callsite ID
+  // immediately after which the prefetch target symbol is emitted.
+  SmallVector<CallsiteID> PrefetchTargets;
   // Node counts for each basic block.
   DenseMap<UniqueBBID, uint64_t> NodeCounts;
-  // Edge counts for each edge, stored as a nested map.
+  // Edge counts for each edge.
   DenseMap<UniqueBBID, DenseMap<UniqueBBID, uint64_t>> EdgeCounts;
   // Hash for each basic block. The Hashes are stored for every original block
   // (not cloned blocks), hence the map key being unsigned instead of
@@ -86,6 +97,11 @@ class BasicBlockSectionsProfileReader {
   uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
                         const UniqueBBID &SinkBBID) const;
 
+  // Returns the prefetch targets (identified by their containing callsite IDs)
+  // for function `FuncName`.
+  SmallVector<CallsiteID>
+  getPrefetchTargetsForFunction(StringRef FuncName) const;
+
 private:
   StringRef getAliasName(StringRef FuncName) const {
     auto R = FuncAliasMap.find(FuncName);
@@ -195,6 +211,9 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
   uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
                         const UniqueBBID &DestBBID) const;
 
+  SmallVector<CallsiteID>
+  getPrefetchTargetsForFunction(StringRef FuncName) const;
+
   // Initializes the FunctionNameToDIFilename map for the current module and
   // then reads the profile for the matching functions.
   bool doInitialization(Module &M) override;
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index fcf7bab09fcff..48248bd0461bc 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -229,6 +229,12 @@ class MachineBasicBlock
   /// is only computed once and is cached.
   mutable MCSymbol *CachedMCSymbol = nullptr;
 
+  /// Contains the callsite indices in this block that are targets of code
+  /// prefetching. The index `i` specifies the `i`th call, with zero
+  /// representing the beginning of the block and 1 representing the first call.
+  /// Must be in ascending order and without duplicates.
+  SmallVector<unsigned> PrefetchTargetCallsiteIndexes;
+
   /// Cached MCSymbol for this block (used if IsEHContTarget).
   mutable MCSymbol *CachedEHContMCSymbol = nullptr;
 
@@ -710,6 +716,14 @@ class MachineBasicBlock
 
   std::optional<UniqueBBID> getBBID() const { return BBID; }
 
+  const SmallVector<unsigned> &getPrefetchTargetCallsiteIndexes() const {
+    return PrefetchTargetCallsiteIndexes;
+  }
+
+  void setPrefetchTargetCallsiteIndexes(const SmallVector<unsigned> &V) {
+    PrefetchTargetCallsiteIndexes = V;
+  }
+
   /// Returns the section ID of this basic block.
   MBBSectionID getSectionID() const { return SectionID; }
 
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index a8525554b142e..f148d050a5772 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -69,6 +69,8 @@ LLVM_ABI MachineFunctionPass *createBasicBlockSectionsPass();
 
 LLVM_ABI MachineFunctionPass *createBasicBlockPathCloningPass();
 
+LLVM_ABI MachineFunctionPass *createInsertCodePrefetchPass();
+
 /// createMachineBlockHashInfoPass - This pass computes basic block hashes.
 LLVM_ABI MachineFunctionPass *createMachineBlockHashInfoPass();
 
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 10a4d8525a9e8..35d5ab14dc226 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -56,6 +56,7 @@ LLVM_ABI void initializeAssignmentTrackingAnalysisPass(PassRegistry &);
 LLVM_ABI void initializeAssumptionCacheTrackerPass(PassRegistry &);
 LLVM_ABI void initializeAtomicExpandLegacyPass(PassRegistry &);
 LLVM_ABI void initializeBasicBlockPathCloningPass(PassRegistry &);
+LLVM_ABI void initializeInsertCodePrefetchPass(PassRegistry &);
 LLVM_ABI void
 initializeBasicBlockSectionsProfileReaderWrapperPassPass(PassRegistry &);
 LLVM_ABI void initializeBasicBlockSectionsPass(PassRegistry &);
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3aa245b7f3f1e..a48d333b538ec 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1985,7 +1985,33 @@ void AsmPrinter::emitFunctionBody() {
     // Print a label for the basic block.
     emitBasicBlockStart(MBB);
     DenseMap<StringRef, unsigned> MnemonicCounts;
+
+    const SmallVector<unsigned> &PrefetchTargets =
+        MBB.getPrefetchTargetCallsiteIndexes();
+    auto PrefetchTargetIt = PrefetchTargets.begin();
+    unsigned LastCallsiteIndex = 0;
+    // Helper to emit a symbol for the prefetch target and proceed to the next
+    // one.
+    auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
+      if (PrefetchTargetIt != PrefetchTargets.end() &&
+          *PrefetchTargetIt == LastCallsiteIndex) {
+        MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
+            Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
+            utostr(MBB.getBBID()->BaseID) + Twine("_") +
+            utostr(static_cast<unsigned>(*PrefetchTargetIt)));
+        // If the function is weak-linkage it may be replaced by a strong
+        // version, in which case the prefetch targets should also be replaced.
+        OutStreamer->emitSymbolAttribute(
+            PrefetchTargetSymbol,
+            MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global);
+        OutStreamer->emitLabel(PrefetchTargetSymbol);
+        ++PrefetchTargetIt;
+      }
+    };
+
     for (auto &MI : MBB) {
+      EmitPrefetchTargetSymbolIfNeeded();
+
       // Print the assembly for the instruction.
       if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
           !MI.isDebugInstr()) {
@@ -2123,8 +2149,11 @@ void AsmPrinter::emitFunctionBody() {
         break;
       }
 
-      if (MI.isCall() && MF->getTarget().Options.BBAddrMap)
-        OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
+      if (MI.isCall()) {
+        if (MF->getTarget().Options.BBAddrMap)
+          OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
+        ++LastCallsiteIndex;
+      }
 
       if (TM.Options.EmitCallGraphSection && MI.isCall())
         handleCallsiteForCallgraph(FuncCGInfo, CallSitesInfoMap, MI);
@@ -2136,6 +2165,8 @@ void AsmPrinter::emitFunctionBody() {
       for (auto &Handler : Handlers)
         Handler->endInstruction();
     }
+    // Emit the last prefetch target in case the last instruction was a call.
+    EmitPrefetchTargetSymbolIfNeeded();
 
     // We must emit temporary symbol for the end of this basic block, if either
     // we have BBLabels enabled or if this basic blocks marks the end of a
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index c234c0f1b0b34..8762f982f72ea 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -93,6 +93,13 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount(
   return EdgeIt->second;
 }
 
+SmallVector<CallsiteID>
+BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
+    StringRef FuncName) const {
+  return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName))
+      .PrefetchTargets;
+}
+
 // Reads the version 1 basic block sections profile. Profile for each function
 // is encoded as follows:
 //   m <module_name>
@@ -148,6 +155,36 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount(
 //             +-->: 5 :
 //                 ....
 // ****************************************************************************
+// This profile can also specify prefetch targets (starting with 't') which
+// instruct the compiler to emit a prefetch symbol for the given target.
+// A prefetch target is specified by a pair "<bbid>,<subblock_index>" where
+// bbid specifies the target basic block and subblock_index is a zero-based
+// index. Subblock 0 refers to the region at the beginning of the block up to
+// the first callsite. Subblock `i > 0` refers to the region immediately after
+// the `i`-th callsite up to the `i+1`-th callsite (or the end of the block).
+// The prefetch target is always emitted at the beginning of the subblock.
+// This is the beginning of the basic block for `i = 0` and immediately after
+// the `i`-th call for every `i > 0`.
+//
+// Example: A basic block in function "foo" with BBID 10 and two call
+// instructions (call_A, call_B). This block is conceptually split into
+// subblocks, with the prefetch target symbol emitted at the beginning of each
+// subblock.
+//
+//   +----------------------------------+
+//   | __llvm_prefetch_target_foo_10_0: | <--- Subblock 0 (before call_A)
+//   |  Instruction 1                   |
+//   |  Instruction 2                   |
+//   |  call_A (Callsite 1)             |
+//   | __llvm_prefetch_target_foo_10_1: | <--- Subblock 1 (after call_A,
+//   |                                  |      before call_B)
+//   |  Instruction 3                   |
+//   |  call_B (Callsite 2)             |
+//   | __llvm_prefetch_target_foo_10_2: | <--- Subblock 2 (after call_B,
+//   |                                  |      up to the block end)
+//   |  Instruction 4                   |
+//   +----------------------------------+
+//
 Error BasicBlockSectionsProfileReader::ReadV1Profile() {
   auto FI = ProgramPathAndClusterInfo.end();
 
@@ -308,6 +345,27 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       }
       continue;
     }
+    case 't': { // Callsite target specifier.
+      // Skip this directive when the profile iterator (FI) refers to the
+      // past-the-end element.
+      if (FI == ProgramPathAndClusterInfo.end())
+        continue;
+      SmallVector<StringRef, 2> PrefetchTargetStr;
+      Values[0].split(PrefetchTargetStr, ',');
+      if (PrefetchTargetStr.size() != 2)
+        return createProfileParseError(Twine("Callsite target expected: ") +
+                                       Values[0]);
+      auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
+      if (!TargetBBID)
+        return TargetBBID.takeError();
+      unsigned long long CallsiteIndex;
+      if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, CallsiteIndex))
+        return createProfileParseError(Twine("unsigned integer expected: '") +
+                                       PrefetchTargetStr[1] + "'");
+      FI->second.PrefetchTargets.push_back(
+          CallsiteID{*TargetBBID, static_cast<unsigned>(CallsiteIndex)});
+      continue;
+    }
     default:
       return createProfileParseError(Twine("invalid specifier: '") +
                                      Twine(Specifier) + "'");
@@ -514,6 +572,12 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount(
   return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID);
 }
 
+SmallVector<CallsiteID>
+BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
+    StringRef FuncName) const {
+  return BBSPR.getPrefetchTargetsForFunction(FuncName);
+}
+
 BasicBlockSectionsProfileReader &
 BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() {
   return BBSPR;
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 1cf0b4964760b..fcf28247179ca 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -79,6 +79,7 @@ add_llvm_component_library(LLVMCodeGen
   IndirectBrExpandPass.cpp
   InitUndef.cpp
   InlineSpiller.cpp
+  InsertCodePrefetch.cpp
   InterferenceCache.cpp
   InterleavedAccessPass.cpp
   InterleavedLoadCombinePass.cpp
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
new file mode 100644
index 0000000000000..44864cbc99c52
--- /dev/null
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -0,0 +1,101 @@
+//===- InsertCodePrefetch.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Code Prefetch Insertion Pass.
+//===----------------------------------------------------------------------===//
+/// This pass records, on each machine basic block, the code prefetch targets
+/// listed for it in the basic block sections profile. A target is either the
+/// beginning of a machine basic block or the position immediately after one of
+/// its callsites. AsmPrinter emits a global (weak for weak-linkage functions)
+/// symbol at each recorded position so that the target can be addressed by
+/// name from other modules.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/BasicBlockSectionUtils.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+#define DEBUG_TYPE "insert-code-prefetch"
+
+namespace {
+class InsertCodePrefetch : public MachineFunctionPass {
+public:
+  static char ID;
+
+  InsertCodePrefetch() : MachineFunctionPass(ID) {
+    initializeInsertCodePrefetchPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override {
+    return "Code Prefetch Inserter Pass";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  // Sets prefetch targets based on the bb section profile.
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+//            Implementation
+//===----------------------------------------------------------------------===//
+
+char InsertCodePrefetch::ID = 0;
+INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion",
+                      true, false)
+INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
+INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion",
+                    true, false)
+
+bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
+  assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
+         "BB Sections list not enabled!");
+  if (hasInstrProfHashMismatch(MF))
+    return false;
+  // Group the function's prefetch targets by block so that each block can be
+  // handed the callsite indices at which AsmPrinter must emit a symbol.
+  SmallVector<CallsiteID> PrefetchTargets =
+      getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
+          .getPrefetchTargetsForFunction(MF.getName());
+  DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
+  for (const auto &Target : PrefetchTargets)
+    PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex);
+  // Sort and uniquify the callsite indices for every block.
+  for (auto &[BBID, Indexes] : PrefetchTargetsByBBID) {
+    llvm::sort(Indexes);
+    Indexes.erase(llvm::unique(Indexes), Indexes.end());
+  }
+  for (auto &MBB : MF) {
+    auto R = PrefetchTargetsByBBID.find(*MBB.getBBID());
+    if (R == PrefetchTargetsByBBID.end())
+      continue;
+    MBB.setPrefetchTargetCallsiteIndexes(R->second);
+  }
+  return false;
+}
+
+void InsertCodePrefetch::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<BasicBlockSectionsProfileReaderWrapperPass>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineFunctionPass *llvm::createInsertCodePrefetchPass() {
+  return new InsertCodePrefetch();
+}
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index ceae0d29eea90..5334c5596d018 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1291,6 +1291,7 @@ void TargetPassConfig::addMachinePasses() {
       addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass(
           TM->getBBSectionsFuncListBuf()));
       addPass(llvm::createBasicBlockPathCloningPass());
+      addPass(llvm::createInsertCodePrefetchPass());
     }
     addPass(llvm::createBasicBlockSectionsPass());
   }
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
new file mode 100644
index 0000000000000..e5778b4b77fc2
--- /dev/null
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -0,0 +1,65 @@
+;; Check prefetch directives in basic block section profiles.
+;;
+;; Specify the bb sections profile:
+; RUN: echo 'v1' > %t
+; RUN: echo 'f _Z3foob' >> %t
+; RUN: echo 't 0,0' >> %t
+; RUN: echo 't 1,0' >> %t
+; RUN: echo 't 1,1' >> %t
+; RUN: echo 't 2,1' >> %t
+; RUN: echo 't 3,0' >> %t
+; RUN: echo 'f _Z3barv' >> %t
+; RUN: echo 't 0,0' >> %t
+; RUN: echo 't 21,1' >> %t
+;;
+; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t -O0 | FileCheck %s
+
+define i32 @_Z3foob(i1 zeroext %0) nounwind {
+  %2 = alloca i32, align 4
+  %3 = alloca i8, align 1
+  %4 = zext i1 %0 to i8
+  store i8 %4, ptr %3, align 1
+  %5 = load i8, ptr %3, align 1
+  %6 = trunc i8 %5 to i1
+  %7 = zext i1 %6 to i32
+  %8 = icmp sgt i32 %7, 0
+  br i1 %8, label %9, label %11
+; CHECK:        _Z3foob:
+; CHECK-NEXT:   .globl __llvm_prefetch_target__Z3foob_0_0
+; CHECK-NEXT:   __llvm_prefetch_target__Z3foob_0_0:
+
+9:                                                ; preds = %1
+  %10 = call i32 @_Z3barv()
+  store i32 %10, ptr %2, align 4
+  br label %13
+; CHECK:        .globl __llvm_prefetch_target__Z3foob_1_0
+; CHECK-NEXT:   __llvm_prefetch_target__Z3foob_1_0:
+; CHECK-NEXT:   callq _Z3barv@PLT
+; CHECK-NEXT:   .globl __llvm_prefetch_target__Z3foob_1_1
+; CHECK-NEXT:   __llvm_prefetch_target__Z3foob_1_1:
+
+11:                                               ; preds = %1
+  %12 = call i32 @_Z3bazv()
+  store i32 %12, ptr %2, align 4
+  br label %13
+; CHECK:        callq _Z3bazv@PLT
+; CHECK-NEXT:   .globl __llvm_prefetch_target__Z3foob_2_1
+; CHECK-NEXT:   __llvm_prefetch_target__Z3foob_2_1:
+
+13:                                               ; preds = %11, %9
+  %14 = load i32, ptr %2, align 4
+  ret i32 %14
+; CHECK:        .LBB0_3:
+; CHECK-NEXT:   .globl __llvm_prefetch_target__Z3foob_3_0
+; CHECK-NEXT:   __llvm_prefetch_target__Z3foob_3_0:
+}
+
+define weak i32 @_Z3barv() nounwind {
+  %1 = call i32 @_Z3bazv()
+  ret i32 %1
+; CHECK:        _Z3barv:
+; CHECK-NEXT:   .weak __llvm_prefetch_target__Z3barv_0_0
+; CHECK-NEXT:   __llvm_prefetch_target__Z3barv_0_0:
+}
+
+declare i32 @_Z3bazv()