Skip to content
Open
Show file tree
Hide file tree
Changes from 34 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
49c5f22
feat(AsmPrinter): Add support for emitting prefetch target symbols
rlavaee Nov 8, 2025
b25adef
feat: Add prefetch-profile.txt for testing
rlavaee Nov 11, 2025
bbfb7ba
Everything else.
rlavaee Nov 12, 2025
3e6b04f
Add test.
rlavaee Nov 12, 2025
9967360
Fix everything
rlavaee Nov 13, 2025
a08b65a
clang-format.
rlavaee Nov 13, 2025
d988a3c
Fix the prefetch test.
rlavaee Nov 13, 2025
4008445
Rename the test.
rlavaee Nov 13, 2025
e3b501f
Remove unrelated changes.
rlavaee Nov 13, 2025
715f1b8
Add some comments.
rlavaee Nov 13, 2025
a1e1e00
clang-format.
rlavaee Nov 13, 2025
717e6fe
Add comments and rename functions.
rlavaee Nov 13, 2025
3605b0d
clang-format.
rlavaee Nov 13, 2025
6408bd7
Add optimization remarks for when prefetch targets cannot be mapped.
rlavaee Nov 13, 2025
a06cb9d
Expand test to weak symbols.
rlavaee Nov 13, 2025
ceefc56
Change prefetch directive format to use , instead of @
rlavaee Nov 17, 2025
639efd7
Fix the error.
rlavaee Nov 17, 2025
cc4e333
Remove optimization remarks.
rlavaee Nov 19, 2025
6d8bdb1
Refine and polish.
rlavaee Nov 19, 2025
d93a5ec
clang-format.
rlavaee Nov 19, 2025
7cb4f6b
Change to using unsigned values for CallsiteIndex
rlavaee Nov 19, 2025
9fdf7d0
clang-format.
rlavaee Nov 19, 2025
0c17e45
Fix AsmPrinter.
rlavaee Nov 19, 2025
500b536
use -O0
rlavaee Nov 19, 2025
a265dbc
feat(AsmPrinter): Add support for emitting prefetch target symbols
rlavaee Nov 8, 2025
fb647e6
feat(AsmPrinter): Add support for emitting prefetch target symbols
rlavaee Nov 8, 2025
ea967e2
feat(CodeGen): Add PrefetchInsertion pass
rlavaee Nov 9, 2025
ed5d461
Implement inserting prefetches into the specified positions.
rlavaee Nov 15, 2025
e9a2af5
clang-format.
rlavaee Nov 15, 2025
b86fc14
Expand the test case to prefetch hints.
rlavaee Nov 17, 2025
d15a92e
clang-format.
rlavaee Nov 17, 2025
1162d69
Remove some unwanted changes.
rlavaee Nov 19, 2025
ffa1762
Fix references.
rlavaee Nov 19, 2025
e1fc727
clang-format.
rlavaee Nov 19, 2025
829bbe2
Explain the prefetch hint.
rlavaee Nov 19, 2025
c5b9a12
Fix it.
rlavaee Nov 19, 2025
8e9665e
clang-format.
rlavaee Nov 19, 2025
f235b8c
Add some more comments.
rlavaee Nov 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 36 additions & 1 deletion llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,22 @@ struct BBClusterInfo {
unsigned PositionInCluster;
};

// Identifies a position inside a basic block by counting calls.
// The prefetch symbol is emitted immediately after the call of the given index,
// in block `BBID` (First call has an index of 1). Zero callsite index means the
// start of the block.
struct CallsiteID {
// The basic block containing the position.
UniqueBBID BBID;
// Index of the call after which the position lies; zero means the start of
// the block.
unsigned CallsiteIndex;
};

// This represents a prefetch hint to be injected at site `SiteID`, targeting
// `TargetID` in function `TargetFunction`.
struct PrefetchHint {
// Position at which the prefetch is injected.
CallsiteID SiteID;
// Name of the function containing the prefetch target.
// NOTE(review): `StringRef` does not own its characters; presumably the
// profile buffer it points into outlives this struct — confirm.
StringRef TargetFunction;
// Position within `TargetFunction` that is prefetched.
CallsiteID TargetID;
};

// This represents the raw input profile for one function.
struct FunctionPathAndClusterInfo {
// BB Cluster information specified by `UniqueBBID`s.
Expand All @@ -50,9 +66,13 @@ struct FunctionPathAndClusterInfo {
// the edge a -> b (a is not cloned). The index of the path in this vector
// determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
SmallVector<SmallVector<unsigned>> ClonePaths;
// Code prefetch targets, each specified by the callsite ID immediately after
// which the prefetch target symbol is emitted.
SmallVector<CallsiteID> PrefetchTargets;
SmallVector<PrefetchHint> PrefetchHints;
// Node counts for each basic block.
DenseMap<UniqueBBID, uint64_t> NodeCounts;
// Edge counts for each edge, stored as a nested map.
// Edge counts for each edge.
DenseMap<UniqueBBID, DenseMap<UniqueBBID, uint64_t>> EdgeCounts;
// Hash for each basic block. The Hashes are stored for every original block
// (not cloned blocks), hence the map key being unsigned instead of
Expand Down Expand Up @@ -86,6 +106,15 @@ class BasicBlockSectionsProfileReader {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &SinkBBID) const;

// Returns the prefetch targets (identified by their containing callsite IDs)
// for function `FuncName`.
SmallVector<CallsiteID>
getPrefetchTargetsForFunction(StringRef FuncName) const;

// Returns the prefetch hints to be injected in function `FuncName`.
SmallVector<PrefetchHint>
getPrefetchHintsForFunction(StringRef FuncName) const;

private:
StringRef getAliasName(StringRef FuncName) const {
auto R = FuncAliasMap.find(FuncName);
Expand Down Expand Up @@ -195,6 +224,12 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &DestBBID) const;

SmallVector<PrefetchHint>
getPrefetchHintsForFunction(StringRef FuncName) const;

SmallVector<CallsiteID>
getPrefetchTargetsForFunction(StringRef FuncName) const;

// Initializes the FunctionNameToDIFilename map for the current module and
// then reads the profile for the matching functions.
bool doInitialization(Module &M) override;
Expand Down
25 changes: 25 additions & 0 deletions llvm/include/llvm/CodeGen/InsertCodePrefetch.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//===- InsertCodePrefetch.h - Utilities for code prefetch insertion -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_INSERTCODEPREFETCH_H
#define LLVM_CODEGEN_INSERTCODEPREFETCH_H

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/UniqueBBID.h"

namespace llvm {

/// Returns the symbol name of the prefetch target located in block `BBID` of
/// function `FunctionName` at subblock `SubblockIndex`.
/// NOTE(review): the definition is not visible here; presumably the result
/// follows the `__llvm_prefetch_target_<function>_<bbid>_<index>` scheme used
/// when the symbols are emitted — confirm against the implementation.
SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName,
const UniqueBBID &BBID,
unsigned SubblockIndex);

} // end namespace llvm

#endif // LLVM_CODEGEN_INSERTCODEPREFETCH_H
16 changes: 16 additions & 0 deletions llvm/include/llvm/CodeGen/MachineBasicBlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,8 @@ class MachineBasicBlock
/// basic block sections and basic block labels.
std::optional<UniqueBBID> BBID;

SmallVector<unsigned> PrefetchTargets;

/// With basic block sections, this stores the Section ID of the basic block.
MBBSectionID SectionID{0};

Expand All @@ -229,6 +231,12 @@ class MachineBasicBlock
/// is only computed once and is cached.
mutable MCSymbol *CachedMCSymbol = nullptr;

/// Contains the callsite indices in this block that are targets of code
/// prefetching. The index `i` specifies the `i`th call, with zero
/// representing the beginning of the block and `1` representing the first
/// call. Must be in ascending order and without duplicates.
SmallVector<unsigned> PrefetchTargetCallsiteIndexes;

/// Cached MCSymbol for this block (used if IsEHContTarget).
mutable MCSymbol *CachedEHContMCSymbol = nullptr;

Expand Down Expand Up @@ -710,6 +718,14 @@ class MachineBasicBlock

std::optional<UniqueBBID> getBBID() const { return BBID; }

/// Returns the callsite indices in this block that are targets of code
/// prefetching.
const SmallVector<unsigned> &getPrefetchTargetCallsiteIndexes() const {
return PrefetchTargetCallsiteIndexes;
}

/// Replaces the prefetch-target callsite indices of this block with a copy
/// of `V`.
/// NOTE(review): no ordering or uniqueness check is performed here; callers
/// presumably must pass indices in ascending order without duplicates —
/// confirm.
void setPrefetchTargetCallsiteIndexes(const SmallVector<unsigned> &V) {
PrefetchTargetCallsiteIndexes = V;
}

/// Returns the section ID of this basic block.
MBBSectionID getSectionID() const { return SectionID; }

Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/CodeGen/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ LLVM_ABI MachineFunctionPass *createBasicBlockSectionsPass();

LLVM_ABI MachineFunctionPass *createBasicBlockPathCloningPass();

LLVM_ABI MachineFunctionPass *createInsertCodePrefetchPass();

/// createMachineBlockHashInfoPass - This pass computes basic block hashes.
LLVM_ABI MachineFunctionPass *createMachineBlockHashInfoPass();

Expand Down
8 changes: 8 additions & 0 deletions llvm/include/llvm/CodeGen/TargetInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -2381,6 +2381,14 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
llvm_unreachable("unknown number of operands necessary");
}

/// Inserts a code prefetch instruction before `InsertBefore` in block `MBB`
/// targeting `GV`.
/// The default implementation inserts nothing and returns false.
/// NOTE(review): presumably targets override this and return true when an
/// instruction was inserted — confirm against the overrides.
virtual bool insertCodePrefetchInstr(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
const GlobalValue *GV) const {
return false;
}

private:
mutable std::unique_ptr<MIRFormatter> Formatter;
unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/InitializePasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ LLVM_ABI void initializeAssignmentTrackingAnalysisPass(PassRegistry &);
LLVM_ABI void initializeAssumptionCacheTrackerPass(PassRegistry &);
LLVM_ABI void initializeAtomicExpandLegacyPass(PassRegistry &);
LLVM_ABI void initializeBasicBlockPathCloningPass(PassRegistry &);
LLVM_ABI void initializeInsertCodePrefetchPass(PassRegistry &);
LLVM_ABI void
initializeBasicBlockSectionsProfileReaderWrapperPassPass(PassRegistry &);
LLVM_ABI void initializeBasicBlockSectionsPass(PassRegistry &);
Expand Down
36 changes: 34 additions & 2 deletions llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/InsertCodePrefetch.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockHashInfo.h"
Expand Down Expand Up @@ -1985,7 +1986,33 @@ void AsmPrinter::emitFunctionBody() {
// Print a label for the basic block.
emitBasicBlockStart(MBB);
DenseMap<StringRef, unsigned> MnemonicCounts;

// Callsite indexes in this block at which a prefetch target symbol must be
// emitted. Bound by reference to avoid copying the vector for every block;
// nothing in this loop is expected to modify it.
const SmallVector<unsigned> &PrefetchTargets =
    MBB.getPrefetchTargetCallsiteIndexes();
auto PrefetchTargetIt = PrefetchTargets.begin();
// Number of calls emitted so far in this block.
unsigned LastCallsiteIndex = 0;
// Helper to emit a symbol for the prefetch target and proceed to the next
// one. Does nothing unless the next pending target is located right after
// the most recently emitted call (or at the block start for index zero).
auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
  if (PrefetchTargetIt == PrefetchTargets.end() ||
      *PrefetchTargetIt != LastCallsiteIndex)
    return;
  // The symbol name embeds the block ID, so the block must have one.
  assert(MBB.getBBID() && "prefetch target in a block without a BBID");
  MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
      Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
      utostr(MBB.getBBID()->BaseID) + Twine("_") +
      utostr(*PrefetchTargetIt));
  // If the function is weak-linkage it may be replaced by a strong
  // version, in which case the prefetch targets should also be replaced.
  OutStreamer->emitSymbolAttribute(
      PrefetchTargetSymbol,
      MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global);
  OutStreamer->emitLabel(PrefetchTargetSymbol);
  ++PrefetchTargetIt;
};

for (auto &MI : MBB) {
EmitPrefetchTargetSymbolIfNeeded();

// Print the assembly for the instruction.
if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
!MI.isDebugInstr()) {
Expand Down Expand Up @@ -2123,8 +2150,11 @@ void AsmPrinter::emitFunctionBody() {
break;
}

if (MI.isCall() && MF->getTarget().Options.BBAddrMap)
OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
if (MI.isCall()) {
if (MF->getTarget().Options.BBAddrMap)
OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
LastCallsiteIndex++;
}

if (TM.Options.EmitCallGraphSection && MI.isCall())
handleCallsiteForCallgraph(FuncCGInfo, CallSitesInfoMap, MI);
Expand All @@ -2136,6 +2166,8 @@ void AsmPrinter::emitFunctionBody() {
for (auto &Handler : Handlers)
Handler->endInstruction();
}
// Emit the last prefetch target in case the last instruction was a call.
EmitPrefetchTargetSymbolIfNeeded();

// We must emit temporary symbol for the end of this basic block, if either
// we have BBLabels enabled or if this basic blocks marks the end of a
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/CodeGen/BasicBlockSections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ class BasicBlockSections : public MachineFunctionPass {
public:
static char ID;

BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = nullptr;
// BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader =
// nullptr;

BasicBlockSections() : MachineFunctionPass(ID) {
initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry());
Expand Down
116 changes: 116 additions & 0 deletions llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,19 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount(
return EdgeIt->second;
}

// Returns the prefetch targets recorded in the profile for function
// `FuncName` (after alias resolution), or an empty vector if the function has
// no profile entry.
SmallVector<CallsiteID>
BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
    StringRef FuncName) const {
  // Look the entry up in place: `lookup()` returns the whole per-function
  // profile by value, copying every member just to read one of them.
  auto It = ProgramPathAndClusterInfo.find(getAliasName(FuncName));
  if (It == ProgramPathAndClusterInfo.end())
    return {};
  return It->second.PrefetchTargets;
}

// Returns the prefetch hints recorded in the profile for function `FuncName`
// (after alias resolution), or an empty vector if the function has no profile
// entry.
SmallVector<PrefetchHint>
BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
    StringRef FuncName) const {
  // Look the entry up in place: `lookup()` returns the whole per-function
  // profile by value, copying every member just to read one of them.
  auto It = ProgramPathAndClusterInfo.find(getAliasName(FuncName));
  if (It == ProgramPathAndClusterInfo.end())
    return {};
  return It->second.PrefetchHints;
}

// Reads the version 1 basic block sections profile. Profile for each function
// is encoded as follows:
// m <module_name>
Expand Down Expand Up @@ -148,6 +161,36 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount(
// +-->: 5 :
// ....
// ****************************************************************************
// This profile can also specify prefetch targets (starting with 't') which
// instruct the compiler to emit a prefetch symbol for the given target.
// A prefetch target is specified by a pair "<bbid>,<subblock_index>" where
// bbid specifies the target basic block and subblock_index is a zero-based
// index. Callsite 0 refers to the region at the beginning of the block up to
// the first callsite. Callsite `i > 0` refers to the region immediately after
// the `i`-th callsite up to the `i+1`-th callsite (or the end of the block).
// The prefetch target is always emitted at the beginning of the subblock.
// This is the beginning of the basic block for `i = 0` and immediately after
// the `i`-th call for every `i > 0`.
//
// Example: A basic block in function "foo" with BBID 10 and two call
// instructions (call_A, call_B). This block is conceptually split into
// subblocks, with the prefetch target symbol emitted at the beginning of each
// subblock.
//
// +----------------------------------+
// | __llvm_prefetch_target_foo_10_0: | <- Callsite 0 (before call_A)
// | Instruction 1 |
// | Instruction 2 |
// | call_A (Callsite 0) |
// | __llvm_prefetch_target_foo_10_1: | <--- Callsite 1 (after call_A,
// | | before call_B)
// | Instruction 3 |
// | call_B (Callsite 1) |
// | __llvm_prefetch_target_foo_10_2: | <--- Callsite 2 (after call_B,
// |                                  |      up to the end of the block)
// | Instruction 4 |
// +----------------------------------+
//
Error BasicBlockSectionsProfileReader::ReadV1Profile() {
auto FI = ProgramPathAndClusterInfo.end();

Expand Down Expand Up @@ -308,6 +351,67 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
}
continue;
}
case 'i': { // Prefetch hint specifier.
  // Skip the hint when the profile iterator (FI) refers to the past-the-end
  // element.
  if (FI == ProgramPathAndClusterInfo.end())
    continue;
  // Two fields are expected: the prefetch site "<bbid>,<callsite_index>"
  // and the prefetch target "<function>,<bbid>,<callsite_index>".
  if (Values.size() != 2)
    return createProfileParseError(Twine("Prefetch hint expected: ") + S);

  // Parse the site at which the prefetch is injected.
  SmallVector<StringRef, 2> PrefetchSiteStr;
  Values[0].split(PrefetchSiteStr, ',');
  if (PrefetchSiteStr.size() != 2)
    return createProfileParseError(Twine("Prefetch site expected: ") +
                                   Values[0]);
  auto SiteBBID = parseUniqueBBID(PrefetchSiteStr[0]);
  if (!SiteBBID)
    return SiteBBID.takeError();
  unsigned long long SiteCallsiteIndex;
  if (getAsUnsignedInteger(PrefetchSiteStr[1], 10, SiteCallsiteIndex))
    return createProfileParseError(Twine("unsigned integer expected: '") +
                                   PrefetchSiteStr[1] + "'");

  // Parse the target being prefetched.
  SmallVector<StringRef, 3> PrefetchTargetStr;
  Values[1].split(PrefetchTargetStr, ',');
  if (PrefetchTargetStr.size() != 3)
    return createProfileParseError(Twine("Prefetch target expected: ") +
                                   Values[1]);
  auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[1]);
  if (!TargetBBID)
    return TargetBBID.takeError();
  unsigned long long TargetCallsiteIndex;
  if (getAsUnsignedInteger(PrefetchTargetStr[2], 10, TargetCallsiteIndex))
    return createProfileParseError(Twine("unsigned integer expected: '") +
                                   PrefetchTargetStr[2] + "'");

  FI->second.PrefetchHints.push_back(PrefetchHint{
      CallsiteID{*SiteBBID, static_cast<unsigned>(SiteCallsiteIndex)},
      PrefetchTargetStr[0],
      CallsiteID{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)}});
  continue;
}
case 't': { // Prefetch target specifier.
  // Skip the target when the profile iterator (FI) refers to the past-the-end
  // element.
  if (FI == ProgramPathAndClusterInfo.end())
    continue;
  // A single field "<bbid>,<callsite_index>" is expected.
  if (Values.size() != 1)
    return createProfileParseError(Twine("Prefetch target expected: ") + S);
  SmallVector<StringRef, 2> PrefetchTargetStr;
  Values[0].split(PrefetchTargetStr, ',');
  if (PrefetchTargetStr.size() != 2)
    return createProfileParseError(Twine("Prefetch target expected: ") +
                                   Values[0]);
  auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
  if (!TargetBBID)
    return TargetBBID.takeError();
  // The callsite index is parsed as an unsigned value, so report it as such.
  unsigned long long CallsiteIndex;
  if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, CallsiteIndex))
    return createProfileParseError(Twine("unsigned integer expected: '") +
                                   PrefetchTargetStr[1] + "'");
  FI->second.PrefetchTargets.push_back(
      CallsiteID{*TargetBBID, static_cast<unsigned>(CallsiteIndex)});
  continue;
}
default:
return createProfileParseError(Twine("invalid specifier: '") +
Twine(Specifier) + "'");
Expand Down Expand Up @@ -514,6 +618,18 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount(
return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID);
}

// Forwards to the wrapped profile reader to obtain the prefetch targets of
// function `FuncName`.
SmallVector<CallsiteID>
BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
    StringRef FuncName) const {
  SmallVector<CallsiteID> Targets =
      BBSPR.getPrefetchTargetsForFunction(FuncName);
  return Targets;
}

// Forwards to the wrapped profile reader to obtain the prefetch hints of
// function `FuncName`.
SmallVector<PrefetchHint>
BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction(
    StringRef FuncName) const {
  SmallVector<PrefetchHint> Hints =
      BBSPR.getPrefetchHintsForFunction(FuncName);
  return Hints;
}

BasicBlockSectionsProfileReader &
BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() {
return BBSPR;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ add_llvm_component_library(LLVMCodeGen
IndirectBrExpandPass.cpp
InitUndef.cpp
InlineSpiller.cpp
InsertCodePrefetch.cpp
InterferenceCache.cpp
InterleavedAccessPass.cpp
InterleavedLoadCombinePass.cpp
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/CodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/FloatingPointPredicateUtils.h"
Expand Down
Loading
Loading