-
Notifications
You must be signed in to change notification settings - Fork 15.2k
X86: Add prefetch insertion based on Propeller profile #166324
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-backend-x86 Author: Rahman Lavaee (rlavaee) Changes: This commit introduces a new pass for prefetch insertion on X86 targets. The pass utilizes Propeller profiles to guide prefetch placement, optimizing memory access patterns. The new file llvm/lib/Target/X86/PrefetchInsertion.cpp implements this functionality. This commit also includes necessary modifications to related CodeGen and X86 target files to integrate the new pass. A build issue where PrefetchInsertion.cpp was not included in the CMakeLists.txt was also resolved. Patch is 93.21 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/166324.diff 13 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 48650a6df22ff..b288374a38226 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -42,6 +42,17 @@ struct BBClusterInfo {
unsigned PositionInCluster;
};
+struct BBPosition {
+ UniqueBBID BBID;
+ unsigned BBOffset;
+};
+
+struct PrefetchHint {
+ BBPosition SitePosition;
+ StringRef TargetFunctionName;
+ BBPosition TargetPosition;
+};
+
// This represents the raw input profile for one function.
struct FunctionPathAndClusterInfo {
// BB Cluster information specified by `UniqueBBID`s.
@@ -50,19 +61,42 @@ struct FunctionPathAndClusterInfo {
// the edge a -> b (a is not cloned). The index of the path in this vector
// determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
SmallVector<SmallVector<unsigned>> ClonePaths;
+ SmallVector<PrefetchHint> PrefetchHints;
+ DenseSet<BBPosition> PrefetchTargets;
// Node counts for each basic block.
DenseMap<UniqueBBID, uint64_t> NodeCounts;
- // Edge counts for each edge, stored as a nested map.
+ // Edge counts for each edge.
DenseMap<UniqueBBID, DenseMap<UniqueBBID, uint64_t>> EdgeCounts;
};
+// Provides DenseMapInfo for BBPosition.
+template <> struct DenseMapInfo<BBPosition> {
+ static inline BBPosition getEmptyKey() {
+ return {DenseMapInfo<UniqueBBID>::getEmptyKey(),
+ DenseMapInfo<unsigned>::getEmptyKey()};
+ }
+ static inline BBPosition getTombstoneKey() {
+ return BBPosition{DenseMapInfo<UniqueBBID>::getTombstoneKey(),
+ DenseMapInfo<unsigned>::getTombstoneKey()};
+ }
+ static unsigned getHashValue(const BBPosition &Val) {
+ std::pair<unsigned, unsigned> PairVal = std::make_pair(
+ DenseMapInfo<UniqueBBID>::getHashValue(Val.BBID), Val.BBOffset);
+ return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal);
+ }
+ static bool isEqual(const BBPosition &LHS, const BBPosition &RHS) {
+ return DenseMapInfo<UniqueBBID>::isEqual(LHS.BBID, RHS.BBID) &&
+ DenseMapInfo<unsigned>::isEqual(LHS.BBOffset, RHS.BBOffset);
+ }
+};
+
class BasicBlockSectionsProfileReader {
public:
friend class BasicBlockSectionsProfileReaderWrapperPass;
BasicBlockSectionsProfileReader(const MemoryBuffer *Buf)
- : MBuf(Buf), LineIt(*Buf, /*SkipBlanks=*/true, /*CommentMarker=*/'#'){};
+ : MBuf(Buf), LineIt(*Buf, /*SkipBlanks=*/true, /*CommentMarker=*/'#') {};
- BasicBlockSectionsProfileReader(){};
+ BasicBlockSectionsProfileReader() {};
// Returns true if basic block sections profile exist for function \p
// FuncName.
@@ -86,6 +120,11 @@ class BasicBlockSectionsProfileReader {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &SinkBBID) const;
+ SmallVector<PrefetchHint>
+ getPrefetchHintsForFunction(StringRef FuncName) const;
+
+ DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+
private:
StringRef getAliasName(StringRef FuncName) const {
auto R = FuncAliasMap.find(FuncName);
@@ -194,6 +233,10 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &DestBBID) const;
+ SmallVector<PrefetchHint>
+ getPrefetchHintsForFunction(StringRef FuncName) const;
+
+ DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
// Initializes the FunctionNameToDIFilename map for the current module and
// then reads the profile for the matching functions.
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 71739278cf513..deff97416df23 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -100,6 +100,12 @@ template <> struct DenseMapInfo<MBBSectionID> {
}
};
+struct PrefetchTarget {
+ StringRef TargetFunction;
+ UniqueBBID TargetBBID;
+ unsigned TargetBBOffset;
+};
+
template <> struct ilist_traits<MachineInstr> {
private:
friend class MachineBasicBlock; // Set by the owning MachineBasicBlock.
@@ -213,6 +219,8 @@ class MachineBasicBlock
/// basic block sections and basic block labels.
std::optional<UniqueBBID> BBID;
+ SmallVector<unsigned> PrefetchTargets;
+
/// With basic block sections, this stores the Section ID of the basic block.
MBBSectionID SectionID{0};
@@ -229,6 +237,8 @@ class MachineBasicBlock
/// is only computed once and is cached.
mutable MCSymbol *CachedMCSymbol = nullptr;
+ mutable SmallVector<MCSymbol *, 4> CallInstSymbols;
+
/// Cached MCSymbol for this block (used if IsEHContTarget).
mutable MCSymbol *CachedEHContMCSymbol = nullptr;
@@ -254,9 +264,7 @@ class MachineBasicBlock
/// Remove the reference to the underlying IR BasicBlock. This is for
/// reduction tools and should generally not be used.
- void clearBasicBlock() {
- BB = nullptr;
- }
+ void clearBasicBlock() { BB = nullptr; }
/// Check if there is a name of corresponding LLVM basic block.
LLVM_ABI bool hasName() const;
@@ -348,24 +356,24 @@ class MachineBasicBlock
LLVM_ABI bool sizeWithoutDebugLargerThan(unsigned Limit) const;
bool empty() const { return Insts.empty(); }
- MachineInstr &instr_front() { return Insts.front(); }
- MachineInstr &instr_back() { return Insts.back(); }
+ MachineInstr &instr_front() { return Insts.front(); }
+ MachineInstr &instr_back() { return Insts.back(); }
const MachineInstr &instr_front() const { return Insts.front(); }
- const MachineInstr &instr_back() const { return Insts.back(); }
-
- MachineInstr &front() { return Insts.front(); }
- MachineInstr &back() { return *--end(); }
- const MachineInstr &front() const { return Insts.front(); }
- const MachineInstr &back() const { return *--end(); }
-
- instr_iterator instr_begin() { return Insts.begin(); }
- const_instr_iterator instr_begin() const { return Insts.begin(); }
- instr_iterator instr_end() { return Insts.end(); }
- const_instr_iterator instr_end() const { return Insts.end(); }
- reverse_instr_iterator instr_rbegin() { return Insts.rbegin(); }
+ const MachineInstr &instr_back() const { return Insts.back(); }
+
+ MachineInstr &front() { return Insts.front(); }
+ MachineInstr &back() { return *--end(); }
+ const MachineInstr &front() const { return Insts.front(); }
+ const MachineInstr &back() const { return *--end(); }
+
+ instr_iterator instr_begin() { return Insts.begin(); }
+ const_instr_iterator instr_begin() const { return Insts.begin(); }
+ instr_iterator instr_end() { return Insts.end(); }
+ const_instr_iterator instr_end() const { return Insts.end(); }
+ reverse_instr_iterator instr_rbegin() { return Insts.rbegin(); }
const_reverse_instr_iterator instr_rbegin() const { return Insts.rbegin(); }
- reverse_instr_iterator instr_rend () { return Insts.rend(); }
- const_reverse_instr_iterator instr_rend () const { return Insts.rend(); }
+ reverse_instr_iterator instr_rend() { return Insts.rend(); }
+ const_reverse_instr_iterator instr_rend() const { return Insts.rend(); }
using instr_range = iterator_range<instr_iterator>;
using const_instr_range = iterator_range<const_instr_iterator>;
@@ -374,10 +382,10 @@ class MachineBasicBlock
return const_instr_range(instr_begin(), instr_end());
}
- iterator begin() { return instr_begin(); }
- const_iterator begin() const { return instr_begin(); }
- iterator end () { return instr_end(); }
- const_iterator end () const { return instr_end(); }
+ iterator begin() { return instr_begin(); }
+ const_iterator begin() const { return instr_begin(); }
+ iterator end() { return instr_end(); }
+ const_iterator end() const { return instr_end(); }
reverse_iterator rbegin() {
return reverse_iterator::getAtBundleBegin(instr_rbegin());
}
@@ -424,38 +432,30 @@ class MachineBasicBlock
SmallVectorImpl<MachineBasicBlock *>::reverse_iterator;
using const_succ_reverse_iterator =
SmallVectorImpl<MachineBasicBlock *>::const_reverse_iterator;
- pred_iterator pred_begin() { return Predecessors.begin(); }
- const_pred_iterator pred_begin() const { return Predecessors.begin(); }
- pred_iterator pred_end() { return Predecessors.end(); }
- const_pred_iterator pred_end() const { return Predecessors.end(); }
- pred_reverse_iterator pred_rbegin()
- { return Predecessors.rbegin();}
- const_pred_reverse_iterator pred_rbegin() const
- { return Predecessors.rbegin();}
- pred_reverse_iterator pred_rend()
- { return Predecessors.rend(); }
- const_pred_reverse_iterator pred_rend() const
- { return Predecessors.rend(); }
- unsigned pred_size() const {
- return (unsigned)Predecessors.size();
- }
- bool pred_empty() const { return Predecessors.empty(); }
- succ_iterator succ_begin() { return Successors.begin(); }
- const_succ_iterator succ_begin() const { return Successors.begin(); }
- succ_iterator succ_end() { return Successors.end(); }
- const_succ_iterator succ_end() const { return Successors.end(); }
- succ_reverse_iterator succ_rbegin()
- { return Successors.rbegin(); }
- const_succ_reverse_iterator succ_rbegin() const
- { return Successors.rbegin(); }
- succ_reverse_iterator succ_rend()
- { return Successors.rend(); }
- const_succ_reverse_iterator succ_rend() const
- { return Successors.rend(); }
- unsigned succ_size() const {
- return (unsigned)Successors.size();
- }
- bool succ_empty() const { return Successors.empty(); }
+ pred_iterator pred_begin() { return Predecessors.begin(); }
+ const_pred_iterator pred_begin() const { return Predecessors.begin(); }
+ pred_iterator pred_end() { return Predecessors.end(); }
+ const_pred_iterator pred_end() const { return Predecessors.end(); }
+ pred_reverse_iterator pred_rbegin() { return Predecessors.rbegin(); }
+ const_pred_reverse_iterator pred_rbegin() const {
+ return Predecessors.rbegin();
+ }
+ pred_reverse_iterator pred_rend() { return Predecessors.rend(); }
+ const_pred_reverse_iterator pred_rend() const { return Predecessors.rend(); }
+ unsigned pred_size() const { return (unsigned)Predecessors.size(); }
+ bool pred_empty() const { return Predecessors.empty(); }
+ succ_iterator succ_begin() { return Successors.begin(); }
+ const_succ_iterator succ_begin() const { return Successors.begin(); }
+ succ_iterator succ_end() { return Successors.end(); }
+ const_succ_iterator succ_end() const { return Successors.end(); }
+ succ_reverse_iterator succ_rbegin() { return Successors.rbegin(); }
+ const_succ_reverse_iterator succ_rbegin() const {
+ return Successors.rbegin();
+ }
+ succ_reverse_iterator succ_rend() { return Successors.rend(); }
+ const_succ_reverse_iterator succ_rend() const { return Successors.rend(); }
+ unsigned succ_size() const { return (unsigned)Successors.size(); }
+ bool succ_empty() const { return Successors.empty(); }
inline iterator_range<pred_iterator> predecessors() {
return make_range(pred_begin(), pred_end());
@@ -528,8 +528,8 @@ class MachineBasicBlock
}
LLVM_ABI livein_iterator livein_begin() const;
- livein_iterator livein_end() const { return LiveIns.end(); }
- bool livein_empty() const { return LiveIns.empty(); }
+ livein_iterator livein_end() const { return LiveIns.end(); }
+ bool livein_empty() const { return LiveIns.empty(); }
iterator_range<livein_iterator> liveins() const {
return make_range(livein_begin(), livein_end());
}
@@ -581,13 +581,9 @@ class MachineBasicBlock
return Tmp;
}
- reference operator*() const {
- return *LiveRegI;
- }
+ reference operator*() const { return *LiveRegI; }
- pointer operator->() const {
- return &*LiveRegI;
- }
+ pointer operator->() const { return &*LiveRegI; }
bool operator==(const liveout_iterator &RHS) const {
if (BlockI != BlockEnd)
@@ -598,6 +594,7 @@ class MachineBasicBlock
bool operator!=(const liveout_iterator &RHS) const {
return !(*this == RHS);
}
+
private:
bool advanceToValidPosition() {
if (LiveRegI != (*BlockI)->livein_end())
@@ -710,6 +707,14 @@ class MachineBasicBlock
std::optional<UniqueBBID> getBBID() const { return BBID; }
+ const SmallVector<unsigned> &getPrefetchTargets() const {
+ return PrefetchTargets;
+ }
+
+ void setPrefetchTargets(const SmallVector<unsigned> &V) {
+ PrefetchTargets = V;
+ }
+
/// Returns the section ID of this basic block.
MBBSectionID getSectionID() const { return SectionID; }
@@ -978,9 +983,7 @@ class MachineBasicBlock
/// Convenience function that returns true if the block ends in a return
/// instruction.
- bool isReturnBlock() const {
- return !empty() && back().isReturn();
- }
+ bool isReturnBlock() const { return !empty() && back().isReturn(); }
/// Convenience function that returns true if the bock ends in a EH scope
/// return instruction.
@@ -1057,8 +1060,7 @@ class MachineBasicBlock
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M);
/// Insert a range of instructions into the instruction list before I.
- template<typename IT>
- void insert(iterator I, IT S, IT E) {
+ template <typename IT> void insert(iterator I, IT S, IT E) {
assert((I == end() || I->getParent() == this) &&
"iterator points outside of basic block");
Insts.insert(I.getInstrIterator(), S, E);
@@ -1116,17 +1118,13 @@ class MachineBasicBlock
/// Remove an instruction or bundle from the instruction list and delete it.
///
/// If I points to a bundle of instructions, they are all erased.
- iterator erase(iterator I) {
- return erase(I, std::next(I));
- }
+ iterator erase(iterator I) { return erase(I, std::next(I)); }
/// Remove an instruction from the instruction list and delete it.
///
/// If I is the head of a bundle of instructions, the whole bundle will be
/// erased.
- iterator erase(MachineInstr *I) {
- return erase(iterator(I));
- }
+ iterator erase(MachineInstr *I) { return erase(iterator(I)); }
/// Remove the unbundled instruction from the instruction list without
/// deleting it.
@@ -1145,9 +1143,7 @@ class MachineBasicBlock
/// bundle will still be bundled after removing the single instruction.
LLVM_ABI MachineInstr *remove_instr(MachineInstr *I);
- void clear() {
- Insts.clear();
- }
+ void clear() { Insts.clear(); }
/// Take an instruction from MBB 'Other' at the position From, and insert it
/// into this MBB right before 'Where'.
@@ -1164,8 +1160,8 @@ class MachineBasicBlock
///
/// The instruction at 'Where' must not be included in the range of
/// instructions to move.
- void splice(iterator Where, MachineBasicBlock *Other,
- iterator From, iterator To) {
+ void splice(iterator Where, MachineBasicBlock *Other, iterator From,
+ iterator To) {
Insts.splice(Where.getInstrIterator(), Other->Insts,
From.getInstrIterator(), To.getInstrIterator());
}
@@ -1251,7 +1247,7 @@ class MachineBasicBlock
bool IsStandalone = true) const;
enum PrintNameFlag {
- PrintNameIr = (1 << 0), ///< Add IR name where available
+ PrintNameIr = (1 << 0), ///< Add IR name where available
PrintNameAttributes = (1 << 1), ///< Print attributes
};
@@ -1275,6 +1271,12 @@ class MachineBasicBlock
/// Return the MCSymbol for this basic block.
LLVM_ABI MCSymbol *getSymbol() const;
+ MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const;
+
+ const SmallVector<MCSymbol *, 4> &getCallInstSymbols() const {
+ return CallInstSymbols;
+ }
+
/// Return the Windows EH Continuation Symbol for this basic block.
LLVM_ABI MCSymbol *getEHContSymbol() const;
@@ -1282,9 +1284,7 @@ class MachineBasicBlock
return IrrLoopHeaderWeight;
}
- void setIrrLoopHeaderWeight(uint64_t Weight) {
- IrrLoopHeaderWeight = Weight;
- }
+ void setIrrLoopHeaderWeight(uint64_t Weight) { IrrLoopHeaderWeight = Weight; }
/// Return probability of the edge from this block to MBB. This method should
/// NOT be called directly, but by using getEdgeProbability method from
@@ -1393,7 +1393,7 @@ static_assert(GraphHasNodeNumbers<const MachineBasicBlock *>,
// to be when traversing the predecessor edges of a MBB
// instead of the successor edges.
//
-template <> struct GraphTraits<Inverse<MachineBasicBlock*>> {
+template <> struct GraphTraits<Inverse<MachineBasicBlock *>> {
using NodeRef = MachineBasicBlock *;
using ChildIteratorType = MachineBasicBlock::pred_iterator;
@@ -1413,7 +1413,7 @@ template <> struct GraphTraits<Inverse<MachineBasicBlock*>> {
static_assert(GraphHasNodeNumbers<Inverse<MachineBasicBlock *>>,
"GraphTraits getNumber() not detected");
-template <> struct GraphTraits<Inverse<const MachineBasicBlock*>> {
+template <> struct GraphTraits<Inverse<const MachineBasicBlock *>> {
using NodeRef = const MachineBasicBlock *;
using ChildIteratorType = MachineBasicBlock::const_pred_iterator;
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 4fcb7f36e0238..ab9fe82bc7917 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -78,9 +78,9 @@ class MachineInstr
/// otherwise easily derivable from the IR text.
///
enum CommentFlag {
- ReloadReuse = 0x1, // higher bits are reserved for target dep comments.
+ ReloadReuse = 0x1, // higher bits are reserved for target dep comments.
NoSchedComment = 0x2,
- TAsmComments = 0x4 // Target Asm comments should start from this value.
+ TAsmComments = 0x4 // Target Asm comments should start from this value.
};
enum MIFlag {
@@ -123,16 +123,17 @@ class MachineInstr
NoUSWrap = 1 << 20, // Instruction supports geps
// no unsigned signed wrap.
SameSign = 1 << 21, // Both operands have the same sign.
- InBounds = 1 << 22 // Pointer arithmetic remains inbounds.
+ InBounds = 1 << 22, // Pointer arithmetic remains inbounds.
// Implies NoUSWrap.
+ Prefetch = 1 << 23, // Instruction is a prefetch.
};
private:
- const MCInstrDesc *MCID; // Instruction descriptor.
- MachineBasicBlock *Parent = nullptr; // Pointer to the owning basic block.
+ const MCInstrDesc *MCID; // Instruction descriptor.
+ MachineBasicBlock *Parent = nullptr; // Pointer to the owning basic block.
// Operands are allocated by an ArrayRecycler.
- MachineOperand *Operands = nullptr; // Pointer to the first operand.
+ MachineOperand *Operands = nullptr; // Pointer to the first operand.
#define LLVM_MI_NUMOPERANDS_BITS 24
#define LLVM_MI_FLAGS_BITS 24
@@ -144,7 +145,7 @@ class MachineInstr
// OperandCapacity has uint8_t size, so it should be next to NumOperands
// to properly pack.
using OperandCapacity = ArrayRecycler<MachineOperand>::Capacity;
- OperandCapacity CapOperands; // Capacity of the Operands array.
+ OperandCapacity CapOperands; // Capacity of the Operands array.
/// Various bits of additional information about the machine instruction.
uint32_t Flags : LLVM_MI_FLAGS_BITS;
@@ -226,9 +227,8 @@ class MachineInstr
}
MDNode *getPCSections() const {
- return HasPCSections
- ? g...
[truncated]
|
3e6212c to
47e051c
Compare
47e051c to
87f856a
Compare
8d6b7ef to
e1fc727
Compare
boomanaiden154
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we also get a NewPM version of this pass? See some of my recent patches to https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/X86/X86PassRegistry.def on how to wire it up.
Apologies. I thought my initial PR was a draft. |
🐧 Linux x64 Test Results
|
This is the second PR (currently including the changes from the first PR) for enabling Prefetch insertion using Propeller.
This PR uses the PrefetchInsertion pass introduced in the first PR to insert prefetch hints at the given positions.