Skip to content

Commit 837a3a9

Browse files
committed
Named operand and stable partition applied
(cherry picked from commit ee6d876fcc3d84d6ea3a68b3eee1ce97e714b6e6)
1 parent 3e74575 commit 837a3a9

12 files changed

+890
-1280
lines changed

llvm/include/llvm/CodeGen/MachineRegisterInfo.h

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -867,13 +867,13 @@ class MachineRegisterInfo {
867867
return RegAllocHints.inBounds(VReg) ? &RegAllocHints[VReg] : nullptr;
868868
}
869869

870-
/// setRegAllocationAntiHint - Add a register allocation anti-hint for the
870+
/// addRegAllocAntiHint - Add a register allocation anti-hint for the
871871
/// specified virtual register. This tells the allocator to avoid allocating
872872
/// VReg to the same physical register as AntiHintVReg (or overlapping ones).
873-
void setRegAllocationAntiHint(Register VReg, Register AntiHintVReg) {
873+
void addRegAllocAntiHint(Register VReg, Register AntiHintVReg) {
874874
assert(VReg.isVirtual() && "Anti-hints are only for virtual registers");
875875
assert(AntiHintVReg.isVirtual() && "Anti-hint target must be virtual");
876-
AntiHintRegs.grow(Register::index2VirtReg(getNumVirtRegs()));
876+
AntiHintRegs.grow(VReg);
877877
SmallVector<Register, 4> &AntiHints = AntiHintRegs[VReg];
878878
// Avoid duplicates
879879
if (llvm::find(AntiHints, AntiHintVReg) == AntiHints.end())
@@ -884,7 +884,7 @@ class MachineRegisterInfo {
884884
void addRegAllocationAntiHints(Register VReg,
885885
ArrayRef<Register> AntiHintVRegs) {
886886
for (Register AntiHint : AntiHintVRegs)
887-
setRegAllocationAntiHint(VReg, AntiHint);
887+
addRegAllocAntiHint(VReg, AntiHint);
888888
}
889889

890890
/// clearRegAllocationAntiHints - Clear all anti-hints for a register.
@@ -915,7 +915,7 @@ class MachineRegisterInfo {
915915
/// VRM is the current virtual register map showing allocations made so far.
916916
void getPhysRegAntiHints(Register VReg,
917917
SmallVectorImpl<MCPhysReg> &PhysAntiHints,
918-
const VirtRegMap *VRM) const;
918+
const VirtRegMap &VRM) const;
919919

920920
/// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the
921921
/// specified register as undefined which causes the DBG_VALUE to be

llvm/lib/CodeGen/AllocationOrder.cpp

Lines changed: 22 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ AllocationOrder AllocationOrder::create(Register VirtReg, const VirtRegMap &VRM,
4848

4949
// Get anti-hints
5050
SmallVector<MCPhysReg, 16> AntiHintedPhysRegs;
51-
MRI.getPhysRegAntiHints(VirtReg, AntiHintedPhysRegs, &VRM);
51+
MRI.getPhysRegAntiHints(VirtReg, AntiHintedPhysRegs, VRM);
5252

5353
LLVM_DEBUG({
5454
if (!AntiHintedPhysRegs.empty()) {
@@ -84,29 +84,34 @@ AllocationOrder AllocationOrder::create(Register VirtReg, const VirtRegMap &VRM,
8484

8585
void AllocationOrder::applyAntiHints(ArrayRef<MCPhysReg> AntiHintedPhysRegs,
8686
const TargetRegisterInfo *TRI) {
87+
// Helper to check if a register overlaps with any anti-hint
88+
auto isAntiHinted = [&](MCPhysReg Reg) {
89+
return std::any_of(
90+
AntiHintedPhysRegs.begin(), AntiHintedPhysRegs.end(),
91+
[&](MCPhysReg AntiHint) { return TRI->regsOverlap(Reg, AntiHint); });
92+
};
93+
8794
// Create filtered order
8895
FilteredOrderStorage.clear();
89-
FilteredOrderStorage.reserve(Order.size());
90-
91-
// Add non-anti-hinted registers first
92-
for (MCPhysReg PhysReg : Order) {
93-
if (!is_contained(AntiHintedPhysRegs, PhysReg)) {
94-
FilteredOrderStorage.push_back(PhysReg);
95-
}
96-
}
96+
FilteredOrderStorage.assign(Order.begin(), Order.end());
9797

98-
// Add anti-hinted registers at the end as last resort
99-
for (MCPhysReg PhysReg : Order) {
100-
if (is_contained(AntiHintedPhysRegs, PhysReg)) {
101-
FilteredOrderStorage.push_back(PhysReg);
102-
}
103-
}
98+
// Partition: non-anti-hinted registers go first
99+
auto PartitionPoint = std::stable_partition(
100+
FilteredOrderStorage.begin(), FilteredOrderStorage.end(),
101+
[&](MCPhysReg Reg) { return !isAntiHinted(Reg); });
104102

105103
// Update Order
106104
Order = FilteredOrderStorage;
107105

108106
LLVM_DEBUG({
109-
dbgs() << "moved " << AntiHintedPhysRegs.size()
110-
<< " anti-hinted registers to end of allocation order\n";
107+
size_t NonAntiHintedCount =
108+
std::distance(FilteredOrderStorage.begin(), PartitionPoint);
109+
size_t AntiHintedCount =
110+
std::distance(PartitionPoint, FilteredOrderStorage.end());
111+
dbgs() << " Added " << NonAntiHintedCount
112+
<< " non-anti-hinted registers first\n"
113+
<< " Added " << AntiHintedCount
114+
<< " anti-hinted registers at the end\n"
115+
<< " Anti-hint filtering complete\n";
111116
});
112117
}

llvm/lib/CodeGen/AllocationOrder.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,8 @@ class LiveRegMatrix;
3030

3131
class LLVM_LIBRARY_VISIBILITY AllocationOrder {
3232
const SmallVector<MCPhysReg, 16> Hints;
33+
// Used as storage if the Order received in the constructor needs to be
34+
// altered.
3335
SmallVector<MCPhysReg, 16> FilteredOrderStorage;
3436
ArrayRef<MCPhysReg> Order;
3537
// How far into the Order we can iterate. This is 0 if the AllocationOrder is

llvm/lib/CodeGen/MachineRegisterInfo.cpp

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -680,22 +680,19 @@ bool MachineRegisterInfo::isReservedRegUnit(unsigned Unit) const {
680680

681681
void MachineRegisterInfo::getPhysRegAntiHints(
682682
Register VReg, SmallVectorImpl<MCPhysReg> &PhysAntiHints,
683-
const VirtRegMap *VRM) const {
683+
const VirtRegMap &VRM) const {
684684
assert(VReg.isVirtual());
685-
if (!AntiHintRegs.inBounds(VReg) || !VRM)
685+
if (!AntiHintRegs.inBounds(VReg))
686686
return;
687687

688688
const SmallVector<Register, 4> &AntiHints = AntiHintRegs[VReg];
689-
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
690689

691690
for (Register AntiHintVReg : AntiHints) {
692691
// Check if the anti-hinted register has been allocated
693-
if (VRM->hasPhys(AntiHintVReg)) {
694-
MCPhysReg PhysReg = VRM->getPhys(AntiHintVReg);
695-
// Add the physical register and all its aliases
696-
for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) {
697-
PhysAntiHints.push_back(*AI);
698-
}
692+
if (VRM.hasPhys(AntiHintVReg)) {
693+
MCPhysReg PhysReg = VRM.getPhys(AntiHintVReg);
694+
// Add the physical register
695+
PhysAntiHints.push_back(PhysReg);
699696
}
700697
}
701698

llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp

Lines changed: 29 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@
3737
#include "SIRegisterInfo.h"
3838
#include "llvm/CodeGen/LiveIntervals.h"
3939
#include "llvm/CodeGen/MachineFunctionPass.h"
40+
#include "llvm/CodeGen/Register.h"
4041
#include "llvm/InitializePasses.h"
4142

4243
using namespace llvm;
@@ -253,37 +254,45 @@ bool GCNPreRAOptimizationsImpl::run(MachineFunction &MF) {
253254
TRI = ST.getRegisterInfo();
254255

255256
bool Changed = false;
256-
257-
// Single pass implementation
257+
// Add RA anti-hints to reduce MFMA hazard NOPs
258258
if (EnableAntiHintsForMFMARegs && ST.hasMAIInsts()) {
259259
// Max lookback window for RAW or WAW hazard
260260
constexpr unsigned MaxLookbackWindow = 19;
261261
for (const MachineBasicBlock &MBB : MF) {
262-
263-
SmallVector<std::pair<SlotIndex, SmallVector<Register, 4>>, 16>
264-
RecentMFMAs;
262+
SmallVector<SmallVector<Register, 4>, 16> RecentMFMAs;
265263
for (const MachineInstr &MI : MBB) {
266264
if (MI.isDebugInstr())
267265
continue;
268-
const SlotIndex CurrentSlot = LIS->getInstructionIndex(MI).getRegSlot();
266+
269267
// Handle MFMA instructions
270268
if (SIInstrInfo::isMFMA(MI)) {
271269
SmallVector<Register, 4> MFMARegisters;
272-
auto collectMFMARegister = [&](unsigned OpIdx) {
273-
if (OpIdx >= MI.getNumOperands())
270+
// Helper to get named operand
271+
auto collectNamedOperand = [&](AMDGPU::OpName OpName,
272+
const char *OpNameStr) {
273+
const MachineOperand *MO = TII->getNamedOperand(MI, OpName);
274+
if (!MO) {
275+
LLVM_DEBUG(dbgs() << " Named operand " << OpNameStr
276+
<< " not found\n");
274277
return;
275-
276-
const MachineOperand &MO = MI.getOperand(OpIdx);
277-
if (MO.isReg() && MO.getReg().isVirtual())
278-
MFMARegisters.push_back(MO.getReg());
278+
}
279+
if (MO->isReg() && MO->getReg().isVirtual()) {
280+
Register Reg = MO->getReg();
281+
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
282+
// Only consider VGPRs
283+
if (TRI->hasVGPRs(RC))
284+
MFMARegisters.push_back(Reg);
285+
LLVM_DEBUG(dbgs() << " Collected " << OpNameStr << " : "
286+
<< printReg(Reg, TRI) << "\n");
287+
}
279288
};
280-
// Only collect Matrix C (operand 3) and destination (operand 0)
281-
// registers
282-
collectMFMARegister(0);
283-
collectMFMARegister(3);
284289

290+
// Collect destination and source C registers
291+
collectNamedOperand(AMDGPU::OpName::vdst, "vdst"); // Destination
292+
collectNamedOperand(AMDGPU::OpName::src2,
293+
"src2"); // Matrix C (accumulator)
285294
if (!MFMARegisters.empty()) {
286-
RecentMFMAs.emplace_back(CurrentSlot, std::move(MFMARegisters));
295+
RecentMFMAs.emplace_back(std::move(MFMARegisters));
287296
// Maintain window
288297
if (RecentMFMAs.size() > MaxLookbackWindow)
289298
RecentMFMAs.erase(RecentMFMAs.begin());
@@ -309,17 +318,13 @@ bool GCNPreRAOptimizationsImpl::run(MachineFunction &MF) {
309318
// Only process VGPR registers
310319
if (!TRI->isVGPRClass(CandidateRC))
311320
continue;
312-
313321
for (auto It = RecentMFMAs.rbegin(); It != RecentMFMAs.rend(); ++It) {
314-
const SmallVector<Register, 4> &MFMARegs = It->second;
322+
const SmallVector<Register, 4> &MFMARegs = *It;
315323
for (Register MFMAReg : MFMARegs) {
316-
// Verify register class compatibility
317-
const TargetRegisterClass *MFMARC = MRI->getRegClass(MFMAReg);
318-
if (!TRI->hasVGPRs(MFMARC))
319-
continue;
320-
321324
// Check if MFMA register is dead at current instruction
322325
const LiveInterval &MFMAInterval = LIS->getInterval(MFMAReg);
326+
const SlotIndex CurrentSlot =
327+
LIS->getInstructionIndex(MI).getRegSlot();
323328
if (!MFMAInterval.liveAt(CurrentSlot)) {
324329
// Add bi-directional anti-hints
325330
MRI->addRegAllocationAntiHints(CandidateReg, MFMAReg);

0 commit comments

Comments
 (0)