3737#include " SIRegisterInfo.h"
3838#include " llvm/CodeGen/LiveIntervals.h"
3939#include " llvm/CodeGen/MachineFunctionPass.h"
40+ #include " llvm/CodeGen/Register.h"
4041#include " llvm/InitializePasses.h"
4142
4243using namespace llvm ;
@@ -253,37 +254,45 @@ bool GCNPreRAOptimizationsImpl::run(MachineFunction &MF) {
253254 TRI = ST.getRegisterInfo ();
254255
255256 bool Changed = false ;
256-
257- // Single pass implementation
257+ // Add RA anti-hints to reduce MFMA hazard NOPs
258258 if (EnableAntiHintsForMFMARegs && ST.hasMAIInsts ()) {
259259 // Max lookback window for RAW or WAW hazard
260260 constexpr unsigned MaxLookbackWindow = 19 ;
261261 for (const MachineBasicBlock &MBB : MF) {
262-
263- SmallVector<std::pair<SlotIndex, SmallVector<Register, 4 >>, 16 >
264- RecentMFMAs;
262+ SmallVector<SmallVector<Register, 4 >, 16 > RecentMFMAs;
265263 for (const MachineInstr &MI : MBB) {
266264 if (MI.isDebugInstr ())
267265 continue ;
268- const SlotIndex CurrentSlot = LIS-> getInstructionIndex (MI). getRegSlot ();
266+
269267 // Handle MFMA instructions
270268 if (SIInstrInfo::isMFMA (MI)) {
271269 SmallVector<Register, 4 > MFMARegisters;
272- auto collectMFMARegister = [&](unsigned OpIdx) {
273- if (OpIdx >= MI.getNumOperands ())
270+ // Helper to get named operand
271+ auto collectNamedOperand = [&](AMDGPU::OpName OpName,
272+ const char *OpNameStr) {
273+ const MachineOperand *MO = TII->getNamedOperand (MI, OpName);
274+ if (!MO) {
275+ LLVM_DEBUG (dbgs () << " Named operand " << OpNameStr
276+ << " not found\n " );
274277 return ;
275-
276- const MachineOperand &MO = MI.getOperand (OpIdx);
277- if (MO.isReg () && MO.getReg ().isVirtual ())
278- MFMARegisters.push_back (MO.getReg ());
278+ }
279+ if (MO->isReg () && MO->getReg ().isVirtual ()) {
280+ Register Reg = MO->getReg ();
281+ const TargetRegisterClass *RC = MRI->getRegClass (Reg);
282+ // Only consider VGPRs
283+ if (TRI->hasVGPRs (RC))
284+ MFMARegisters.push_back (Reg);
285+ LLVM_DEBUG (dbgs () << " Collected " << OpNameStr << " : "
286+ << printReg (Reg, TRI) << " \n " );
287+ }
279288 };
280- // Only collect Matrix C (operand 3) and destination (operand 0)
281- // registers
282- collectMFMARegister (0 );
283- collectMFMARegister (3 );
284289
290+ // Collect destination and source C registers
291+ collectNamedOperand (AMDGPU::OpName::vdst, " vdst" ); // Destination
292+ collectNamedOperand (AMDGPU::OpName::src2,
293+ " src2" ); // Matrix C (accumulator)
285294 if (!MFMARegisters.empty ()) {
286- RecentMFMAs.emplace_back (CurrentSlot, std::move (MFMARegisters));
295+ RecentMFMAs.emplace_back (std::move (MFMARegisters));
287296 // Maintain window
288297 if (RecentMFMAs.size () > MaxLookbackWindow)
289298 RecentMFMAs.erase (RecentMFMAs.begin ());
@@ -309,17 +318,13 @@ bool GCNPreRAOptimizationsImpl::run(MachineFunction &MF) {
309318 // Only process VGPR registers
310319 if (!TRI->isVGPRClass (CandidateRC))
311320 continue ;
312-
313321 for (auto It = RecentMFMAs.rbegin (); It != RecentMFMAs.rend (); ++It) {
314- const SmallVector<Register, 4 > &MFMARegs = It-> second ;
322+ const SmallVector<Register, 4 > &MFMARegs = *It ;
315323 for (Register MFMAReg : MFMARegs) {
316- // Verify register class compatibility
317- const TargetRegisterClass *MFMARC = MRI->getRegClass (MFMAReg);
318- if (!TRI->hasVGPRs (MFMARC))
319- continue ;
320-
321324 // Check if MFMA register is dead at current instruction
322325 const LiveInterval &MFMAInterval = LIS->getInterval (MFMAReg);
326+ const SlotIndex CurrentSlot =
327+ LIS->getInstructionIndex (MI).getRegSlot ();
323328 if (!MFMAInterval.liveAt (CurrentSlot)) {
324329 // Add bi-directional anti-hints
325330 MRI->addRegAllocationAntiHints (CandidateReg, MFMAReg);
0 commit comments