From 1c2072fa094775a827144b9ce410f8df96faaf5b Mon Sep 17 00:00:00 2001 From: hjagasiaAMD Date: Thu, 20 Nov 2025 16:09:08 -0600 Subject: [PATCH 1/7] [AMDGPU] Fix AGPR_32 reg assign for mfma scale ops In the MFMA rewrite pass, prevent AGPR_32 register class assignment for scale operands, which the instruction format does not permit. --- llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp | 4 ++++ .../test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp index 89c16dadb4b41..b5e3187289160 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp @@ -302,6 +302,10 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::attemptReassignmentsToAGPR( const TargetRegisterClass *EquivalentAGPRRegClass = TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg)); + // Do not reassign scale operands + if (EquivalentAGPRRegClass == &AMDGPU::AGPR_32RegClass) + return false; + MCPhysReg Assignable = AMDGPU::NoRegister; if (EquivalentAGPRRegClass->contains(PrefPhysReg) && LRM.checkInterference(ReassignLI, PrefPhysReg) == diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir index ab56c9982753f..12be806960b67 100644 --- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir @@ -1,6 +1,6 @@ -# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=greedy,amdgpu-rewrite-agpr-copy-mfma -verify-machineinstrs -o - %s 2>&1 | FileCheck %s -# CHECK: Illegal virtual register for instruction -# CHECK: Expected a VGPR_32 register, but got a AGPR_32 register +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=greedy,amdgpu-rewrite-agpr-copy-mfma -verify-machineinstrs -o - %s 2>&1 | FileCheck %s +# CHECK-NOT: 
Illegal virtual register for instruction +# CHECK-NOT: Expected a VGPR_32 register, but got a AGPR_32 register # Test for issue in amdgpu-rewrite-agpr-copy-mfma, which reassigns scale operand # in vgpr_32 register to agpr_32, not permitted by instruction format. From d9405f21486a7a66f5770b9264f0c1ec45819800 Mon Sep 17 00:00:00 2001 From: hjagasiaAMD Date: Wed, 26 Nov 2025 16:22:38 -0600 Subject: [PATCH 2/7] Check operand constraints and update mir checks. --- .../AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp | 29 ++++++++++++++----- .../rewrite-vgpr-mfma-scale-to-agpr.mir | 8 +++-- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp index b5e3187289160..d957e8e6f85a6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp @@ -96,8 +96,8 @@ class AMDGPURewriteAGPRCopyMFMAImpl { /// Compute the register class constraints based on the uses of \p Reg, /// excluding MFMA uses from which can be rewritten to change the register - /// class constraint. This should be nearly identical to - /// MachineRegisterInfo::recomputeRegClass. + /// class constraint. MFMA scale operands need to be constraint checked. + /// This should be nearly identical to MachineRegisterInfo::recomputeRegClass. /// \p RewriteCandidates will collect the set of MFMA instructions that need /// to have the opcode mutated to perform the replacement. @@ -151,9 +151,26 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable( // We can swap the classes of dst + src2 as a pair to AGPR, so ignore the // effects of rewrite candidates. It just so happens that we can use - // either AGPR or VGPR in src0/src1, so don't bother checking the - // constraint effects of the individual operands. + // either AGPR or VGPR in src0/src1. We still need to check constraint + // effects for scale variant, which does not allow AGPR. 
if (isRewriteCandidate(*MI)) { + + int AGPROp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode()); + MachineInstrBuilder TmpMIB = + BuildMI(*MI->getParent(), MI->getIterator(), MI->getDebugLoc(), + TII.get(AGPROp)); + for (const MachineOperand &TmpMO : MI->operands()) + TmpMIB.add(TmpMO); + MachineInstr *TmpMI = TmpMIB.getInstr(); + unsigned OpNo = &MO - &MI->getOperand(0); + const TargetRegisterClass *EquivalentAGPRRegClass = + TRI.getEquivalentAGPRClass(MRI.getRegClass(Reg)); + const TargetRegisterClass *Allowed = TmpMI->getRegClassConstraintEffect( + OpNo, EquivalentAGPRRegClass, &TII, &TRI); + TmpMI->eraseFromParent(); + if (!Allowed || Allowed != EquivalentAGPRRegClass) + return false; + const MachineOperand *VDst = TII.getNamedOperand(*MI, AMDGPU::OpName::vdst); const MachineOperand *Src2 = @@ -302,10 +319,6 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::attemptReassignmentsToAGPR( const TargetRegisterClass *EquivalentAGPRRegClass = TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg)); - // Do not reassign scale operands - if (EquivalentAGPRRegClass == &AMDGPU::AGPR_32RegClass) - return false; - MCPhysReg Assignable = AMDGPU::NoRegister; if (EquivalentAGPRRegClass->contains(PrefPhysReg) && LRM.checkInterference(ReassignLI, PrefPhysReg) == diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir index 12be806960b67..e8c835c76a374 100644 --- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir @@ -1,7 +1,9 @@ # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=greedy,amdgpu-rewrite-agpr-copy-mfma -verify-machineinstrs -o - %s 2>&1 | FileCheck %s -# CHECK-NOT: Illegal virtual register for instruction -# CHECK-NOT: Expected a VGPR_32 register, but got a AGPR_32 register - +# CHECK: bb.1: +# CHECK: dead %{{[0-9]+}}:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %{{[0-9]+}}, 
%{{[0-9]+}}, %{{[0-9]+}}, 4, 4, %{{[0-9]+}}, %[[REG:[0-9]+]], 4, 0, implicit $mode, implicit $exec +# CHECK: %{{[0-9]+}}:agpr_32 = IMPLICIT_DEF +# CHECK: %[[REG]]:vgpr_32 = COPY %{{[0-9]+}} + # Test for issue in amdgpu-rewrite-agpr-copy-mfma, which reassigns scale operand # in vgpr_32 register to agpr_32, not permitted by instruction format. --- From 08fc310ca9d82acc03f516117cf2e01f65b8189b Mon Sep 17 00:00:00 2001 From: hjagasiaAMD Date: Mon, 1 Dec 2025 21:07:33 -0600 Subject: [PATCH 3/7] Get the static constraint of the known operand. --- .../AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp index db3ce200d2aa7..fea4b517db960 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp @@ -155,21 +155,11 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable( // either AGPR or VGPR in src0/src1. We still need to check constraint // effects for scale variant, which does not allow AGPR. 
if (isRewriteCandidate(*MI)) { - - int AGPROp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode()); - MachineInstrBuilder TmpMIB = - BuildMI(*MI->getParent(), MI->getIterator(), MI->getDebugLoc(), - TII.get(AGPROp)); - for (const MachineOperand &TmpMO : MI->operands()) - TmpMIB.add(TmpMO); - MachineInstr *TmpMI = TmpMIB.getInstr(); unsigned OpNo = &MO - &MI->getOperand(0); - const TargetRegisterClass *EquivalentAGPRRegClass = - TRI.getEquivalentAGPRClass(MRI.getRegClass(Reg)); - const TargetRegisterClass *Allowed = TmpMI->getRegClassConstraintEffect( - OpNo, EquivalentAGPRRegClass, &TII, &TRI); - TmpMI->eraseFromParent(); - if (!Allowed || Allowed != EquivalentAGPRRegClass) + int AGPROp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode()); + const MCInstrDesc &AGPRDesc = TII.get(AGPROp); + const TargetRegisterClass *NewRC = TII.getRegClass(AGPRDesc, OpNo); + if (!NewRC || !TRI.hasAGPRs(NewRC)) return false; const MachineOperand *VDst = From aff0f88007a63151c3f1029c3e84803b686a3f35 Mon Sep 17 00:00:00 2001 From: hjagasiaAMD Date: Tue, 2 Dec 2025 11:25:31 -0600 Subject: [PATCH 4/7] Update llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp Co-authored-by: Matt Arsenault --- llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp index fea4b517db960..5769deb98db79 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp @@ -159,7 +159,7 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable( int AGPROp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode()); const MCInstrDesc &AGPRDesc = TII.get(AGPROp); const TargetRegisterClass *NewRC = TII.getRegClass(AGPRDesc, OpNo); - if (!NewRC || !TRI.hasAGPRs(NewRC)) + if (!TRI.hasAGPRs(NewRC)) return false; const MachineOperand *VDst = From 1fc74eac46ebd7cb7c67cf8239ecc8e83c0967be Mon 
Sep 17 00:00:00 2001 From: hjagasiaAMD Date: Tue, 2 Dec 2025 11:26:26 -0600 Subject: [PATCH 5/7] Update llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp Co-authored-by: Matt Arsenault --- llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp index 5769deb98db79..ddd119d8e7fe9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp @@ -155,10 +155,9 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable( // either AGPR or VGPR in src0/src1. We still need to check constraint // effects for scale variant, which does not allow AGPR. if (isRewriteCandidate(*MI)) { - unsigned OpNo = &MO - &MI->getOperand(0); int AGPROp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode()); const MCInstrDesc &AGPRDesc = TII.get(AGPROp); - const TargetRegisterClass *NewRC = TII.getRegClass(AGPRDesc, OpNo); + const TargetRegisterClass *NewRC = TII.getRegClass(AGPRDesc, MO.getOperandNo()); if (!TRI.hasAGPRs(NewRC)) return false; From ef616053a2c7e18441d7c741503f683787df751a Mon Sep 17 00:00:00 2001 From: hjagasiaAMD Date: Tue, 2 Dec 2025 11:27:39 -0600 Subject: [PATCH 6/7] Update llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir Co-authored-by: Matt Arsenault --- llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir index e8c835c76a374..e35927e8bf00d 100644 --- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=greedy,amdgpu-rewrite-agpr-copy-mfma -verify-machineinstrs -o - %s 
2>&1 | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=greedy,amdgpu-rewrite-agpr-copy-mfma -verify-machineinstrs -o - %s | FileCheck %s # CHECK: bb.1: # CHECK: dead %{{[0-9]+}}:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, 4, 4, %{{[0-9]+}}, %[[REG:[0-9]+]], 4, 0, implicit $mode, implicit $exec # CHECK: %{{[0-9]+}}:agpr_32 = IMPLICIT_DEF From a55eaced54174466b1e73abc51a01834d4cfd866 Mon Sep 17 00:00:00 2001 From: hjagasiaAMD Date: Tue, 2 Dec 2025 11:33:50 -0600 Subject: [PATCH 7/7] Format --- llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp index ddd119d8e7fe9..ffbb1c183ca9e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp @@ -157,7 +157,8 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable( if (isRewriteCandidate(*MI)) { int AGPROp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode()); const MCInstrDesc &AGPRDesc = TII.get(AGPROp); - const TargetRegisterClass *NewRC = TII.getRegClass(AGPRDesc, MO.getOperandNo()); + const TargetRegisterClass *NewRC = + TII.getRegClass(AGPRDesc, MO.getOperandNo()); if (!TRI.hasAGPRs(NewRC)) return false;