diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp index 095f23ad22d97..ffbb1c183ca9e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp @@ -97,8 +97,8 @@ class AMDGPURewriteAGPRCopyMFMAImpl { /// Compute the register class constraints based on the uses of \p Reg, /// excluding MFMA uses from which can be rewritten to change the register - /// class constraint. This should be nearly identical to - /// MachineRegisterInfo::recomputeRegClass. + /// class constraint. MFMA scale operands need to be constraint checked. + /// This should be nearly identical to MachineRegisterInfo::recomputeRegClass. /// \p RewriteCandidates will collect the set of MFMA instructions that need /// to have the opcode mutated to perform the replacement. @@ -152,9 +152,16 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable( // We can swap the classes of dst + src2 as a pair to AGPR, so ignore the // effects of rewrite candidates. It just so happens that we can use - // either AGPR or VGPR in src0/src1, so don't bother checking the - // constraint effects of the individual operands. + // either AGPR or VGPR in src0/src1. We still need to check constraint + // effects for scale variants, whose scale operands do not allow AGPRs. 
if (isRewriteCandidate(*MI)) { + int AGPROp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode()); + const MCInstrDesc &AGPRDesc = TII.get(AGPROp); + const TargetRegisterClass *NewRC = + TII.getRegClass(AGPRDesc, MO.getOperandNo()); + if (!TRI.hasAGPRs(NewRC)) + return false; + const MachineOperand *VDst = TII.getNamedOperand(*MI, AMDGPU::OpName::vdst); const MachineOperand *Src2 = diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir index 999ea42910d92..e35927e8bf00d 100644 --- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-scale-to-agpr.mir @@ -1,7 +1,9 @@ -# RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=greedy,amdgpu-rewrite-agpr-copy-mfma -verify-machineinstrs -o - %s 2>&1 | FileCheck %s -# CHECK: Illegal virtual register for instruction -# CHECK: Expected a VGPR_32 register, but got a AGPR_32 register - +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=greedy,amdgpu-rewrite-agpr-copy-mfma -verify-machineinstrs -o - %s | FileCheck %s +# CHECK: bb.1: +# CHECK: dead %{{[0-9]+}}:vreg_128_align2 = V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, 4, 4, %{{[0-9]+}}, %[[REG:[0-9]+]], 4, 0, implicit $mode, implicit $exec +# CHECK: %{{[0-9]+}}:agpr_32 = IMPLICIT_DEF +# CHECK: %[[REG]]:vgpr_32 = COPY %{{[0-9]+}} + # Test for issue in amdgpu-rewrite-agpr-copy-mfma, which reassigns scale operand # in vgpr_32 register to agpr_32, not permitted by instruction format. ---