Skip to content

Commit 9048fb4

Browse files
authored
AMDGPU: Remove unnecessary AGPR operand legalization (#162093)
The operands need to be correct to begin with; this doesn't depend on the context of other operands. AV registers are not used for the vdst/src2 registers.
1 parent 454ef02 commit 9048fb4

File tree

7 files changed

+930
-1141
lines changed

7 files changed

+930
-1141
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 13 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -17346,75 +17346,24 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
1734617346

1734717347
MachineFunction *MF = MI.getParent()->getParent();
1734817348
MachineRegisterInfo &MRI = MF->getRegInfo();
17349-
SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
1735017349

1735117350
if (TII->isVOP3(MI.getOpcode())) {
1735217351
// Make sure constant bus requirements are respected.
1735317352
TII->legalizeOperandsVOP3(MRI, MI);
1735417353

17355-
// Prefer VGPRs over AGPRs in mAI instructions where possible.
17356-
// This saves a chain-copy of registers and better balance register
17357-
// use between vgpr and agpr as agpr tuples tend to be big.
17358-
if (!MI.getDesc().operands().empty()) {
17359-
unsigned Opc = MI.getOpcode();
17360-
bool HasAGPRs =
17361-
!Subtarget->hasGFX90AInsts() || Info->getMinNumAGPRs() != 0;
17362-
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
17363-
int16_t Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
17364-
for (auto I :
17365-
{AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
17366-
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1), Src2Idx}) {
17367-
if (I == -1)
17368-
break;
17369-
if ((I == Src2Idx) && (HasAGPRs))
17370-
break;
17371-
MachineOperand &Op = MI.getOperand(I);
17372-
if (!Op.isReg() || !Op.getReg().isVirtual())
17373-
continue;
17374-
auto *RC = TRI->getRegClassForReg(MRI, Op.getReg());
17375-
if (!TRI->hasAGPRs(RC))
17376-
continue;
17377-
auto *Src = MRI.getUniqueVRegDef(Op.getReg());
17378-
if (!Src || !Src->isCopy() ||
17379-
!TRI->isSGPRReg(MRI, Src->getOperand(1).getReg()))
17380-
continue;
17381-
auto *NewRC = TRI->getEquivalentVGPRClass(RC);
17382-
// All uses of agpr64 and agpr32 can also accept vgpr except for
17383-
// v_accvgpr_read, but we do not produce agpr reads during selection,
17384-
// so no use checks are needed.
17385-
MRI.setRegClass(Op.getReg(), NewRC);
17386-
}
17387-
17388-
if (TII->isMAI(MI)) {
17389-
// The ordinary src0, src1, src2 were legalized above.
17390-
//
17391-
// We have to also legalize the appended v_mfma_ld_scale_b32 operands,
17392-
// as a separate instruction.
17393-
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
17394-
AMDGPU::OpName::scale_src0);
17395-
if (Src0Idx != -1) {
17396-
int Src1Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
17397-
AMDGPU::OpName::scale_src1);
17398-
if (TII->usesConstantBus(MRI, MI, Src0Idx) &&
17399-
TII->usesConstantBus(MRI, MI, Src1Idx))
17400-
TII->legalizeOpWithMove(MI, Src1Idx);
17401-
}
17402-
}
17403-
17404-
if (!HasAGPRs)
17405-
return;
17406-
17407-
// Resolve the rest of AV operands to AGPRs.
17408-
if (auto *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2)) {
17409-
if (Src2->isReg() && Src2->getReg().isVirtual()) {
17410-
auto *RC = TRI->getRegClassForReg(MRI, Src2->getReg());
17411-
if (TRI->isVectorSuperClass(RC)) {
17412-
auto *NewRC = TRI->getEquivalentAGPRClass(RC);
17413-
MRI.setRegClass(Src2->getReg(), NewRC);
17414-
if (Src2->isTied())
17415-
MRI.setRegClass(MI.getOperand(0).getReg(), NewRC);
17416-
}
17417-
}
17354+
if (TII->isMAI(MI)) {
17355+
// The ordinary src0, src1, src2 were legalized above.
17356+
//
17357+
// We have to also legalize the appended v_mfma_ld_scale_b32 operands,
17358+
// as a separate instruction.
17359+
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
17360+
AMDGPU::OpName::scale_src0);
17361+
if (Src0Idx != -1) {
17362+
int Src1Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
17363+
AMDGPU::OpName::scale_src1);
17364+
if (TII->usesConstantBus(MRI, MI, Src0Idx) &&
17365+
TII->usesConstantBus(MRI, MI, Src1Idx))
17366+
TII->legalizeOpWithMove(MI, Src1Idx);
1741817367
}
1741917368
}
1742017369

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll

Lines changed: 40 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -726,12 +726,12 @@ define amdgpu_kernel void @test_mfma_f64_4x4x4f64(ptr addrspace(1) %arg, double
726726
; GFX90A-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
727727
; GFX90A-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
728728
; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
729-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
730-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1]
729+
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
730+
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], s[6:7], s[6:7] op_sel:[0,1]
731731
; GFX90A-VGPR-NEXT: s_nop 1
732-
; GFX90A-VGPR-NEXT: v_mfma_f64_4x4x4f64 v[4:5], v[0:1], v[2:3], 0
732+
; GFX90A-VGPR-NEXT: v_mfma_f64_4x4x4f64 v[0:1], v[2:3], v[4:5], 0
733733
; GFX90A-VGPR-NEXT: s_nop 3
734-
; GFX90A-VGPR-NEXT: v_mfma_f64_4x4x4f64 v[0:1], v[0:1], v[2:3], v[4:5] cbsz:1 abid:2 blgp:3
734+
; GFX90A-VGPR-NEXT: v_mfma_f64_4x4x4f64 v[0:1], v[2:3], v[4:5], v[0:1] cbsz:1 abid:2 blgp:3
735735
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, 0
736736
; GFX90A-VGPR-NEXT: s_nop 7
737737
; GFX90A-VGPR-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -742,12 +742,12 @@ define amdgpu_kernel void @test_mfma_f64_4x4x4f64(ptr addrspace(1) %arg, double
742742
; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
743743
; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
744744
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
745-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
746-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
745+
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
746+
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], s[6:7]
747747
; GFX942-VGPR-NEXT: s_nop 1
748-
; GFX942-VGPR-NEXT: v_mfma_f64_4x4x4_4b_f64 v[4:5], v[0:1], v[2:3], 0
748+
; GFX942-VGPR-NEXT: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[2:3], v[4:5], 0
749749
; GFX942-VGPR-NEXT: s_nop 3
750-
; GFX942-VGPR-NEXT: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[4:5] cbsz:1 abid:2 neg:[1,1,0]
750+
; GFX942-VGPR-NEXT: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[2:3], v[4:5], v[0:1] cbsz:1 abid:2 neg:[1,1,0]
751751
; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, 0
752752
; GFX942-VGPR-NEXT: s_nop 7
753753
; GFX942-VGPR-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -765,10 +765,10 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
765765
; GFX90A-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
766766
; GFX90A-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34
767767
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
768-
; GFX90A-NEXT: v_mov_b32_e32 v2, s10
768+
; GFX90A-NEXT: v_mov_b32_e32 v0, s10
769769
; GFX90A-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0
770-
; GFX90A-NEXT: v_mov_b32_e32 v3, s11
771-
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[12:13], s[12:13] op_sel:[0,1]
770+
; GFX90A-NEXT: v_mov_b32_e32 v1, s11
771+
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[12:13], s[12:13] op_sel:[0,1]
772772
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
773773
; GFX90A-NEXT: v_accvgpr_write_b32 a0, s0
774774
; GFX90A-NEXT: v_accvgpr_write_b32 a1, s1
@@ -779,7 +779,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
779779
; GFX90A-NEXT: v_accvgpr_write_b32 a6, s6
780780
; GFX90A-NEXT: v_accvgpr_write_b32 a7, s7
781781
; GFX90A-NEXT: s_nop 1
782-
; GFX90A-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[2:3], v[0:1], a[0:7] cbsz:1 abid:2 blgp:3
782+
; GFX90A-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7] cbsz:1 abid:2 blgp:3
783783
; GFX90A-NEXT: v_mov_b32_e32 v0, 0
784784
; GFX90A-NEXT: s_nop 15
785785
; GFX90A-NEXT: s_nop 0
@@ -792,10 +792,10 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
792792
; GFX942-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
793793
; GFX942-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34
794794
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
795-
; GFX942-NEXT: v_mov_b32_e32 v2, s10
795+
; GFX942-NEXT: v_mov_b32_e32 v0, s10
796796
; GFX942-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0
797-
; GFX942-NEXT: v_mov_b32_e32 v3, s11
798-
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[12:13]
797+
; GFX942-NEXT: v_mov_b32_e32 v1, s11
798+
; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[12:13]
799799
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
800800
; GFX942-NEXT: v_accvgpr_write_b32 a0, s0
801801
; GFX942-NEXT: v_accvgpr_write_b32 a1, s1
@@ -806,7 +806,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
806806
; GFX942-NEXT: v_accvgpr_write_b32 a6, s6
807807
; GFX942-NEXT: v_accvgpr_write_b32 a7, s7
808808
; GFX942-NEXT: s_nop 1
809-
; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[2:3], v[0:1], a[0:7] cbsz:1 abid:2 neg:[1,1,0]
809+
; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] cbsz:1 abid:2 neg:[1,1,0]
810810
; GFX942-NEXT: v_mov_b32_e32 v0, 0
811811
; GFX942-NEXT: s_nop 15
812812
; GFX942-NEXT: s_nop 0
@@ -819,17 +819,17 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
819819
; GFX90A-VGPR-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
820820
; GFX90A-VGPR-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34
821821
; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
822-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v10, s10
822+
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8, s10
823823
; GFX90A-VGPR-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0
824-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v11, s11
825-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], s[12:13], s[12:13] op_sel:[0,1]
824+
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v9, s11
825+
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[12:13], s[12:13] op_sel:[0,1]
826826
; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
827827
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
828828
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
829829
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], s[4:5], s[4:5] op_sel:[0,1]
830830
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], s[6:7], s[6:7] op_sel:[0,1]
831831
; GFX90A-VGPR-NEXT: s_nop 1
832-
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[10:11], v[8:9], v[0:7] cbsz:1 abid:2 blgp:3
832+
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 blgp:3
833833
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8, 0
834834
; GFX90A-VGPR-NEXT: s_nop 15
835835
; GFX90A-VGPR-NEXT: s_nop 0
@@ -842,17 +842,17 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
842842
; GFX942-VGPR-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
843843
; GFX942-VGPR-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34
844844
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
845-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v10, s10
845+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, s10
846846
; GFX942-VGPR-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0
847-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v11, s11
848-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[12:13]
847+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v9, s11
848+
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[12:13]
849849
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
850850
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
851851
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
852852
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], s[4:5]
853853
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], s[6:7]
854854
; GFX942-VGPR-NEXT: s_nop 1
855-
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[10:11], v[8:9], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
855+
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
856856
; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0
857857
; GFX942-VGPR-NEXT: s_nop 15
858858
; GFX942-VGPR-NEXT: s_nop 0
@@ -1629,20 +1629,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
16291629
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, 0x3ff00000
16301630
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0
16311631
; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1632-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v12, s2
1633-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v13, s3
1632+
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v10, s2
1633+
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v11, s3
16341634
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v0
16351635
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0
16361636
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v0
16371637
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0
16381638
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, v0
16391639
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1]
1640-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1]
1640+
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13], s[6:7], s[6:7] op_sel:[0,1]
16411641
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
16421642
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
16431643
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
16441644
; GFX90A-VGPR-NEXT: s_nop 1
1645-
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[12:13], v[10:11], v[2:9]
1645+
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9]
16461646
; GFX90A-VGPR-NEXT: s_nop 15
16471647
; GFX90A-VGPR-NEXT: s_nop 1
16481648
; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
@@ -1657,20 +1657,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
16571657
; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, 0x3ff00000
16581658
; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
16591659
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1660-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v12, s2
1661-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v13, s3
1660+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v10, s2
1661+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v11, s3
16621662
; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v0
16631663
; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
16641664
; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v0
16651665
; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
16661666
; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, v0
16671667
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
1668-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7]
1668+
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[6:7]
16691669
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
16701670
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
16711671
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
16721672
; GFX942-VGPR-NEXT: s_nop 1
1673-
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[12:13], v[10:11], v[2:9]
1673+
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9]
16741674
; GFX942-VGPR-NEXT: s_nop 15
16751675
; GFX942-VGPR-NEXT: s_nop 1
16761676
; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
@@ -1743,20 +1743,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %
17431743
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, 0x405ec000
17441744
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0
17451745
; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1746-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v12, s2
1747-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v13, s3
1746+
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v10, s2
1747+
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v11, s3
17481748
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v1
17491749
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0
17501750
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v1
17511751
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0
17521752
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, v1
17531753
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1]
1754-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1]
1754+
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13], s[6:7], s[6:7] op_sel:[0,1]
17551755
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
17561756
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
17571757
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
17581758
; GFX90A-VGPR-NEXT: s_nop 1
1759-
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[12:13], v[10:11], v[2:9]
1759+
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9]
17601760
; GFX90A-VGPR-NEXT: s_nop 15
17611761
; GFX90A-VGPR-NEXT: s_nop 1
17621762
; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
@@ -1771,20 +1771,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %
17711771
; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 0x405ec000
17721772
; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
17731773
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1774-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v12, s2
1775-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v13, s3
1774+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v10, s2
1775+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v11, s3
17761776
; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1
17771777
; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
17781778
; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1
17791779
; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
17801780
; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1
17811781
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
1782-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7]
1782+
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[6:7]
17831783
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
17841784
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
17851785
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
17861786
; GFX942-VGPR-NEXT: s_nop 1
1787-
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[12:13], v[10:11], v[2:9]
1787+
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9]
17881788
; GFX942-VGPR-NEXT: s_nop 15
17891789
; GFX942-VGPR-NEXT: s_nop 1
17901790
; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16

0 commit comments

Comments
 (0)