Skip to content

Commit c94ce69

Browse files
committed
AMDGPU: Remove unnecessary AGPR operand legalization
The operands need to be correct to begin with; this doesn't depend on the context of other operands. AV registers are not used for the vdst/src2 registers.
1 parent 764d91d commit c94ce69

File tree

7 files changed

+930
-1141
lines changed

7 files changed

+930
-1141
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 13 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -17346,75 +17346,24 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
1734617346

1734717347
MachineFunction *MF = MI.getParent()->getParent();
1734817348
MachineRegisterInfo &MRI = MF->getRegInfo();
17349-
SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
1735017349

1735117350
if (TII->isVOP3(MI.getOpcode())) {
1735217351
// Make sure constant bus requirements are respected.
1735317352
TII->legalizeOperandsVOP3(MRI, MI);
1735417353

17355-
// Prefer VGPRs over AGPRs in mAI instructions where possible.
17356-
// This saves a chain-copy of registers and better balance register
17357-
// use between vgpr and agpr as agpr tuples tend to be big.
17358-
if (!MI.getDesc().operands().empty()) {
17359-
unsigned Opc = MI.getOpcode();
17360-
bool HasAGPRs =
17361-
!Subtarget->hasGFX90AInsts() || Info->getMinNumAGPRs() != 0;
17362-
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
17363-
int16_t Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
17364-
for (auto I :
17365-
{AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
17366-
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1), Src2Idx}) {
17367-
if (I == -1)
17368-
break;
17369-
if ((I == Src2Idx) && (HasAGPRs))
17370-
break;
17371-
MachineOperand &Op = MI.getOperand(I);
17372-
if (!Op.isReg() || !Op.getReg().isVirtual())
17373-
continue;
17374-
auto *RC = TRI->getRegClassForReg(MRI, Op.getReg());
17375-
if (!TRI->hasAGPRs(RC))
17376-
continue;
17377-
auto *Src = MRI.getUniqueVRegDef(Op.getReg());
17378-
if (!Src || !Src->isCopy() ||
17379-
!TRI->isSGPRReg(MRI, Src->getOperand(1).getReg()))
17380-
continue;
17381-
auto *NewRC = TRI->getEquivalentVGPRClass(RC);
17382-
// All uses of agpr64 and agpr32 can also accept vgpr except for
17383-
// v_accvgpr_read, but we do not produce agpr reads during selection,
17384-
// so no use checks are needed.
17385-
MRI.setRegClass(Op.getReg(), NewRC);
17386-
}
17387-
17388-
if (TII->isMAI(MI)) {
17389-
// The ordinary src0, src1, src2 were legalized above.
17390-
//
17391-
// We have to also legalize the appended v_mfma_ld_scale_b32 operands,
17392-
// as a separate instruction.
17393-
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
17394-
AMDGPU::OpName::scale_src0);
17395-
if (Src0Idx != -1) {
17396-
int Src1Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
17397-
AMDGPU::OpName::scale_src1);
17398-
if (TII->usesConstantBus(MRI, MI, Src0Idx) &&
17399-
TII->usesConstantBus(MRI, MI, Src1Idx))
17400-
TII->legalizeOpWithMove(MI, Src1Idx);
17401-
}
17402-
}
17403-
17404-
if (!HasAGPRs)
17405-
return;
17406-
17407-
// Resolve the rest of AV operands to AGPRs.
17408-
if (auto *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2)) {
17409-
if (Src2->isReg() && Src2->getReg().isVirtual()) {
17410-
auto *RC = TRI->getRegClassForReg(MRI, Src2->getReg());
17411-
if (TRI->isVectorSuperClass(RC)) {
17412-
auto *NewRC = TRI->getEquivalentAGPRClass(RC);
17413-
MRI.setRegClass(Src2->getReg(), NewRC);
17414-
if (Src2->isTied())
17415-
MRI.setRegClass(MI.getOperand(0).getReg(), NewRC);
17416-
}
17417-
}
17354+
if (TII->isMAI(MI)) {
17355+
// The ordinary src0, src1, src2 were legalized above.
17356+
//
17357+
// We have to also legalize the appended v_mfma_ld_scale_b32 operands,
17358+
// as a separate instruction.
17359+
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
17360+
AMDGPU::OpName::scale_src0);
17361+
if (Src0Idx != -1) {
17362+
int Src1Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
17363+
AMDGPU::OpName::scale_src1);
17364+
if (TII->usesConstantBus(MRI, MI, Src0Idx) &&
17365+
TII->usesConstantBus(MRI, MI, Src1Idx))
17366+
TII->legalizeOpWithMove(MI, Src1Idx);
1741817367
}
1741917368
}
1742017369

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll

Lines changed: 40 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -726,12 +726,12 @@ define amdgpu_kernel void @test_mfma_f64_4x4x4f64(ptr addrspace(1) %arg, double
726726
; GFX90A-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
727727
; GFX90A-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
728728
; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
729-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
730-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1]
729+
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
730+
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], s[6:7], s[6:7] op_sel:[0,1]
731731
; GFX90A-VGPR-NEXT: s_nop 1
732-
; GFX90A-VGPR-NEXT: v_mfma_f64_4x4x4f64 v[4:5], v[0:1], v[2:3], 0
732+
; GFX90A-VGPR-NEXT: v_mfma_f64_4x4x4f64 v[0:1], v[2:3], v[4:5], 0
733733
; GFX90A-VGPR-NEXT: s_nop 3
734-
; GFX90A-VGPR-NEXT: v_mfma_f64_4x4x4f64 v[0:1], v[0:1], v[2:3], v[4:5] cbsz:1 abid:2 blgp:3
734+
; GFX90A-VGPR-NEXT: v_mfma_f64_4x4x4f64 v[0:1], v[2:3], v[4:5], v[0:1] cbsz:1 abid:2 blgp:3
735735
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, 0
736736
; GFX90A-VGPR-NEXT: s_nop 7
737737
; GFX90A-VGPR-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -742,12 +742,12 @@ define amdgpu_kernel void @test_mfma_f64_4x4x4f64(ptr addrspace(1) %arg, double
742742
; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
743743
; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
744744
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
745-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
746-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
745+
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
746+
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], s[6:7]
747747
; GFX942-VGPR-NEXT: s_nop 1
748-
; GFX942-VGPR-NEXT: v_mfma_f64_4x4x4_4b_f64 v[4:5], v[0:1], v[2:3], 0
748+
; GFX942-VGPR-NEXT: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[2:3], v[4:5], 0
749749
; GFX942-VGPR-NEXT: s_nop 3
750-
; GFX942-VGPR-NEXT: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[4:5] cbsz:1 abid:2 neg:[1,1,0]
750+
; GFX942-VGPR-NEXT: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[2:3], v[4:5], v[0:1] cbsz:1 abid:2 neg:[1,1,0]
751751
; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, 0
752752
; GFX942-VGPR-NEXT: s_nop 7
753753
; GFX942-VGPR-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -765,10 +765,10 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
765765
; GFX90A-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
766766
; GFX90A-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34
767767
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
768-
; GFX90A-NEXT: v_mov_b32_e32 v2, s10
768+
; GFX90A-NEXT: v_mov_b32_e32 v0, s10
769769
; GFX90A-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0
770-
; GFX90A-NEXT: v_mov_b32_e32 v3, s11
771-
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[12:13], s[12:13] op_sel:[0,1]
770+
; GFX90A-NEXT: v_mov_b32_e32 v1, s11
771+
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[12:13], s[12:13] op_sel:[0,1]
772772
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
773773
; GFX90A-NEXT: v_accvgpr_write_b32 a0, s0
774774
; GFX90A-NEXT: v_accvgpr_write_b32 a1, s1
@@ -779,7 +779,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
779779
; GFX90A-NEXT: v_accvgpr_write_b32 a6, s6
780780
; GFX90A-NEXT: v_accvgpr_write_b32 a7, s7
781781
; GFX90A-NEXT: s_nop 1
782-
; GFX90A-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[2:3], v[0:1], a[0:7] cbsz:1 abid:2 blgp:3
782+
; GFX90A-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7] cbsz:1 abid:2 blgp:3
783783
; GFX90A-NEXT: v_mov_b32_e32 v0, 0
784784
; GFX90A-NEXT: s_nop 15
785785
; GFX90A-NEXT: s_nop 0
@@ -792,10 +792,10 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
792792
; GFX942-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
793793
; GFX942-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34
794794
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
795-
; GFX942-NEXT: v_mov_b32_e32 v2, s10
795+
; GFX942-NEXT: v_mov_b32_e32 v0, s10
796796
; GFX942-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0
797-
; GFX942-NEXT: v_mov_b32_e32 v3, s11
798-
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[12:13]
797+
; GFX942-NEXT: v_mov_b32_e32 v1, s11
798+
; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[12:13]
799799
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
800800
; GFX942-NEXT: v_accvgpr_write_b32 a0, s0
801801
; GFX942-NEXT: v_accvgpr_write_b32 a1, s1
@@ -806,7 +806,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
806806
; GFX942-NEXT: v_accvgpr_write_b32 a6, s6
807807
; GFX942-NEXT: v_accvgpr_write_b32 a7, s7
808808
; GFX942-NEXT: s_nop 1
809-
; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[2:3], v[0:1], a[0:7] cbsz:1 abid:2 neg:[1,1,0]
809+
; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] cbsz:1 abid:2 neg:[1,1,0]
810810
; GFX942-NEXT: v_mov_b32_e32 v0, 0
811811
; GFX942-NEXT: s_nop 15
812812
; GFX942-NEXT: s_nop 0
@@ -819,17 +819,17 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
819819
; GFX90A-VGPR-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
820820
; GFX90A-VGPR-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34
821821
; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
822-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v10, s10
822+
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8, s10
823823
; GFX90A-VGPR-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0
824-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v11, s11
825-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], s[12:13], s[12:13] op_sel:[0,1]
824+
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v9, s11
825+
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[12:13], s[12:13] op_sel:[0,1]
826826
; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
827827
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
828828
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
829829
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], s[4:5], s[4:5] op_sel:[0,1]
830830
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], s[6:7], s[6:7] op_sel:[0,1]
831831
; GFX90A-VGPR-NEXT: s_nop 1
832-
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[10:11], v[8:9], v[0:7] cbsz:1 abid:2 blgp:3
832+
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 blgp:3
833833
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8, 0
834834
; GFX90A-VGPR-NEXT: s_nop 15
835835
; GFX90A-VGPR-NEXT: s_nop 0
@@ -842,17 +842,17 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
842842
; GFX942-VGPR-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
843843
; GFX942-VGPR-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34
844844
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
845-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v10, s10
845+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, s10
846846
; GFX942-VGPR-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0
847-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v11, s11
848-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[12:13]
847+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v9, s11
848+
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[12:13]
849849
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
850850
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
851851
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
852852
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], s[4:5]
853853
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], s[6:7]
854854
; GFX942-VGPR-NEXT: s_nop 1
855-
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[10:11], v[8:9], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
855+
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
856856
; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0
857857
; GFX942-VGPR-NEXT: s_nop 15
858858
; GFX942-VGPR-NEXT: s_nop 0
@@ -1629,20 +1629,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
16291629
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, 0x3ff00000
16301630
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0
16311631
; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1632-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v12, s2
1633-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v13, s3
1632+
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v10, s2
1633+
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v11, s3
16341634
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v0
16351635
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0
16361636
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v0
16371637
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0
16381638
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, v0
16391639
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1]
1640-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1]
1640+
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13], s[6:7], s[6:7] op_sel:[0,1]
16411641
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
16421642
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
16431643
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
16441644
; GFX90A-VGPR-NEXT: s_nop 1
1645-
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[12:13], v[10:11], v[2:9]
1645+
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9]
16461646
; GFX90A-VGPR-NEXT: s_nop 15
16471647
; GFX90A-VGPR-NEXT: s_nop 1
16481648
; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
@@ -1657,20 +1657,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
16571657
; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, 0x3ff00000
16581658
; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
16591659
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1660-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v12, s2
1661-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v13, s3
1660+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v10, s2
1661+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v11, s3
16621662
; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v0
16631663
; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
16641664
; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v0
16651665
; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
16661666
; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, v0
16671667
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
1668-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7]
1668+
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[6:7]
16691669
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
16701670
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
16711671
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
16721672
; GFX942-VGPR-NEXT: s_nop 1
1673-
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[12:13], v[10:11], v[2:9]
1673+
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9]
16741674
; GFX942-VGPR-NEXT: s_nop 15
16751675
; GFX942-VGPR-NEXT: s_nop 1
16761676
; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
@@ -1743,20 +1743,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %
17431743
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, 0x405ec000
17441744
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0
17451745
; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1746-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v12, s2
1747-
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v13, s3
1746+
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v10, s2
1747+
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v11, s3
17481748
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v1
17491749
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0
17501750
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v1
17511751
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0
17521752
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, v1
17531753
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1]
1754-
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1]
1754+
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13], s[6:7], s[6:7] op_sel:[0,1]
17551755
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
17561756
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
17571757
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
17581758
; GFX90A-VGPR-NEXT: s_nop 1
1759-
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[12:13], v[10:11], v[2:9]
1759+
; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9]
17601760
; GFX90A-VGPR-NEXT: s_nop 15
17611761
; GFX90A-VGPR-NEXT: s_nop 1
17621762
; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
@@ -1771,20 +1771,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %
17711771
; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 0x405ec000
17721772
; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
17731773
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1774-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v12, s2
1775-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v13, s3
1774+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v10, s2
1775+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v11, s3
17761776
; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1
17771777
; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
17781778
; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1
17791779
; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
17801780
; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1
17811781
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
1782-
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7]
1782+
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[6:7]
17831783
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
17841784
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
17851785
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
17861786
; GFX942-VGPR-NEXT: s_nop 1
1787-
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[12:13], v[10:11], v[2:9]
1787+
; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9]
17881788
; GFX942-VGPR-NEXT: s_nop 15
17891789
; GFX942-VGPR-NEXT: s_nop 1
17901790
; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16

0 commit comments

Comments
 (0)