@@ -726,12 +726,12 @@ define amdgpu_kernel void @test_mfma_f64_4x4x4f64(ptr addrspace(1) %arg, double
726
726
; GFX90A-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
727
727
; GFX90A-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
728
728
; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
729
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[0:1 ], s[2:3], s[2:3] op_sel:[0,1]
730
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3 ], s[6:7], s[6:7] op_sel:[0,1]
729
+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3 ], s[2:3], s[2:3] op_sel:[0,1]
730
+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5 ], s[6:7], s[6:7] op_sel:[0,1]
731
731
; GFX90A-VGPR-NEXT: s_nop 1
732
- ; GFX90A-VGPR-NEXT: v_mfma_f64_4x4x4f64 v[4:5 ], v[0:1 ], v[2:3 ], 0
732
+ ; GFX90A-VGPR-NEXT: v_mfma_f64_4x4x4f64 v[0:1 ], v[2:3 ], v[4:5 ], 0
733
733
; GFX90A-VGPR-NEXT: s_nop 3
734
- ; GFX90A-VGPR-NEXT: v_mfma_f64_4x4x4f64 v[0:1], v[0:1 ], v[2:3 ], v[4:5 ] cbsz:1 abid:2 blgp:3
734
+ ; GFX90A-VGPR-NEXT: v_mfma_f64_4x4x4f64 v[0:1], v[2:3 ], v[4:5 ], v[0:1 ] cbsz:1 abid:2 blgp:3
735
735
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, 0
736
736
; GFX90A-VGPR-NEXT: s_nop 7
737
737
; GFX90A-VGPR-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -742,12 +742,12 @@ define amdgpu_kernel void @test_mfma_f64_4x4x4f64(ptr addrspace(1) %arg, double
742
742
; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
743
743
; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
744
744
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
745
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1 ], s[2:3]
746
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3 ], s[6:7]
745
+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3 ], s[2:3]
746
+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5 ], s[6:7]
747
747
; GFX942-VGPR-NEXT: s_nop 1
748
- ; GFX942-VGPR-NEXT: v_mfma_f64_4x4x4_4b_f64 v[4:5 ], v[0:1 ], v[2:3 ], 0
748
+ ; GFX942-VGPR-NEXT: v_mfma_f64_4x4x4_4b_f64 v[0:1 ], v[2:3 ], v[4:5 ], 0
749
749
; GFX942-VGPR-NEXT: s_nop 3
750
- ; GFX942-VGPR-NEXT: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1 ], v[2:3 ], v[4:5 ] cbsz:1 abid:2 neg:[1,1,0]
750
+ ; GFX942-VGPR-NEXT: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[2:3 ], v[4:5 ], v[0:1 ] cbsz:1 abid:2 neg:[1,1,0]
751
751
; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, 0
752
752
; GFX942-VGPR-NEXT: s_nop 7
753
753
; GFX942-VGPR-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -765,10 +765,10 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
765
765
; GFX90A-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
766
766
; GFX90A-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34
767
767
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
768
- ; GFX90A-NEXT: v_mov_b32_e32 v2 , s10
768
+ ; GFX90A-NEXT: v_mov_b32_e32 v0 , s10
769
769
; GFX90A-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0
770
- ; GFX90A-NEXT: v_mov_b32_e32 v3 , s11
771
- ; GFX90A-NEXT: v_pk_mov_b32 v[0:1 ], s[12:13], s[12:13] op_sel:[0,1]
770
+ ; GFX90A-NEXT: v_mov_b32_e32 v1 , s11
771
+ ; GFX90A-NEXT: v_pk_mov_b32 v[2:3 ], s[12:13], s[12:13] op_sel:[0,1]
772
772
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
773
773
; GFX90A-NEXT: v_accvgpr_write_b32 a0, s0
774
774
; GFX90A-NEXT: v_accvgpr_write_b32 a1, s1
@@ -779,7 +779,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
779
779
; GFX90A-NEXT: v_accvgpr_write_b32 a6, s6
780
780
; GFX90A-NEXT: v_accvgpr_write_b32 a7, s7
781
781
; GFX90A-NEXT: s_nop 1
782
- ; GFX90A-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[2:3 ], v[0:1 ], a[0:7] cbsz:1 abid:2 blgp:3
782
+ ; GFX90A-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1 ], v[2:3 ], a[0:7] cbsz:1 abid:2 blgp:3
783
783
; GFX90A-NEXT: v_mov_b32_e32 v0, 0
784
784
; GFX90A-NEXT: s_nop 15
785
785
; GFX90A-NEXT: s_nop 0
@@ -792,10 +792,10 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
792
792
; GFX942-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
793
793
; GFX942-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34
794
794
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
795
- ; GFX942-NEXT: v_mov_b32_e32 v2 , s10
795
+ ; GFX942-NEXT: v_mov_b32_e32 v0 , s10
796
796
; GFX942-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0
797
- ; GFX942-NEXT: v_mov_b32_e32 v3 , s11
798
- ; GFX942-NEXT: v_mov_b64_e32 v[0:1 ], s[12:13]
797
+ ; GFX942-NEXT: v_mov_b32_e32 v1 , s11
798
+ ; GFX942-NEXT: v_mov_b64_e32 v[2:3 ], s[12:13]
799
799
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
800
800
; GFX942-NEXT: v_accvgpr_write_b32 a0, s0
801
801
; GFX942-NEXT: v_accvgpr_write_b32 a1, s1
@@ -806,7 +806,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
806
806
; GFX942-NEXT: v_accvgpr_write_b32 a6, s6
807
807
; GFX942-NEXT: v_accvgpr_write_b32 a7, s7
808
808
; GFX942-NEXT: s_nop 1
809
- ; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[2:3 ], v[0:1 ], a[0:7] cbsz:1 abid:2 neg:[1,1,0]
809
+ ; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1 ], v[2:3 ], a[0:7] cbsz:1 abid:2 neg:[1,1,0]
810
810
; GFX942-NEXT: v_mov_b32_e32 v0, 0
811
811
; GFX942-NEXT: s_nop 15
812
812
; GFX942-NEXT: s_nop 0
@@ -819,17 +819,17 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
819
819
; GFX90A-VGPR-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
820
820
; GFX90A-VGPR-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34
821
821
; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
822
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v10 , s10
822
+ ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8 , s10
823
823
; GFX90A-VGPR-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0
824
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v11 , s11
825
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9 ], s[12:13], s[12:13] op_sel:[0,1]
824
+ ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v9 , s11
825
+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11 ], s[12:13], s[12:13] op_sel:[0,1]
826
826
; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
827
827
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
828
828
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
829
829
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], s[4:5], s[4:5] op_sel:[0,1]
830
830
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], s[6:7], s[6:7] op_sel:[0,1]
831
831
; GFX90A-VGPR-NEXT: s_nop 1
832
- ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[10:11 ], v[8:9 ], v[0:7] cbsz:1 abid:2 blgp:3
832
+ ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9 ], v[10:11 ], v[0:7] cbsz:1 abid:2 blgp:3
833
833
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8, 0
834
834
; GFX90A-VGPR-NEXT: s_nop 15
835
835
; GFX90A-VGPR-NEXT: s_nop 0
@@ -842,17 +842,17 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
842
842
; GFX942-VGPR-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
843
843
; GFX942-VGPR-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34
844
844
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
845
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v10 , s10
845
+ ; GFX942-VGPR-NEXT: v_mov_b32_e32 v8 , s10
846
846
; GFX942-VGPR-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0
847
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v11 , s11
848
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9 ], s[12:13]
847
+ ; GFX942-VGPR-NEXT: v_mov_b32_e32 v9 , s11
848
+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11 ], s[12:13]
849
849
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
850
850
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
851
851
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
852
852
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], s[4:5]
853
853
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], s[6:7]
854
854
; GFX942-VGPR-NEXT: s_nop 1
855
- ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[10:11 ], v[8:9 ], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
855
+ ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9 ], v[10:11 ], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
856
856
; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0
857
857
; GFX942-VGPR-NEXT: s_nop 15
858
858
; GFX942-VGPR-NEXT: s_nop 0
@@ -1629,20 +1629,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
1629
1629
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, 0x3ff00000
1630
1630
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0
1631
1631
; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1632
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v12 , s2
1633
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v13 , s3
1632
+ ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v10 , s2
1633
+ ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v11 , s3
1634
1634
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v0
1635
1635
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0
1636
1636
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v0
1637
1637
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0
1638
1638
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, v0
1639
1639
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1]
1640
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11 ], s[6:7], s[6:7] op_sel:[0,1]
1640
+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13 ], s[6:7], s[6:7] op_sel:[0,1]
1641
1641
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
1642
1642
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
1643
1643
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
1644
1644
; GFX90A-VGPR-NEXT: s_nop 1
1645
- ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[12:13 ], v[10:11 ], v[2:9]
1645
+ ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11 ], v[12:13 ], v[2:9]
1646
1646
; GFX90A-VGPR-NEXT: s_nop 15
1647
1647
; GFX90A-VGPR-NEXT: s_nop 1
1648
1648
; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
@@ -1657,20 +1657,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
1657
1657
; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, 0x3ff00000
1658
1658
; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
1659
1659
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1660
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v12 , s2
1661
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v13 , s3
1660
+ ; GFX942-VGPR-NEXT: v_mov_b32_e32 v10 , s2
1661
+ ; GFX942-VGPR-NEXT: v_mov_b32_e32 v11 , s3
1662
1662
; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v0
1663
1663
; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
1664
1664
; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v0
1665
1665
; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
1666
1666
; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, v0
1667
1667
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
1668
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11 ], s[6:7]
1668
+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13 ], s[6:7]
1669
1669
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
1670
1670
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
1671
1671
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
1672
1672
; GFX942-VGPR-NEXT: s_nop 1
1673
- ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[12:13 ], v[10:11 ], v[2:9]
1673
+ ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11 ], v[12:13 ], v[2:9]
1674
1674
; GFX942-VGPR-NEXT: s_nop 15
1675
1675
; GFX942-VGPR-NEXT: s_nop 1
1676
1676
; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
@@ -1743,20 +1743,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %
1743
1743
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, 0x405ec000
1744
1744
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0
1745
1745
; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1746
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v12 , s2
1747
- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v13 , s3
1746
+ ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v10 , s2
1747
+ ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v11 , s3
1748
1748
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v1
1749
1749
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0
1750
1750
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v1
1751
1751
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0
1752
1752
; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, v1
1753
1753
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1]
1754
- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11 ], s[6:7], s[6:7] op_sel:[0,1]
1754
+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13 ], s[6:7], s[6:7] op_sel:[0,1]
1755
1755
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
1756
1756
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
1757
1757
; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
1758
1758
; GFX90A-VGPR-NEXT: s_nop 1
1759
- ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[12:13 ], v[10:11 ], v[2:9]
1759
+ ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11 ], v[12:13 ], v[2:9]
1760
1760
; GFX90A-VGPR-NEXT: s_nop 15
1761
1761
; GFX90A-VGPR-NEXT: s_nop 1
1762
1762
; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
@@ -1771,20 +1771,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %
1771
1771
; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 0x405ec000
1772
1772
; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
1773
1773
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1774
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v12 , s2
1775
- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v13 , s3
1774
+ ; GFX942-VGPR-NEXT: v_mov_b32_e32 v10 , s2
1775
+ ; GFX942-VGPR-NEXT: v_mov_b32_e32 v11 , s3
1776
1776
; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1
1777
1777
; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
1778
1778
; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1
1779
1779
; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
1780
1780
; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1
1781
1781
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
1782
- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11 ], s[6:7]
1782
+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13 ], s[6:7]
1783
1783
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
1784
1784
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
1785
1785
; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
1786
1786
; GFX942-VGPR-NEXT: s_nop 1
1787
- ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[12:13 ], v[10:11 ], v[2:9]
1787
+ ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11 ], v[12:13 ], v[2:9]
1788
1788
; GFX942-VGPR-NEXT: s_nop 15
1789
1789
; GFX942-VGPR-NEXT: s_nop 1
1790
1790
; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
0 commit comments