@@ -443,10 +443,10 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
443443; GFX90A-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
444444; GFX90A-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34
445445; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
446- ; GFX90A-NEXT: v_mov_b32_e32 v0 , s10
446+ ; GFX90A-NEXT: v_mov_b32_e32 v2 , s10
447447; GFX90A-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0
448- ; GFX90A-NEXT: v_mov_b32_e32 v1 , s11
449- ; GFX90A-NEXT: v_pk_mov_b32 v[2:3 ], s[12:13], s[12:13] op_sel:[0,1]
448+ ; GFX90A-NEXT: v_mov_b32_e32 v3 , s11
449+ ; GFX90A-NEXT: v_pk_mov_b32 v[0:1 ], s[12:13], s[12:13] op_sel:[0,1]
450450; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
451451; GFX90A-NEXT: v_accvgpr_write_b32 a0, s0
452452; GFX90A-NEXT: v_accvgpr_write_b32 a1, s1
@@ -457,7 +457,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
457457; GFX90A-NEXT: v_accvgpr_write_b32 a6, s6
458458; GFX90A-NEXT: v_accvgpr_write_b32 a7, s7
459459; GFX90A-NEXT: s_nop 1
460- ; GFX90A-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1 ], v[2:3 ], a[0:7] cbsz:1 abid:2 blgp:3
460+ ; GFX90A-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[2:3 ], v[0:1 ], a[0:7] cbsz:1 abid:2 blgp:3
461461; GFX90A-NEXT: v_mov_b32_e32 v0, 0
462462; GFX90A-NEXT: s_nop 7
463463; GFX90A-NEXT: s_nop 7
@@ -471,10 +471,10 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
471471; GFX942-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
472472; GFX942-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34
473473; GFX942-NEXT: s_waitcnt lgkmcnt(0)
474- ; GFX942-NEXT: v_mov_b32_e32 v0 , s10
474+ ; GFX942-NEXT: v_mov_b32_e32 v2 , s10
475475; GFX942-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0
476- ; GFX942-NEXT: v_mov_b32_e32 v1 , s11
477- ; GFX942-NEXT: v_mov_b64_e32 v[2:3 ], s[12:13]
476+ ; GFX942-NEXT: v_mov_b32_e32 v3 , s11
477+ ; GFX942-NEXT: v_mov_b64_e32 v[0:1 ], s[12:13]
478478; GFX942-NEXT: s_waitcnt lgkmcnt(0)
479479; GFX942-NEXT: v_accvgpr_write_b32 a0, s0
480480; GFX942-NEXT: v_accvgpr_write_b32 a1, s1
@@ -485,7 +485,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
485485; GFX942-NEXT: v_accvgpr_write_b32 a6, s6
486486; GFX942-NEXT: v_accvgpr_write_b32 a7, s7
487487; GFX942-NEXT: s_nop 1
488- ; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1 ], v[2:3 ], a[0:7] cbsz:1 abid:2 neg:[1,1,0]
488+ ; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[2:3 ], v[0:1 ], a[0:7] cbsz:1 abid:2 neg:[1,1,0]
489489; GFX942-NEXT: v_mov_b32_e32 v0, 0
490490; GFX942-NEXT: s_nop 7
491491; GFX942-NEXT: s_nop 7
@@ -898,20 +898,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
898898; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
899899; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
900900; GFX90A-NEXT: v_accvgpr_write_b32 a0, 0
901- ; GFX90A-NEXT: v_mov_b32_e32 v2 , 0x3ff00000
902- ; GFX90A-NEXT: v_accvgpr_write_b32 a7, v2
901+ ; GFX90A-NEXT: v_mov_b32_e32 v0 , 0x3ff00000
902+ ; GFX90A-NEXT: v_accvgpr_write_b32 a7, v0
903903; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
904- ; GFX90A-NEXT: v_mov_b32_e32 v0 , s2
905- ; GFX90A-NEXT: v_mov_b32_e32 v1 , s3
904+ ; GFX90A-NEXT: v_mov_b32_e32 v2 , s2
905+ ; GFX90A-NEXT: v_mov_b32_e32 v3 , s3
906906; GFX90A-NEXT: v_accvgpr_mov_b32 a1, a0
907907; GFX90A-NEXT: v_accvgpr_mov_b32 a2, a0
908908; GFX90A-NEXT: v_accvgpr_mov_b32 a3, a0
909909; GFX90A-NEXT: v_accvgpr_mov_b32 a4, a0
910910; GFX90A-NEXT: v_accvgpr_mov_b32 a5, a0
911911; GFX90A-NEXT: v_accvgpr_mov_b32 a6, a0
912- ; GFX90A-NEXT: v_pk_mov_b32 v[2:3 ], s[6:7], s[6:7] op_sel:[0,1]
912+ ; GFX90A-NEXT: v_pk_mov_b32 v[0:1 ], s[6:7], s[6:7] op_sel:[0,1]
913913; GFX90A-NEXT: s_nop 1
914- ; GFX90A-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1 ], v[2:3 ], a[0:7]
914+ ; GFX90A-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[2:3 ], v[0:1 ], a[0:7]
915915; GFX90A-NEXT: v_mov_b32_e32 v0, 0
916916; GFX90A-NEXT: s_nop 7
917917; GFX90A-NEXT: s_nop 7
@@ -925,20 +925,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
925925; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
926926; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
927927; GFX942-NEXT: v_accvgpr_write_b32 a0, 0
928- ; GFX942-NEXT: v_mov_b32_e32 v2 , 0x3ff00000
929- ; GFX942-NEXT: v_accvgpr_write_b32 a7, v2
928+ ; GFX942-NEXT: v_mov_b32_e32 v0 , 0x3ff00000
929+ ; GFX942-NEXT: v_accvgpr_write_b32 a7, v0
930930; GFX942-NEXT: s_waitcnt lgkmcnt(0)
931- ; GFX942-NEXT: v_mov_b32_e32 v0 , s2
932- ; GFX942-NEXT: v_mov_b32_e32 v1 , s3
931+ ; GFX942-NEXT: v_mov_b32_e32 v2 , s2
932+ ; GFX942-NEXT: v_mov_b32_e32 v3 , s3
933933; GFX942-NEXT: v_accvgpr_mov_b32 a1, a0
934934; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0
935935; GFX942-NEXT: v_accvgpr_mov_b32 a3, a0
936936; GFX942-NEXT: v_accvgpr_mov_b32 a4, a0
937937; GFX942-NEXT: v_accvgpr_mov_b32 a5, a0
938938; GFX942-NEXT: v_accvgpr_mov_b32 a6, a0
939- ; GFX942-NEXT: v_mov_b64_e32 v[2:3 ], s[6:7]
939+ ; GFX942-NEXT: v_mov_b64_e32 v[0:1 ], s[6:7]
940940; GFX942-NEXT: s_nop 1
941- ; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1 ], v[2:3 ], a[0:7]
941+ ; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[2:3 ], v[0:1 ], a[0:7]
942942; GFX942-NEXT: v_mov_b32_e32 v0, 0
943943; GFX942-NEXT: s_nop 7
944944; GFX942-NEXT: s_nop 7
@@ -957,21 +957,21 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %
957957; GFX90A: ; %bb.0: ; %bb
958958; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
959959; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
960- ; GFX90A-NEXT: v_mov_b32_e32 v2 , 0x405ec000
960+ ; GFX90A-NEXT: v_mov_b32_e32 v0 , 0x405ec000
961961; GFX90A-NEXT: v_accvgpr_write_b32 a0, 0
962- ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v2
962+ ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v0
963963; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
964- ; GFX90A-NEXT: v_mov_b32_e32 v0 , s2
965- ; GFX90A-NEXT: v_mov_b32_e32 v1 , s3
964+ ; GFX90A-NEXT: v_mov_b32_e32 v2 , s2
965+ ; GFX90A-NEXT: v_mov_b32_e32 v3 , s3
966966; GFX90A-NEXT: v_accvgpr_mov_b32 a2, a0
967967; GFX90A-NEXT: v_accvgpr_mov_b32 a3, a1
968968; GFX90A-NEXT: v_accvgpr_mov_b32 a4, a0
969969; GFX90A-NEXT: v_accvgpr_mov_b32 a5, a1
970970; GFX90A-NEXT: v_accvgpr_mov_b32 a6, a0
971971; GFX90A-NEXT: v_accvgpr_mov_b32 a7, a1
972- ; GFX90A-NEXT: v_pk_mov_b32 v[2:3 ], s[6:7], s[6:7] op_sel:[0,1]
972+ ; GFX90A-NEXT: v_pk_mov_b32 v[0:1 ], s[6:7], s[6:7] op_sel:[0,1]
973973; GFX90A-NEXT: s_nop 1
974- ; GFX90A-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1 ], v[2:3 ], a[0:7]
974+ ; GFX90A-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[2:3 ], v[0:1 ], a[0:7]
975975; GFX90A-NEXT: v_mov_b32_e32 v0, 0
976976; GFX90A-NEXT: s_nop 7
977977; GFX90A-NEXT: s_nop 7
@@ -984,21 +984,21 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %
984984; GFX942: ; %bb.0: ; %bb
985985; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
986986; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
987- ; GFX942-NEXT: v_mov_b32_e32 v2 , 0x405ec000
987+ ; GFX942-NEXT: v_mov_b32_e32 v0 , 0x405ec000
988988; GFX942-NEXT: v_accvgpr_write_b32 a0, 0
989- ; GFX942-NEXT: v_accvgpr_write_b32 a1, v2
989+ ; GFX942-NEXT: v_accvgpr_write_b32 a1, v0
990990; GFX942-NEXT: s_waitcnt lgkmcnt(0)
991- ; GFX942-NEXT: v_mov_b32_e32 v0 , s2
992- ; GFX942-NEXT: v_mov_b32_e32 v1 , s3
991+ ; GFX942-NEXT: v_mov_b32_e32 v2 , s2
992+ ; GFX942-NEXT: v_mov_b32_e32 v3 , s3
993993; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0
994994; GFX942-NEXT: v_accvgpr_mov_b32 a3, a1
995995; GFX942-NEXT: v_accvgpr_mov_b32 a4, a0
996996; GFX942-NEXT: v_accvgpr_mov_b32 a5, a1
997997; GFX942-NEXT: v_accvgpr_mov_b32 a6, a0
998998; GFX942-NEXT: v_accvgpr_mov_b32 a7, a1
999- ; GFX942-NEXT: v_mov_b64_e32 v[2:3 ], s[6:7]
999+ ; GFX942-NEXT: v_mov_b64_e32 v[0:1 ], s[6:7]
10001000; GFX942-NEXT: s_nop 1
1001- ; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1 ], v[2:3 ], a[0:7]
1001+ ; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[2:3 ], v[0:1 ], a[0:7]
10021002; GFX942-NEXT: v_mov_b32_e32 v0, 0
10031003; GFX942-NEXT: s_nop 7
10041004; GFX942-NEXT: s_nop 7
0 commit comments