@@ -19,7 +19,7 @@ define <4 x float> @test_mfma_f32_16x16x32_f16(<8 x half> %arg0, <8 x half> %arg
1919; GCN-NEXT: v_accvgpr_write_b32 a3, v11
2020; GCN-NEXT: s_nop 1
2121; GCN-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3]
22- ; GCN-NEXT: s_nop 6
22+ ; GCN-NEXT: s_nop 7
2323; GCN-NEXT: v_accvgpr_read_b32 v0, a0
2424; GCN-NEXT: v_accvgpr_read_b32 v1, a1
2525; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -39,7 +39,7 @@ define <4 x float> @test_mfma_f32_16x16x32_f16__flags(<8 x half> %arg0, <8 x hal
3939; GCN-NEXT: v_accvgpr_write_b32 a3, v11
4040; GCN-NEXT: s_nop 1
4141; GCN-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:1 abid:1 blgp:1
42- ; GCN-NEXT: s_nop 6
42+ ; GCN-NEXT: s_nop 7
4343; GCN-NEXT: v_accvgpr_read_b32 v0, a0
4444; GCN-NEXT: v_accvgpr_read_b32 v1, a1
4545; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -67,7 +67,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd(ptr addrsp
6767; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
6868; SDAG-NEXT: s_nop 1
6969; SDAG-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3]
70- ; SDAG-NEXT: s_nop 6
70+ ; SDAG-NEXT: s_nop 7
7171; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7]
7272; SDAG-NEXT: s_endpgm
7373;
@@ -88,7 +88,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd(ptr addrsp
8888; GISEL-NEXT: s_nop 1
8989; GISEL-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3]
9090; GISEL-NEXT: v_mov_b32_e32 v0, 0
91- ; GISEL-NEXT: s_nop 5
91+ ; GISEL-NEXT: s_nop 6
9292; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
9393; GISEL-NEXT: s_endpgm
9494 %result = call <4 x float > @llvm.amdgcn.mfma.f32.16x16x32.f16 (<8 x half > %arg0 , <8 x half > %arg1 , <4 x float > %arg2 , i32 0 , i32 0 , i32 0 )
@@ -114,7 +114,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags(ptr
114114; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
115115; SDAG-NEXT: s_nop 1
116116; SDAG-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1
117- ; SDAG-NEXT: s_nop 6
117+ ; SDAG-NEXT: s_nop 7
118118; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7]
119119; SDAG-NEXT: s_endpgm
120120;
@@ -135,7 +135,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags(ptr
135135; GISEL-NEXT: s_nop 1
136136; GISEL-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1
137137; GISEL-NEXT: v_mov_b32_e32 v0, 0
138- ; GISEL-NEXT: s_nop 5
138+ ; GISEL-NEXT: s_nop 6
139139; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
140140; GISEL-NEXT: s_endpgm
141141 %result = call <4 x float > @llvm.amdgcn.mfma.f32.16x16x32.f16 (<8 x half > %arg0 , <8 x half > %arg1 , <4 x float > %arg2 , i32 3 , i32 2 , i32 1 )
@@ -186,7 +186,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x hal
186186; SDAG-NEXT: v_mov_b32_e32 v9, s17
187187; SDAG-NEXT: v_mov_b32_e32 v10, s18
188188; SDAG-NEXT: v_mov_b32_e32 v11, s19
189- ; SDAG-NEXT: s_nop 3
189+ ; SDAG-NEXT: s_nop 4
190190; SDAG-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1
191191; SDAG-NEXT: s_waitcnt vmcnt(0)
192192; SDAG-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1
@@ -253,7 +253,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x hal
253253; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11]
254254; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15]
255255; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19]
256- ; GISEL-NEXT: s_nop 3
256+ ; GISEL-NEXT: s_nop 4
257257; GISEL-NEXT: global_store_dwordx4 v[20:21], a[16:19], off sc0 sc1
258258; GISEL-NEXT: s_waitcnt vmcnt(0)
259259; GISEL-NEXT: global_store_dwordx4 v[22:23], a[20:23], off sc0 sc1
@@ -316,7 +316,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, <
316316; SDAG-NEXT: v_mov_b32_e32 v9, s17
317317; SDAG-NEXT: v_mov_b32_e32 v10, s18
318318; SDAG-NEXT: v_mov_b32_e32 v11, s19
319- ; SDAG-NEXT: s_nop 3
319+ ; SDAG-NEXT: s_nop 4
320320; SDAG-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1
321321; SDAG-NEXT: s_waitcnt vmcnt(0)
322322; SDAG-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1
@@ -383,7 +383,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, <
383383; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11]
384384; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15]
385385; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19]
386- ; GISEL-NEXT: s_nop 3
386+ ; GISEL-NEXT: s_nop 4
387387; GISEL-NEXT: global_store_dwordx4 v[20:21], a[16:19], off sc0 sc1
388388; GISEL-NEXT: s_waitcnt vmcnt(0)
389389; GISEL-NEXT: global_store_dwordx4 v[22:23], a[20:23], off sc0 sc1
@@ -430,7 +430,7 @@ define <16 x float> @test_mfma_f32_32x32x16_f16__mac(<8 x half> %arg0, <8 x half
430430; GCN-NEXT: s_nop 1
431431; GCN-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15]
432432; GCN-NEXT: s_nop 7
433- ; GCN-NEXT: s_nop 2
433+ ; GCN-NEXT: s_nop 3
434434; GCN-NEXT: v_accvgpr_read_b32 v0, a0
435435; GCN-NEXT: v_accvgpr_read_b32 v1, a1
436436; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -475,7 +475,7 @@ define <16 x float> @test_mfma_f32_32x32x16_f16__mac__flags(<8 x half> %arg0, <8
475475; GCN-NEXT: s_nop 1
476476; GCN-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:1 abid:1 blgp:1
477477; GCN-NEXT: s_nop 7
478- ; GCN-NEXT: s_nop 2
478+ ; GCN-NEXT: s_nop 3
479479; GCN-NEXT: v_accvgpr_read_b32 v0, a0
480480; GCN-NEXT: v_accvgpr_read_b32 v1, a1
481481; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -776,7 +776,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac(<8 x half> %ar
776776; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15]
777777; SDAG-NEXT: v_mov_b32_e32 v0, 0
778778; SDAG-NEXT: s_nop 7
779- ; SDAG-NEXT: s_nop 1
779+ ; SDAG-NEXT: s_nop 2
780780; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
781781; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
782782; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
@@ -813,7 +813,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac(<8 x half> %ar
813813; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15]
814814; GISEL-NEXT: v_mov_b32_e32 v0, 0
815815; GISEL-NEXT: s_nop 7
816- ; GISEL-NEXT: s_nop 1
816+ ; GISEL-NEXT: s_nop 2
817817; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
818818; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
819819; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
@@ -855,7 +855,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac_flags(<8 x hal
855855; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1
856856; SDAG-NEXT: v_mov_b32_e32 v0, 0
857857; SDAG-NEXT: s_nop 7
858- ; SDAG-NEXT: s_nop 1
858+ ; SDAG-NEXT: s_nop 2
859859; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
860860; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
861861; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
@@ -892,7 +892,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac_flags(<8 x hal
892892; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1
893893; GISEL-NEXT: v_mov_b32_e32 v0, 0
894894; GISEL-NEXT: s_nop 7
895- ; GISEL-NEXT: s_nop 1
895+ ; GISEL-NEXT: s_nop 2
896896; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
897897; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
898898; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
@@ -919,7 +919,7 @@ define <4 x i32> @test_mfma_i32_16x16x64_i8(<4 x i32> %arg0, <4 x i32> %arg1, <4
919919; GCN-NEXT: v_accvgpr_write_b32 a3, v11
920920; GCN-NEXT: s_nop 1
921921; GCN-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3]
922- ; GCN-NEXT: s_nop 6
922+ ; GCN-NEXT: s_nop 7
923923; GCN-NEXT: v_accvgpr_read_b32 v0, a0
924924; GCN-NEXT: v_accvgpr_read_b32 v1, a1
925925; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -939,7 +939,7 @@ define <4 x i32> @test_mfma_i32_16x16x64_i8__flags(<4 x i32> %arg0, <4 x i32> %a
939939; GCN-NEXT: v_accvgpr_write_b32 a3, v11
940940; GCN-NEXT: s_nop 1
941941; GCN-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:1 abid:1 blgp:1
942- ; GCN-NEXT: s_nop 6
942+ ; GCN-NEXT: s_nop 7
943943; GCN-NEXT: v_accvgpr_read_b32 v0, a0
944944; GCN-NEXT: v_accvgpr_read_b32 v1, a1
945945; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -971,7 +971,7 @@ define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd(ptr addrspa
971971; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
972972; SDAG-NEXT: s_nop 1
973973; SDAG-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3]
974- ; SDAG-NEXT: s_nop 6
974+ ; SDAG-NEXT: s_nop 7
975975; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7]
976976; SDAG-NEXT: s_endpgm
977977;
@@ -992,7 +992,7 @@ define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd(ptr addrspa
992992; GISEL-NEXT: s_nop 1
993993; GISEL-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3]
994994; GISEL-NEXT: v_mov_b32_e32 v0, 0
995- ; GISEL-NEXT: s_nop 5
995+ ; GISEL-NEXT: s_nop 6
996996; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
997997; GISEL-NEXT: s_endpgm
998998 %result = call <4 x i32 > @llvm.amdgcn.mfma.i32.16x16x64.i8 (<4 x i32 > %arg0 , <4 x i32 > %arg1 , <4 x i32 > %arg2 , i32 0 , i32 0 , i32 0 )
@@ -1022,7 +1022,7 @@ define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd__flags(ptr
10221022; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
10231023; SDAG-NEXT: s_nop 1
10241024; SDAG-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1
1025- ; SDAG-NEXT: s_nop 6
1025+ ; SDAG-NEXT: s_nop 7
10261026; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7]
10271027; SDAG-NEXT: s_endpgm
10281028;
@@ -1043,7 +1043,7 @@ define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd__flags(ptr
10431043; GISEL-NEXT: s_nop 1
10441044; GISEL-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1
10451045; GISEL-NEXT: v_mov_b32_e32 v0, 0
1046- ; GISEL-NEXT: s_nop 5
1046+ ; GISEL-NEXT: s_nop 6
10471047; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
10481048; GISEL-NEXT: s_endpgm
10491049 %result = call <4 x i32 > @llvm.amdgcn.mfma.i32.16x16x64.i8 (<4 x i32 > %arg0 , <4 x i32 > %arg1 , <4 x i32 > %arg2 , i32 3 , i32 2 , i32 1 )
@@ -1097,7 +1097,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8(<4 x i32> %arg0, <4 x i32>
10971097; SDAG-NEXT: v_mov_b32_e32 v1, s17
10981098; SDAG-NEXT: v_mov_b32_e32 v2, s18
10991099; SDAG-NEXT: v_mov_b32_e32 v3, s19
1100- ; SDAG-NEXT: s_nop 6
1100+ ; SDAG-NEXT: s_nop 7
11011101; SDAG-NEXT: global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1
11021102; SDAG-NEXT: s_waitcnt vmcnt(0)
11031103; SDAG-NEXT: global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1
@@ -1169,7 +1169,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8(<4 x i32> %arg0, <4 x i32>
11691169; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11]
11701170; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15]
11711171; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19]
1172- ; GISEL-NEXT: s_nop 3
1172+ ; GISEL-NEXT: s_nop 4
11731173; GISEL-NEXT: global_store_dwordx4 v[20:21], a[16:19], off sc0 sc1
11741174; GISEL-NEXT: s_waitcnt vmcnt(0)
11751175; GISEL-NEXT: global_store_dwordx4 v[22:23], a[20:23], off sc0 sc1
@@ -1233,7 +1233,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__flags(<4 x i32> %arg0, <4
12331233; SDAG-NEXT: v_mov_b32_e32 v1, s17
12341234; SDAG-NEXT: v_mov_b32_e32 v2, s18
12351235; SDAG-NEXT: v_mov_b32_e32 v3, s19
1236- ; SDAG-NEXT: s_nop 6
1236+ ; SDAG-NEXT: s_nop 7
12371237; SDAG-NEXT: global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1
12381238; SDAG-NEXT: s_waitcnt vmcnt(0)
12391239; SDAG-NEXT: global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1
@@ -1305,7 +1305,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__flags(<4 x i32> %arg0, <4
13051305; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11]
13061306; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15]
13071307; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19]
1308- ; GISEL-NEXT: s_nop 3
1308+ ; GISEL-NEXT: s_nop 4
13091309; GISEL-NEXT: global_store_dwordx4 v[20:21], a[16:19], off sc0 sc1
13101310; GISEL-NEXT: s_waitcnt vmcnt(0)
13111311; GISEL-NEXT: global_store_dwordx4 v[22:23], a[20:23], off sc0 sc1
@@ -1352,7 +1352,7 @@ define <16 x i32> @test_mfma_i32_32x32x32_i8__mac(<4 x i32> %arg0, <4 x i32> %ar
13521352; GCN-NEXT: s_nop 1
13531353; GCN-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15]
13541354; GCN-NEXT: s_nop 7
1355- ; GCN-NEXT: s_nop 2
1355+ ; GCN-NEXT: s_nop 3
13561356; GCN-NEXT: v_accvgpr_read_b32 v0, a0
13571357; GCN-NEXT: v_accvgpr_read_b32 v1, a1
13581358; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -1397,7 +1397,7 @@ define <16 x i32> @test_mfma_i32_32x32x32_i8__mac__flags(<4 x i32> %arg0, <4 x i
13971397; GCN-NEXT: s_nop 1
13981398; GCN-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:1 abid:1 blgp:1
13991399; GCN-NEXT: s_nop 7
1400- ; GCN-NEXT: s_nop 2
1400+ ; GCN-NEXT: s_nop 3
14011401; GCN-NEXT: v_accvgpr_read_b32 v0, a0
14021402; GCN-NEXT: v_accvgpr_read_b32 v1, a1
14031403; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -1717,7 +1717,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac(<4 x i32> %arg0
17171717; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15]
17181718; SDAG-NEXT: v_mov_b32_e32 v0, 0
17191719; SDAG-NEXT: s_nop 7
1720- ; SDAG-NEXT: s_nop 1
1720+ ; SDAG-NEXT: s_nop 2
17211721; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
17221722; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
17231723; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
@@ -1754,7 +1754,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac(<4 x i32> %arg0
17541754; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15]
17551755; GISEL-NEXT: v_mov_b32_e32 v0, 0
17561756; GISEL-NEXT: s_nop 7
1757- ; GISEL-NEXT: s_nop 1
1757+ ; GISEL-NEXT: s_nop 2
17581758; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
17591759; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
17601760; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
@@ -1801,7 +1801,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac_flags(<4 x i32>
18011801; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1
18021802; SDAG-NEXT: v_mov_b32_e32 v0, 0
18031803; SDAG-NEXT: s_nop 7
1804- ; SDAG-NEXT: s_nop 1
1804+ ; SDAG-NEXT: s_nop 2
18051805; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
18061806; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
18071807; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
@@ -1838,7 +1838,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac_flags(<4 x i32>
18381838; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1
18391839; GISEL-NEXT: v_mov_b32_e32 v0, 0
18401840; GISEL-NEXT: s_nop 7
1841- ; GISEL-NEXT: s_nop 1
1841+ ; GISEL-NEXT: s_nop 2
18421842; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
18431843; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
18441844; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
@@ -1865,7 +1865,7 @@ define <4 x float> @test_mfma_f32_16x16x32_bf16(<8 x bfloat> %arg0, <8 x bfloat>
18651865; GCN-NEXT: v_accvgpr_write_b32 a3, v11
18661866; GCN-NEXT: s_nop 1
18671867; GCN-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3]
1868- ; GCN-NEXT: s_nop 6
1868+ ; GCN-NEXT: s_nop 7
18691869; GCN-NEXT: v_accvgpr_read_b32 v0, a0
18701870; GCN-NEXT: v_accvgpr_read_b32 v1, a1
18711871; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -1885,7 +1885,7 @@ define <4 x float> @test_mfma_f32_16x16x32_bf16__flags(<8 x bfloat> %arg0, <8 x
18851885; GCN-NEXT: v_accvgpr_write_b32 a3, v11
18861886; GCN-NEXT: s_nop 1
18871887; GCN-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:1 abid:1 blgp:1
1888- ; GCN-NEXT: s_nop 6
1888+ ; GCN-NEXT: s_nop 7
18891889; GCN-NEXT: v_accvgpr_read_b32 v0, a0
18901890; GCN-NEXT: v_accvgpr_read_b32 v1, a1
18911891; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -1913,7 +1913,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd(ptr addrs
19131913; GCN-NEXT: v_accvgpr_write_b32 a3, s3
19141914; GCN-NEXT: s_nop 1
19151915; GCN-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3]
1916- ; GCN-NEXT: s_nop 6
1916+ ; GCN-NEXT: s_nop 7
19171917; GCN-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7]
19181918; GCN-NEXT: s_endpgm
19191919 %result = call <4 x float > @llvm.amdgcn.mfma.f32.16x16x32.bf16 (<8 x bfloat> %arg0 , <8 x bfloat> %arg1 , <4 x float > %arg2 , i32 0 , i32 0 , i32 0 )
@@ -1939,7 +1939,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags(pt
19391939; GCN-NEXT: v_accvgpr_write_b32 a3, s3
19401940; GCN-NEXT: s_nop 1
19411941; GCN-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1
1942- ; GCN-NEXT: s_nop 6
1942+ ; GCN-NEXT: s_nop 7
19431943; GCN-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7]
19441944; GCN-NEXT: s_endpgm
19451945 %result = call <4 x float > @llvm.amdgcn.mfma.f32.16x16x32.bf16 (<8 x bfloat> %arg0 , <8 x bfloat> %arg1 , <4 x float > %arg2 , i32 3 , i32 2 , i32 1 )
0 commit comments