@@ -49,7 +49,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16(<8 x bfloat> %arg0, <8 x
4949; GCN-NEXT: v_mov_b32_e32 v9, s13
5050; GCN-NEXT: v_mov_b32_e32 v10, s14
5151; GCN-NEXT: v_mov_b32_e32 v11, s15
52- ; GCN-NEXT: s_nop 3
52+ ; GCN-NEXT: s_nop 4
5353; GCN-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1
5454; GCN-NEXT: s_waitcnt vmcnt(0)
5555; GCN-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1
@@ -122,7 +122,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__flags(<8 x bfloat> %arg0
122122; GCN-NEXT: v_mov_b32_e32 v9, s13
123123; GCN-NEXT: v_mov_b32_e32 v10, s14
124124; GCN-NEXT: v_mov_b32_e32 v11, s15
125- ; GCN-NEXT: s_nop 3
125+ ; GCN-NEXT: s_nop 4
126126; GCN-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1
127127; GCN-NEXT: s_waitcnt vmcnt(0)
128128; GCN-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1
@@ -179,7 +179,7 @@ define <16 x float> @test_mfma_f32_32x32x16_bf16__mac(<8 x bfloat> %arg0, <8 x b
179179; GCN-NEXT: s_nop 1
180180; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15]
181181; GCN-NEXT: s_nop 7
182- ; GCN-NEXT: s_nop 2
182+ ; GCN-NEXT: s_nop 3
183183; GCN-NEXT: v_accvgpr_read_b32 v0, a0
184184; GCN-NEXT: v_accvgpr_read_b32 v1, a1
185185; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -224,7 +224,7 @@ define <16 x float> @test_mfma_f32_32x32x16_bf16__mac__flags(<8 x bfloat> %arg0,
224224; GCN-NEXT: s_nop 1
225225; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:1 abid:1 blgp:1
226226; GCN-NEXT: s_nop 7
227- ; GCN-NEXT: s_nop 2
227+ ; GCN-NEXT: s_nop 3
228228; GCN-NEXT: v_accvgpr_read_b32 v0, a0
229229; GCN-NEXT: v_accvgpr_read_b32 v1, a1
230230; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -396,7 +396,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd_mac(<8 x bfloat>
396396; GCN-NEXT: v_mov_b32_e32 v16, 0
397397; GCN-NEXT: s_waitcnt lgkmcnt(0)
398398; GCN-NEXT: s_nop 7
399- ; GCN-NEXT: s_nop 0
399+ ; GCN-NEXT: s_nop 1
400400; GCN-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
401401; GCN-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
402402; GCN-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
@@ -431,7 +431,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd_mac_flags(<8 x bf
431431; GCN-NEXT: v_mov_b32_e32 v16, 0
432432; GCN-NEXT: s_waitcnt lgkmcnt(0)
433433; GCN-NEXT: s_nop 7
434- ; GCN-NEXT: s_nop 0
434+ ; GCN-NEXT: s_nop 1
435435; GCN-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
436436; GCN-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
437437; GCN-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
0 commit comments