@@ -49,7 +49,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16(<8 x bfloat> %arg0, <8 x
4949; GCN-NEXT: v_mov_b32_e32 v9, s17
5050; GCN-NEXT: v_mov_b32_e32 v10, s18
5151; GCN-NEXT: v_mov_b32_e32 v11, s19
52- ; GCN-NEXT: s_nop 3
52+ ; GCN-NEXT: s_nop 4
5353; GCN-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1
5454; GCN-NEXT: s_waitcnt vmcnt(0)
5555; GCN-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1
@@ -122,7 +122,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__flags(<8 x bfloat> %arg0
122122; GCN-NEXT: v_mov_b32_e32 v9, s17
123123; GCN-NEXT: v_mov_b32_e32 v10, s18
124124; GCN-NEXT: v_mov_b32_e32 v11, s19
125- ; GCN-NEXT: s_nop 3
125+ ; GCN-NEXT: s_nop 4
126126; GCN-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1
127127; GCN-NEXT: s_waitcnt vmcnt(0)
128128; GCN-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1
@@ -179,7 +179,7 @@ define <16 x float> @test_mfma_f32_32x32x16_bf16__mac(<8 x bfloat> %arg0, <8 x b
179179; GCN-NEXT: s_nop 1
180180; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15]
181181; GCN-NEXT: s_nop 7
182- ; GCN-NEXT: s_nop 2
182+ ; GCN-NEXT: s_nop 3
183183; GCN-NEXT: v_accvgpr_read_b32 v0, a0
184184; GCN-NEXT: v_accvgpr_read_b32 v1, a1
185185; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -224,7 +224,7 @@ define <16 x float> @test_mfma_f32_32x32x16_bf16__mac__flags(<8 x bfloat> %arg0,
224224; GCN-NEXT: s_nop 1
225225; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:1 abid:1 blgp:1
226226; GCN-NEXT: s_nop 7
227- ; GCN-NEXT: s_nop 2
227+ ; GCN-NEXT: s_nop 3
228228; GCN-NEXT: v_accvgpr_read_b32 v0, a0
229229; GCN-NEXT: v_accvgpr_read_b32 v1, a1
230230; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -417,7 +417,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd_mac(<8 x bfloat>
417417; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15]
418418; GCN-NEXT: v_mov_b32_e32 v0, 0
419419; GCN-NEXT: s_nop 7
420- ; GCN-NEXT: s_nop 1
420+ ; GCN-NEXT: s_nop 2
421421; GCN-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
422422; GCN-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
423423; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
@@ -459,7 +459,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd_mac_flags(<8 x bf
459459; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1
460460; GCN-NEXT: v_mov_b32_e32 v0, 0
461461; GCN-NEXT: s_nop 7
462- ; GCN-NEXT: s_nop 1
462+ ; GCN-NEXT: s_nop 2
463463; GCN-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
464464; GCN-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
465465; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
0 commit comments