@@ -58,8 +58,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) #
5858; GCN-NEXT: s_nop 1
5959; GCN-NEXT: v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], v[2:3], a[0:31] cbsz:1 abid:2 blgp:3
6060; GCN-NEXT: v_mov_b32_e32 v0, 0
61- ; GCN-NEXT: s_nop 7
62- ; GCN-NEXT: s_nop 7
61+ ; GCN-NEXT: s_nop 15
6362; GCN-NEXT: s_nop 1
6463; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[34:35]
6564; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[34:35] offset:16
@@ -109,8 +108,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(ptr addrspace(1) %arg) #
109108; GCN-NEXT: s_nop 1
110109; GCN-NEXT: v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3
111110; GCN-NEXT: v_mov_b32_e32 v0, 0
112- ; GCN-NEXT: s_nop 7
113- ; GCN-NEXT: s_nop 1
111+ ; GCN-NEXT: s_nop 9
114112; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[16:17]
115113; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16
116114; GCN-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32
@@ -185,8 +183,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(ptr addrspace(1) %arg) #
185183; GCN-NEXT: s_nop 1
186184; GCN-NEXT: v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3
187185; GCN-NEXT: v_mov_b32_e32 v0, 0
188- ; GCN-NEXT: s_nop 7
189- ; GCN-NEXT: s_nop 7
186+ ; GCN-NEXT: s_nop 15
190187; GCN-NEXT: s_nop 1
191188; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[16:17]
192189; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16
@@ -220,8 +217,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(ptr addrspace(1) %arg)
220217; GCN-NEXT: s_nop 1
221218; GCN-NEXT: v_mfma_f32_16x16x16bf16_1k a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3
222219; GCN-NEXT: v_mov_b32_e32 v0, 0
223- ; GCN-NEXT: s_nop 7
224- ; GCN-NEXT: s_nop 1
220+ ; GCN-NEXT: s_nop 9
225221; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
226222; GCN-NEXT: s_endpgm
227223bb:
@@ -277,8 +273,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
277273; GCN-NEXT: s_nop 1
278274; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7] cbsz:1 abid:2 blgp:3
279275; GCN-NEXT: v_mov_b32_e32 v0, 0
280- ; GCN-NEXT: s_nop 7
281- ; GCN-NEXT: s_nop 7
276+ ; GCN-NEXT: s_nop 15
282277; GCN-NEXT: s_nop 0
283278; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[8:9]
284279; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[8:9] offset:16
@@ -302,8 +297,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm(ptr addrspace(1) %
302297; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], 0
303298; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7] cbsz:1 abid:2 blgp:3
304299; GCN-NEXT: v_mov_b32_e32 v0, 0
305- ; GCN-NEXT: s_nop 7
306- ; GCN-NEXT: s_nop 7
300+ ; GCN-NEXT: s_nop 15
307301; GCN-NEXT: s_nop 0
308302; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
309303; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
@@ -336,8 +330,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
336330; GCN-NEXT: s_nop 1
337331; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7]
338332; GCN-NEXT: v_mov_b32_e32 v0, 0
339- ; GCN-NEXT: s_nop 7
340- ; GCN-NEXT: s_nop 7
333+ ; GCN-NEXT: s_nop 15
341334; GCN-NEXT: s_nop 0
342335; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
343336; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
@@ -369,8 +362,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %
369362; GCN-NEXT: s_nop 1
370363; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7]
371364; GCN-NEXT: v_mov_b32_e32 v0, 0
372- ; GCN-NEXT: s_nop 7
373- ; GCN-NEXT: s_nop 7
365+ ; GCN-NEXT: s_nop 15
374366; GCN-NEXT: s_nop 0
375367; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
376368; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
0 commit comments