@@ -58,8 +58,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) #
58
58
; GCN-NEXT: s_nop 1
59
59
; GCN-NEXT: v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], v[2:3], a[0:31] cbsz:1 abid:2 blgp:3
60
60
; GCN-NEXT: v_mov_b32_e32 v0, 0
61
- ; GCN-NEXT: s_nop 7
62
- ; GCN-NEXT: s_nop 7
61
+ ; GCN-NEXT: s_nop 15
63
62
; GCN-NEXT: s_nop 1
64
63
; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[34:35]
65
64
; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[34:35] offset:16
@@ -109,8 +108,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(ptr addrspace(1) %arg) #
109
108
; GCN-NEXT: s_nop 1
110
109
; GCN-NEXT: v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3
111
110
; GCN-NEXT: v_mov_b32_e32 v0, 0
112
- ; GCN-NEXT: s_nop 7
113
- ; GCN-NEXT: s_nop 1
111
+ ; GCN-NEXT: s_nop 9
114
112
; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[16:17]
115
113
; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16
116
114
; GCN-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32
@@ -185,8 +183,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(ptr addrspace(1) %arg) #
185
183
; GCN-NEXT: s_nop 1
186
184
; GCN-NEXT: v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3
187
185
; GCN-NEXT: v_mov_b32_e32 v0, 0
188
- ; GCN-NEXT: s_nop 7
189
- ; GCN-NEXT: s_nop 7
186
+ ; GCN-NEXT: s_nop 15
190
187
; GCN-NEXT: s_nop 1
191
188
; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[16:17]
192
189
; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16
@@ -220,8 +217,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(ptr addrspace(1) %arg)
220
217
; GCN-NEXT: s_nop 1
221
218
; GCN-NEXT: v_mfma_f32_16x16x16bf16_1k a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3
222
219
; GCN-NEXT: v_mov_b32_e32 v0, 0
223
- ; GCN-NEXT: s_nop 7
224
- ; GCN-NEXT: s_nop 1
220
+ ; GCN-NEXT: s_nop 9
225
221
; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
226
222
; GCN-NEXT: s_endpgm
227
223
bb:
@@ -277,8 +273,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
277
273
; GCN-NEXT: s_nop 1
278
274
; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7] cbsz:1 abid:2 blgp:3
279
275
; GCN-NEXT: v_mov_b32_e32 v0, 0
280
- ; GCN-NEXT: s_nop 7
281
- ; GCN-NEXT: s_nop 7
276
+ ; GCN-NEXT: s_nop 15
282
277
; GCN-NEXT: s_nop 0
283
278
; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[8:9]
284
279
; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[8:9] offset:16
@@ -302,8 +297,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm(ptr addrspace(1) %
302
297
; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], 0
303
298
; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7] cbsz:1 abid:2 blgp:3
304
299
; GCN-NEXT: v_mov_b32_e32 v0, 0
305
- ; GCN-NEXT: s_nop 7
306
- ; GCN-NEXT: s_nop 7
300
+ ; GCN-NEXT: s_nop 15
307
301
; GCN-NEXT: s_nop 0
308
302
; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
309
303
; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
@@ -336,8 +330,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
336
330
; GCN-NEXT: s_nop 1
337
331
; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7]
338
332
; GCN-NEXT: v_mov_b32_e32 v0, 0
339
- ; GCN-NEXT: s_nop 7
340
- ; GCN-NEXT: s_nop 7
333
+ ; GCN-NEXT: s_nop 15
341
334
; GCN-NEXT: s_nop 0
342
335
; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
343
336
; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
@@ -369,8 +362,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %
369
362
; GCN-NEXT: s_nop 1
370
363
; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7]
371
364
; GCN-NEXT: v_mov_b32_e32 v0, 0
372
- ; GCN-NEXT: s_nop 7
373
- ; GCN-NEXT: s_nop 7
365
+ ; GCN-NEXT: s_nop 15
374
366
; GCN-NEXT: s_nop 0
375
367
; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
376
368
; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
0 commit comments