@@ -297,21 +297,194 @@ define i32 @test_v64i32_load_store(ptr addrspace(1) %ptr, i32 %idx, ptr addrspac
297297 ret i32 %elt
298298}
299299
300- ;TODO: This test should be enabled in the upstream later. It currently causes a crash
301- ; during branch relaxation as the gfx1250 real opcode definition for V_LSHL_ADD_U64
302- ; is not yet upstreamed.
303- ;define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %ptr_b, ptr addrspace(1) %out) {
304- ; %a = load <16 x i64>, ptr addrspace(1) %ptr_a, align 4
305- ; %in_a = insertelement <16 x i64> %a, i64 100, i32 5
306- ; store <16 x i64> %in_a, ptr addrspace(1) null
307- ; %b = load <16 x i64>, ptr addrspace(1) %ptr_b, align 4
308- ; %in_b = insertelement <16 x i64> %a, i64 200, i32 10
309- ; store <16 x i64> %in_b, ptr addrspace(1) null
310- ; %add = add <16 x i64> %in_a, %in_b
311- ; store <16 x i64> %add, ptr addrspace(1) %out, align 4
312- ; %elt = extractelement <16 x i64> %add, i32 1
313- ; ret i64 %elt
314- ;}
; Loads a <16 x i64> from %ptr_a, builds %in_a (element 5 set to 100) and
; %in_b (element 10 set to 200), stores each of them through the null
; addrspace(1) pointer, stores their element-wise sum to %out and returns
; element 1 of the sum.
; The assertion lines below pin the exact expected instruction sequence for
; both check prefixes, including the s_wait_loadcnt / s_wait_xcnt placement
; and the v_lshl_add_u64 instructions used for the 64-bit adds.
300+ define i64 @test_v16i64_load_store (ptr addrspace (1 ) %ptr_a , ptr addrspace (1 ) %ptr_b , ptr addrspace (1 ) %out ) {
301+ ; GCN-SDAG-LABEL: test_v16i64_load_store:
302+ ; GCN-SDAG: ; %bb.0:
303+ ; GCN-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
304+ ; GCN-SDAG-NEXT: s_wait_kmcnt 0x0
305+ ; GCN-SDAG-NEXT: s_clause 0x3
306+ ; GCN-SDAG-NEXT: scratch_store_b32 off, v40, s32 offset:12
307+ ; GCN-SDAG-NEXT: scratch_store_b32 off, v41, s32 offset:8
308+ ; GCN-SDAG-NEXT: scratch_store_b32 off, v42, s32 offset:4
309+ ; GCN-SDAG-NEXT: scratch_store_b32 off, v43, s32
310+ ; GCN-SDAG-NEXT: s_clause 0x7
311+ ; GCN-SDAG-NEXT: global_load_b128 v[6:9], v[0:1], off offset:112
312+ ; GCN-SDAG-NEXT: global_load_b128 v[10:13], v[0:1], off offset:96
313+ ; GCN-SDAG-NEXT: global_load_b128 v[18:21], v[0:1], off offset:80
314+ ; GCN-SDAG-NEXT: global_load_b128 v[34:37], v[0:1], off offset:48
315+ ; GCN-SDAG-NEXT: global_load_b128 v[30:33], v[0:1], off offset:32
316+ ; GCN-SDAG-NEXT: global_load_b128 v[22:25], v[0:1], off offset:16
317+ ; GCN-SDAG-NEXT: global_load_b128 v[26:29], v[0:1], off
318+ ; GCN-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:64
319+ ; GCN-SDAG-NEXT: v_mov_b32_e32 v16, 0x70
320+ ; GCN-SDAG-NEXT: v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v50, 0x60
321+ ; GCN-SDAG-NEXT: v_dual_mov_b32 v51, 0 :: v_dual_mov_b32 v52, 48
322+ ; GCN-SDAG-NEXT: v_dual_mov_b32 v38, 0x50 :: v_dual_mov_b32 v53, 0
323+ ; GCN-SDAG-NEXT: v_mov_b32_e32 v54, 32
324+ ; GCN-SDAG-NEXT: v_dual_mov_b32 v14, 0xc8 :: v_dual_mov_b32 v15, 0
325+ ; GCN-SDAG-NEXT: v_dual_mov_b32 v39, 0 :: v_dual_mov_b32 v48, 64
326+ ; GCN-SDAG-NEXT: v_dual_mov_b32 v55, 0 :: v_dual_mov_b32 v40, 16
327+ ; GCN-SDAG-NEXT: v_mov_b32_e32 v49, 0
328+ ; GCN-SDAG-NEXT: v_dual_mov_b32 v41, 0 :: v_dual_mov_b32 v42, 0
329+ ; GCN-SDAG-NEXT: v_mov_b32_e32 v43, 0
330+ ; GCN-SDAG-NEXT: s_wait_loadcnt 0x7
331+ ; GCN-SDAG-NEXT: global_store_b128 v[16:17], v[6:9], off
332+ ; GCN-SDAG-NEXT: s_wait_loadcnt 0x6
333+ ; GCN-SDAG-NEXT: global_store_b128 v[50:51], v[10:13], off
334+ ; GCN-SDAG-NEXT: s_wait_loadcnt 0x5
335+ ; GCN-SDAG-NEXT: s_wait_xcnt 0x1
336+ ; GCN-SDAG-NEXT: v_dual_mov_b32 v16, v20 :: v_dual_mov_b32 v17, v21
337+ ; GCN-SDAG-NEXT: s_wait_xcnt 0x0
338+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[12:13], v[12:13], 0, v[12:13]
339+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[10:11], v[10:11], 0, v[10:11]
340+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[8:9], v[8:9], 0, v[8:9]
341+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[6:7], v[6:7], 0, v[6:7]
342+ ; GCN-SDAG-NEXT: s_wait_loadcnt 0x4
343+ ; GCN-SDAG-NEXT: global_store_b128 v[52:53], v[34:37], off
344+ ; GCN-SDAG-NEXT: s_wait_loadcnt 0x3
345+ ; GCN-SDAG-NEXT: global_store_b128 v[54:55], v[30:33], off
346+ ; GCN-SDAG-NEXT: s_wait_loadcnt 0x2
347+ ; GCN-SDAG-NEXT: global_store_b128 v[40:41], v[22:25], off
348+ ; GCN-SDAG-NEXT: s_wait_loadcnt 0x1
349+ ; GCN-SDAG-NEXT: global_store_b128 v[42:43], v[26:29], off
350+ ; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
351+ ; GCN-SDAG-NEXT: s_wait_xcnt 0x3
352+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[52:53], v[2:3], 0, v[2:3]
353+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[50:51], v[0:1], 0, v[0:1]
354+ ; GCN-SDAG-NEXT: s_wait_xcnt 0x1
355+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[24:25], v[24:25], 0, v[24:25]
356+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[22:23], v[22:23], 0, v[22:23]
357+ ; GCN-SDAG-NEXT: s_wait_xcnt 0x0
358+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[28:29], v[28:29], 0, v[28:29]
359+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[26:27], v[26:27], 0, v[26:27]
360+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[36:37], v[36:37], 0, v[36:37]
361+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[34:35], v[34:35], 0, v[34:35]
362+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[32:33], v[32:33], 0, 0x64
363+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[30:31], v[30:31], 0, v[30:31]
364+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[20:21], v[20:21], 0, v[20:21]
365+ ; GCN-SDAG-NEXT: v_lshl_add_u64 v[18:19], v[18:19], 0, 0xc8
366+ ; GCN-SDAG-NEXT: s_clause 0x1
367+ ; GCN-SDAG-NEXT: global_store_b128 v[38:39], v[14:17], off
368+ ; GCN-SDAG-NEXT: global_store_b128 v[48:49], v[0:3], off
369+ ; GCN-SDAG-NEXT: s_clause 0x7
370+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[10:13], off offset:96
371+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[6:9], off offset:112
372+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[50:53], off offset:64
373+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[18:21], off offset:80
374+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[30:33], off offset:32
375+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[34:37], off offset:48
376+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[26:29], off
377+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[22:25], off offset:16
378+ ; GCN-SDAG-NEXT: s_clause 0x3
379+ ; GCN-SDAG-NEXT: scratch_load_b32 v43, off, s32
380+ ; GCN-SDAG-NEXT: scratch_load_b32 v42, off, s32 offset:4
381+ ; GCN-SDAG-NEXT: scratch_load_b32 v41, off, s32 offset:8
382+ ; GCN-SDAG-NEXT: scratch_load_b32 v40, off, s32 offset:12
383+ ; GCN-SDAG-NEXT: s_wait_xcnt 0xc
384+ ; GCN-SDAG-NEXT: v_dual_mov_b32 v0, v28 :: v_dual_mov_b32 v1, v29
385+ ; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
386+ ; GCN-SDAG-NEXT: s_set_pc_i64 s[30:31]
387+ ;
388+ ; GCN-GISEL-LABEL: test_v16i64_load_store:
389+ ; GCN-GISEL: ; %bb.0:
390+ ; GCN-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
391+ ; GCN-GISEL-NEXT: s_wait_kmcnt 0x0
392+ ; GCN-GISEL-NEXT: s_clause 0x5
393+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v40, s32 offset:20
394+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v41, s32 offset:16
395+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v42, s32 offset:12
396+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v43, s32 offset:8
397+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v44, s32 offset:4
398+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v45, s32
399+ ; GCN-GISEL-NEXT: s_clause 0x7
400+ ; GCN-GISEL-NEXT: global_load_b128 v[6:9], v[0:1], off offset:80
401+ ; GCN-GISEL-NEXT: global_load_b128 v[10:13], v[0:1], off
402+ ; GCN-GISEL-NEXT: global_load_b128 v[14:17], v[0:1], off offset:16
403+ ; GCN-GISEL-NEXT: global_load_b128 v[18:21], v[0:1], off offset:32
404+ ; GCN-GISEL-NEXT: global_load_b128 v[22:25], v[0:1], off offset:48
405+ ; GCN-GISEL-NEXT: global_load_b128 v[26:29], v[0:1], off offset:96
406+ ; GCN-GISEL-NEXT: global_load_b128 v[30:33], v[0:1], off offset:112
407+ ; GCN-GISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:64
408+ ; GCN-GISEL-NEXT: v_mov_b32_e32 v34, 0xc8
409+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v35, 0 :: v_dual_mov_b32 v38, 0
410+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v39, 0 :: v_dual_mov_b32 v48, 16
411+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v49, 0 :: v_dual_mov_b32 v50, 32
412+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v52, 48 :: v_dual_mov_b32 v51, 0
413+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v53, 0 :: v_dual_mov_b32 v54, 64
414+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v40, 0x50 :: v_dual_mov_b32 v55, 0
415+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v41, 0 :: v_dual_mov_b32 v42, 0x60
416+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v44, 0x70 :: v_dual_mov_b32 v43, 0
417+ ; GCN-GISEL-NEXT: v_mov_b32_e32 v45, 0
418+ ; GCN-GISEL-NEXT: s_wait_loadcnt 0x7
419+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v37, v9 :: v_dual_mov_b32 v36, v8
420+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[6:7], v[6:7], 0, 0xc8
421+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[8:9], v[8:9], 0, v[8:9]
422+ ; GCN-GISEL-NEXT: s_wait_loadcnt 0x6
423+ ; GCN-GISEL-NEXT: global_store_b128 v[38:39], v[10:13], off
424+ ; GCN-GISEL-NEXT: s_wait_loadcnt 0x5
425+ ; GCN-GISEL-NEXT: global_store_b128 v[48:49], v[14:17], off
426+ ; GCN-GISEL-NEXT: s_wait_loadcnt 0x4
427+ ; GCN-GISEL-NEXT: global_store_b128 v[50:51], v[18:21], off
428+ ; GCN-GISEL-NEXT: s_wait_loadcnt 0x3
429+ ; GCN-GISEL-NEXT: global_store_b128 v[52:53], v[22:25], off
430+ ; GCN-GISEL-NEXT: s_wait_loadcnt 0x2
431+ ; GCN-GISEL-NEXT: global_store_b128 v[42:43], v[26:29], off
432+ ; GCN-GISEL-NEXT: s_wait_loadcnt 0x1
433+ ; GCN-GISEL-NEXT: global_store_b128 v[44:45], v[30:33], off
434+ ; GCN-GISEL-NEXT: s_wait_xcnt 0x5
435+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[10:11], v[10:11], 0, v[10:11]
436+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[12:13], v[12:13], 0, v[12:13]
437+ ; GCN-GISEL-NEXT: s_wait_xcnt 0x4
438+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[14:15], v[14:15], 0, v[14:15]
439+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[16:17], v[16:17], 0, v[16:17]
440+ ; GCN-GISEL-NEXT: s_wait_xcnt 0x3
441+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[18:19], v[18:19], 0, v[18:19]
442+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[20:21], v[20:21], 0, 0x64
443+ ; GCN-GISEL-NEXT: s_wait_xcnt 0x2
444+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[22:23], v[22:23], 0, v[22:23]
445+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[24:25], v[24:25], 0, v[24:25]
446+ ; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
447+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[48:49], v[0:1], 0, v[0:1]
448+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[50:51], v[2:3], 0, v[2:3]
449+ ; GCN-GISEL-NEXT: s_wait_xcnt 0x1
450+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[26:27], v[26:27], 0, v[26:27]
451+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[28:29], v[28:29], 0, v[28:29]
452+ ; GCN-GISEL-NEXT: s_wait_xcnt 0x0
453+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[30:31], v[30:31], 0, v[30:31]
454+ ; GCN-GISEL-NEXT: v_lshl_add_u64 v[32:33], v[32:33], 0, v[32:33]
455+ ; GCN-GISEL-NEXT: s_clause 0x1
456+ ; GCN-GISEL-NEXT: global_store_b128 v[54:55], v[0:3], off
457+ ; GCN-GISEL-NEXT: global_store_b128 v[40:41], v[34:37], off
458+ ; GCN-GISEL-NEXT: s_clause 0x7
459+ ; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[10:13], off
460+ ; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[14:17], off offset:16
461+ ; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[18:21], off offset:32
462+ ; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[22:25], off offset:48
463+ ; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[48:51], off offset:64
464+ ; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[6:9], off offset:80
465+ ; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[26:29], off offset:96
466+ ; GCN-GISEL-NEXT: global_store_b128 v[4:5], v[30:33], off offset:112
467+ ; GCN-GISEL-NEXT: s_clause 0x5
468+ ; GCN-GISEL-NEXT: scratch_load_b32 v45, off, s32
469+ ; GCN-GISEL-NEXT: scratch_load_b32 v44, off, s32 offset:4
470+ ; GCN-GISEL-NEXT: scratch_load_b32 v43, off, s32 offset:8
471+ ; GCN-GISEL-NEXT: scratch_load_b32 v42, off, s32 offset:12
472+ ; GCN-GISEL-NEXT: scratch_load_b32 v41, off, s32 offset:16
473+ ; GCN-GISEL-NEXT: scratch_load_b32 v40, off, s32 offset:20
474+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v0, v12 :: v_dual_mov_b32 v1, v13
475+ ; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
476+ ; GCN-GISEL-NEXT: s_set_pc_i64 s[30:31]
477+ %a = load <16 x i64 >, ptr addrspace (1 ) %ptr_a , align 4
478+ %in_a = insertelement <16 x i64 > %a , i64 100 , i32 5
479+ store <16 x i64 > %in_a , ptr addrspace (1 ) null
480+ %b = load <16 x i64 >, ptr addrspace (1 ) %ptr_b , align 4
; NOTE(review): %b is loaded above but never used; %in_b below is built from
; %a, and the expected output only issues global loads through v[0:1]. This
; looks like a copy-paste slip (%b presumably intended) - confirm with the
; test author. Switching the operand to %b changes the generated code, so all
; expected-output lines of this function would have to be regenerated.
481+ %in_b = insertelement <16 x i64 > %a , i64 200 , i32 10
482+ store <16 x i64 > %in_b , ptr addrspace (1 ) null
483+ %add = add <16 x i64 > %in_a , %in_b
484+ store <16 x i64 > %add , ptr addrspace (1 ) %out , align 4
485+ %elt = extractelement <16 x i64 > %add , i32 1
486+ ret i64 %elt
487+ }
315488
316489define amdgpu_kernel void @test_v7i16_load_store_kernel (ptr addrspace (1 ) %ptr1 , ptr addrspace (1 ) %ptr2 , ptr addrspace (1 ) %out ) {
317490; GCN-SDAG-LABEL: test_v7i16_load_store_kernel:
0 commit comments