@@ -444,77 +444,65 @@ define amdgpu_kernel void @memcpy_known(ptr addrspace(7) inreg %src, ptr addrspa
444444; GISEL-GFX942-NEXT: s_mov_b32 s2, s7
445445; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
446446; GISEL-GFX942-NEXT: s_or_b64 s[6:7], s[6:7], s[2:3]
447- ; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, s16
447+ ; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, 0x2000
448+ ; GISEL-GFX942-NEXT: v_mov_b32_e32 v1, s16
448449; GISEL-GFX942-NEXT: .LBB0_1: ; %load-store-loop
449450; GISEL-GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
450- ; GISEL-GFX942-NEXT: v_add_u32_e32 v1, s0, v0
451- ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[2:5], v1, s[8:11], 0 offen
452- ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[6:9], v1, s[8:11], 0 offen offset:16
453- ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v1, s[8:11], 0 offen offset:32
454- ; GISEL-GFX942-NEXT: v_add_u32_e32 v62, s12, v0
455- ; GISEL-GFX942-NEXT: v_add_u32_e32 v0, 0x100, v0
451+ ; GISEL-GFX942-NEXT: v_add_u32_e32 v62, s0, v1
452+ ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[2:5], v62, s[8:11], 0 offen
453+ ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[6:9], v62, s[8:11], 0 offen offset:16
454+ ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v62, s[8:11], 0 offen offset:32
455+ ; GISEL-GFX942-NEXT: v_add_u32_e32 v63, s12, v1
456+ ; GISEL-GFX942-NEXT: v_add_u32_e32 v1, 0x100, v1
457+ ; GISEL-GFX942-NEXT: v_cmp_lt_u32_e32 vcc, v1, v0
456458; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0)
457459; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a0, v13 ; Reload Reuse
458460; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a1, v12 ; Reload Reuse
459461; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a2, v11 ; Reload Reuse
460462; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a3, v10 ; Reload Reuse
461- ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[14:17], v1 , s[8:11], 0 offen offset:48
462- ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[18:21], v1 , s[8:11], 0 offen offset:64
463- ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[22:25], v1 , s[8:11], 0 offen offset:80
464- ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[26:29], v1 , s[8:11], 0 offen offset:96
465- ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[30:33], v1 , s[8:11], 0 offen offset:112
466- ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[34:37], v1 , s[8:11], 0 offen offset:128
467- ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[38:41], v1 , s[8:11], 0 offen offset:144
468- ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[42:45], v1 , s[8:11], 0 offen offset:160
469- ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[46:49], v1 , s[8:11], 0 offen offset:176
470- ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[50:53], v1 , s[8:11], 0 offen offset:192
471- ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[54:57], v1 , s[8:11], 0 offen offset:208
472- ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[58:61], v1 , s[8:11], 0 offen offset:224
473- ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v1 , s[8:11], 0 offen offset:240
463+ ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[14:17], v62 , s[8:11], 0 offen offset:48
464+ ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[18:21], v62 , s[8:11], 0 offen offset:64
465+ ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[22:25], v62 , s[8:11], 0 offen offset:80
466+ ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[26:29], v62 , s[8:11], 0 offen offset:96
467+ ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[30:33], v62 , s[8:11], 0 offen offset:112
468+ ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[34:37], v62 , s[8:11], 0 offen offset:128
469+ ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[38:41], v62 , s[8:11], 0 offen offset:144
470+ ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[42:45], v62 , s[8:11], 0 offen offset:160
471+ ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[46:49], v62 , s[8:11], 0 offen offset:176
472+ ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[50:53], v62 , s[8:11], 0 offen offset:192
473+ ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[54:57], v62 , s[8:11], 0 offen offset:208
474+ ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[58:61], v62 , s[8:11], 0 offen offset:224
475+ ; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v62 , s[8:11], 0 offen offset:240
474476; GISEL-GFX942-NEXT: s_nop 0
475- ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v62, s[4:7], 0 offen
476- ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[6:9], v62, s[4:7], 0 offen offset:16
477- ; GISEL-GFX942-NEXT: v_mov_b32_e32 v1, 0x2000
477+ ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v63, s[4:7], 0 offen
478+ ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[6:9], v63, s[4:7], 0 offen offset:16
479+ ; GISEL-GFX942-NEXT: s_waitcnt vmcnt(2)
480+ ; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a4, v13 ; Reload Reuse
478481; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v5, a0 ; Reload Reuse
479482; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v4, a1 ; Reload Reuse
480483; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v3, a2 ; Reload Reuse
481484; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v2, a3 ; Reload Reuse
482- ; GISEL-GFX942-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
483- ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v62, s[4:7], 0 offen offset:32
484- ; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
485- ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[14:17], v62, s[4:7], 0 offen offset:48
486- ; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
487- ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[18:21], v62, s[4:7], 0 offen offset:64
488- ; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
489- ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[22:25], v62, s[4:7], 0 offen offset:80
490- ; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
491- ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[26:29], v62, s[4:7], 0 offen offset:96
492- ; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
493- ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[30:33], v62, s[4:7], 0 offen offset:112
494- ; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
495- ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[34:37], v62, s[4:7], 0 offen offset:128
496- ; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
497- ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[38:41], v62, s[4:7], 0 offen offset:144
498- ; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
499- ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[42:45], v62, s[4:7], 0 offen offset:160
500- ; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
501- ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[46:49], v62, s[4:7], 0 offen offset:176
502- ; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
503- ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[50:53], v62, s[4:7], 0 offen offset:192
504- ; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
505- ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[54:57], v62, s[4:7], 0 offen offset:208
506- ; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
507- ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[58:61], v62, s[4:7], 0 offen offset:224
508- ; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
509- ; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a4, v13 ; Reload Reuse
510485; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a5, v12 ; Reload Reuse
511486; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a6, v11 ; Reload Reuse
512487; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a7, v10 ; Reload Reuse
488+ ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v63, s[4:7], 0 offen offset:32
489+ ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[14:17], v63, s[4:7], 0 offen offset:48
490+ ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[18:21], v63, s[4:7], 0 offen offset:64
491+ ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[22:25], v63, s[4:7], 0 offen offset:80
492+ ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[26:29], v63, s[4:7], 0 offen offset:96
493+ ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[30:33], v63, s[4:7], 0 offen offset:112
494+ ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[34:37], v63, s[4:7], 0 offen offset:128
495+ ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[38:41], v63, s[4:7], 0 offen offset:144
496+ ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[42:45], v63, s[4:7], 0 offen offset:160
497+ ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[46:49], v63, s[4:7], 0 offen offset:176
498+ ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[50:53], v63, s[4:7], 0 offen offset:192
499+ ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[54:57], v63, s[4:7], 0 offen offset:208
500+ ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[58:61], v63, s[4:7], 0 offen offset:224
513501; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v5, a4 ; Reload Reuse
514502; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v4, a5 ; Reload Reuse
515503; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v3, a6 ; Reload Reuse
516504; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v2, a7 ; Reload Reuse
517- ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v62 , s[4:7], 0 offen offset:240
505+ ; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v63 , s[4:7], 0 offen offset:240
518506; GISEL-GFX942-NEXT: s_cbranch_vccnz .LBB0_1
519507; GISEL-GFX942-NEXT: ; %bb.2: ; %memcpy-split
520508; GISEL-GFX942-NEXT: s_endpgm
0 commit comments