@@ -257,20 +257,16 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
257257; GFX12:       ; %bb.0: ; %bb 
258258; GFX12-NEXT:    s_load_b32 s0, s[4:5], 0x0 
259259; GFX12-NEXT:    v_and_b32_e32 v0, 0x3ff, v0 
260- ; GFX12-NEXT:    v_mov_b32_e32 v2, 15 
261- ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 
260+ ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) 
262261; GFX12-NEXT:    v_sub_nc_u32_e32 v1, 0, v0 
262+ ; GFX12-NEXT:    v_mov_b32_e32 v2, 15 
263263; GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0 
264264; GFX12-NEXT:    v_lshlrev_b32_e32 v1, 2, v1 
265265; GFX12-NEXT:    s_wait_kmcnt 0x0 
266266; GFX12-NEXT:    s_lshl_b32 s0, s0, 7 
267- ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) 
268- ; GFX12-NEXT:    v_add_nc_u32_e32 v0, s0, v0 
269- ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) 
270- ; GFX12-NEXT:    v_add_nc_u32_e32 v1, s0, v1 
271- ; GFX12-NEXT:    scratch_store_b32 v0, v2, off scope:SCOPE_SYS 
267+ ; GFX12-NEXT:    scratch_store_b32 v0, v2, s0 scope:SCOPE_SYS 
272268; GFX12-NEXT:    s_wait_storecnt 0x0 
273- ; GFX12-NEXT:    scratch_load_b32 v0, v1, off  offset:124 scope:SCOPE_SYS 
269+ ; GFX12-NEXT:    scratch_load_b32 v0, v1, s0  offset:124 scope:SCOPE_SYS 
274270; GFX12-NEXT:    s_wait_loadcnt 0x0 
275271; GFX12-NEXT:    s_endpgm 
276272; 
@@ -357,20 +353,16 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
357353; UNALIGNED_GFX12:       ; %bb.0: ; %bb 
358354; UNALIGNED_GFX12-NEXT:    s_load_b32 s0, s[4:5], 0x0 
359355; UNALIGNED_GFX12-NEXT:    v_and_b32_e32 v0, 0x3ff, v0 
360- ; UNALIGNED_GFX12-NEXT:    v_mov_b32_e32 v2, 15 
361- ; UNALIGNED_GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 
356+ ; UNALIGNED_GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) 
362357; UNALIGNED_GFX12-NEXT:    v_sub_nc_u32_e32 v1, 0, v0 
358+ ; UNALIGNED_GFX12-NEXT:    v_mov_b32_e32 v2, 15 
363359; UNALIGNED_GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0 
364360; UNALIGNED_GFX12-NEXT:    v_lshlrev_b32_e32 v1, 2, v1 
365361; UNALIGNED_GFX12-NEXT:    s_wait_kmcnt 0x0 
366362; UNALIGNED_GFX12-NEXT:    s_lshl_b32 s0, s0, 7 
367- ; UNALIGNED_GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) 
368- ; UNALIGNED_GFX12-NEXT:    v_add_nc_u32_e32 v0, s0, v0 
369- ; UNALIGNED_GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) 
370- ; UNALIGNED_GFX12-NEXT:    v_add_nc_u32_e32 v1, s0, v1 
371- ; UNALIGNED_GFX12-NEXT:    scratch_store_b32 v0, v2, off scope:SCOPE_SYS 
363+ ; UNALIGNED_GFX12-NEXT:    scratch_store_b32 v0, v2, s0 scope:SCOPE_SYS 
372364; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0 
373- ; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v0, v1, off  offset:124 scope:SCOPE_SYS 
365+ ; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v0, v1, s0  offset:124 scope:SCOPE_SYS 
374366; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0 
375367; UNALIGNED_GFX12-NEXT:    s_endpgm 
376368bb:
@@ -937,19 +929,17 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
937929; GFX12-NEXT:    v_and_b32_e32 v0, 0x3ff, v0 
938930; GFX12-NEXT:    scratch_load_b32 v3, off, off scope:SCOPE_SYS 
939931; GFX12-NEXT:    s_wait_loadcnt 0x0 
940- ; GFX12-NEXT:    v_mov_b32_e32 v2, 15 
941932; GFX12-NEXT:    v_sub_nc_u32_e32 v1, 0, v0 
933+ ; GFX12-NEXT:    v_mov_b32_e32 v2, 15 
942934; GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0 
943- ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2 ) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) 
935+ ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3 ) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) 
944936; GFX12-NEXT:    v_lshlrev_b32_e32 v1, 2, v1 
945937; GFX12-NEXT:    s_wait_kmcnt 0x0 
946938; GFX12-NEXT:    scratch_store_b32 v0, v2, off offset:384 scope:SCOPE_SYS 
947939; GFX12-NEXT:    s_wait_storecnt 0x0 
948940; GFX12-NEXT:    s_lshl_b32 s0, s0, 7 
949941; GFX12-NEXT:    s_add_co_u32 s0, 0x100, s0 
950- ; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) 
951- ; GFX12-NEXT:    v_add_nc_u32_e32 v1, s0, v1 
952- ; GFX12-NEXT:    scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS 
942+ ; GFX12-NEXT:    scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS 
953943; GFX12-NEXT:    s_wait_loadcnt 0x0 
954944; GFX12-NEXT:    s_endpgm 
955945; 
@@ -1048,19 +1038,17 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
10481038; UNALIGNED_GFX12-NEXT:    v_and_b32_e32 v0, 0x3ff, v0 
10491039; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v3, off, off scope:SCOPE_SYS 
10501040; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0 
1051- ; UNALIGNED_GFX12-NEXT:    v_mov_b32_e32 v2, 15 
10521041; UNALIGNED_GFX12-NEXT:    v_sub_nc_u32_e32 v1, 0, v0 
1042+ ; UNALIGNED_GFX12-NEXT:    v_mov_b32_e32 v2, 15 
10531043; UNALIGNED_GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0 
1054- ; UNALIGNED_GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2 ) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) 
1044+ ; UNALIGNED_GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3 ) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) 
10551045; UNALIGNED_GFX12-NEXT:    v_lshlrev_b32_e32 v1, 2, v1 
10561046; UNALIGNED_GFX12-NEXT:    s_wait_kmcnt 0x0 
10571047; UNALIGNED_GFX12-NEXT:    scratch_store_b32 v0, v2, off offset:384 scope:SCOPE_SYS 
10581048; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0 
10591049; UNALIGNED_GFX12-NEXT:    s_lshl_b32 s0, s0, 7 
10601050; UNALIGNED_GFX12-NEXT:    s_add_co_u32 s0, 0x100, s0 
1061- ; UNALIGNED_GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) 
1062- ; UNALIGNED_GFX12-NEXT:    v_add_nc_u32_e32 v1, s0, v1 
1063- ; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS 
1051+ ; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS 
10641052; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0 
10651053; UNALIGNED_GFX12-NEXT:    s_endpgm 
10661054bb:
@@ -1579,19 +1567,17 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
15791567; GFX12-NEXT:    v_and_b32_e32 v0, 0x3ff, v0 
15801568; GFX12-NEXT:    scratch_load_b32 v3, off, off scope:SCOPE_SYS 
15811569; GFX12-NEXT:    s_wait_loadcnt 0x0 
1582- ; GFX12-NEXT:    v_mov_b32_e32 v2, 15 
15831570; GFX12-NEXT:    v_sub_nc_u32_e32 v1, 0, v0 
1571+ ; GFX12-NEXT:    v_mov_b32_e32 v2, 15 
15841572; GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0 
1585- ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2 ) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) 
1573+ ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3 ) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) 
15861574; GFX12-NEXT:    v_lshlrev_b32_e32 v1, 2, v1 
15871575; GFX12-NEXT:    s_wait_kmcnt 0x0 
15881576; GFX12-NEXT:    scratch_store_b32 v0, v2, off offset:16512 scope:SCOPE_SYS 
15891577; GFX12-NEXT:    s_wait_storecnt 0x0 
15901578; GFX12-NEXT:    s_lshl_b32 s0, s0, 7 
15911579; GFX12-NEXT:    s_add_co_u32 s0, 0x4000, s0 
1592- ; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) 
1593- ; GFX12-NEXT:    v_add_nc_u32_e32 v1, s0, v1 
1594- ; GFX12-NEXT:    scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS 
1580+ ; GFX12-NEXT:    scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS 
15951581; GFX12-NEXT:    s_wait_loadcnt 0x0 
15961582; GFX12-NEXT:    s_endpgm 
15971583; 
@@ -1692,19 +1678,17 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
16921678; UNALIGNED_GFX12-NEXT:    v_and_b32_e32 v0, 0x3ff, v0 
16931679; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v3, off, off scope:SCOPE_SYS 
16941680; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0 
1695- ; UNALIGNED_GFX12-NEXT:    v_mov_b32_e32 v2, 15 
16961681; UNALIGNED_GFX12-NEXT:    v_sub_nc_u32_e32 v1, 0, v0 
1682+ ; UNALIGNED_GFX12-NEXT:    v_mov_b32_e32 v2, 15 
16971683; UNALIGNED_GFX12-NEXT:    v_lshlrev_b32_e32 v0, 2, v0 
1698- ; UNALIGNED_GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2 ) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) 
1684+ ; UNALIGNED_GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3 ) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) 
16991685; UNALIGNED_GFX12-NEXT:    v_lshlrev_b32_e32 v1, 2, v1 
17001686; UNALIGNED_GFX12-NEXT:    s_wait_kmcnt 0x0 
17011687; UNALIGNED_GFX12-NEXT:    scratch_store_b32 v0, v2, off offset:16512 scope:SCOPE_SYS 
17021688; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0 
17031689; UNALIGNED_GFX12-NEXT:    s_lshl_b32 s0, s0, 7 
17041690; UNALIGNED_GFX12-NEXT:    s_add_co_u32 s0, 0x4000, s0 
1705- ; UNALIGNED_GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) 
1706- ; UNALIGNED_GFX12-NEXT:    v_add_nc_u32_e32 v1, s0, v1 
1707- ; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS 
1691+ ; UNALIGNED_GFX12-NEXT:    scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS 
17081692; UNALIGNED_GFX12-NEXT:    s_wait_loadcnt 0x0 
17091693; UNALIGNED_GFX12-NEXT:    s_endpgm 
17101694bb:
@@ -4060,9 +4044,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
40604044; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: 
40614045; GFX12:       ; %bb.0: ; %bb 
40624046; GFX12-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0 
4063- ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) 
4064- ; GFX12-NEXT:    v_add_nc_u32_e32 v0, s0, v0 
4065- ; GFX12-NEXT:    scratch_store_b32 v0, v1, off offset:65512 scope:SCOPE_SYS 
4047+ ; GFX12-NEXT:    scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS 
40664048; GFX12-NEXT:    s_wait_storecnt 0x0 
40674049; GFX12-NEXT:    s_endpgm 
40684050; 
@@ -4113,9 +4095,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
41134095; UNALIGNED_GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: 
41144096; UNALIGNED_GFX12:       ; %bb.0: ; %bb 
41154097; UNALIGNED_GFX12-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0 
4116- ; UNALIGNED_GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) 
4117- ; UNALIGNED_GFX12-NEXT:    v_add_nc_u32_e32 v0, s0, v0 
4118- ; UNALIGNED_GFX12-NEXT:    scratch_store_b32 v0, v1, off offset:65512 scope:SCOPE_SYS 
4098+ ; UNALIGNED_GFX12-NEXT:    scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS 
41194099; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0 
41204100; UNALIGNED_GFX12-NEXT:    s_endpgm 
41214101bb:
@@ -4172,9 +4152,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
41724152; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: 
41734153; GFX12:       ; %bb.0: ; %bb 
41744154; GFX12-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0 
4175- ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) 
4176- ; GFX12-NEXT:    v_add_nc_u32_e32 v0, s0, v0 
4177- ; GFX12-NEXT:    scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS 
4155+ ; GFX12-NEXT:    scratch_store_b32 v0, v1, s0 offset:-16 scope:SCOPE_SYS 
41784156; GFX12-NEXT:    s_wait_storecnt 0x0 
41794157; GFX12-NEXT:    s_endpgm 
41804158; 
@@ -4223,9 +4201,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
42234201; UNALIGNED_GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset: 
42244202; UNALIGNED_GFX12:       ; %bb.0: ; %bb 
42254203; UNALIGNED_GFX12-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0 
4226- ; UNALIGNED_GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) 
4227- ; UNALIGNED_GFX12-NEXT:    v_add_nc_u32_e32 v0, s0, v0 
4228- ; UNALIGNED_GFX12-NEXT:    scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS 
4204+ ; UNALIGNED_GFX12-NEXT:    scratch_store_b32 v0, v1, s0 offset:-16 scope:SCOPE_SYS 
42294205; UNALIGNED_GFX12-NEXT:    s_wait_storecnt 0x0 
42304206; UNALIGNED_GFX12-NEXT:    s_endpgm 
42314207bb:
0 commit comments