@@ -257,20 +257,16 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
257257; GFX12: ; %bb.0: ; %bb
258258; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x0
259259; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
260- ; GFX12-NEXT: v_mov_b32_e32 v2, 15
261- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
260+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
262261; GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0
262+ ; GFX12-NEXT: v_mov_b32_e32 v2, 15
263263; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
264264; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1
265265; GFX12-NEXT: s_wait_kmcnt 0x0
266266; GFX12-NEXT: s_lshl_b32 s0, s0, 7
267- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
268- ; GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
269- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
270- ; GFX12-NEXT: v_add_nc_u32_e32 v1, s0, v1
271- ; GFX12-NEXT: scratch_store_b32 v0, v2, off scope:SCOPE_SYS
267+ ; GFX12-NEXT: scratch_store_b32 v0, v2, s0 scope:SCOPE_SYS
272268; GFX12-NEXT: s_wait_storecnt 0x0
273- ; GFX12-NEXT: scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS
269+ ; GFX12-NEXT: scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS
274270; GFX12-NEXT: s_wait_loadcnt 0x0
275271; GFX12-NEXT: s_endpgm
276272;
@@ -357,20 +353,16 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) {
357353; UNALIGNED_GFX12: ; %bb.0: ; %bb
358354; UNALIGNED_GFX12-NEXT: s_load_b32 s0, s[4:5], 0x0
359355; UNALIGNED_GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
360- ; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v2, 15
361- ; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
356+ ; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
362357; UNALIGNED_GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0
358+ ; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v2, 15
363359; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
364360; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1
365361; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0
366362; UNALIGNED_GFX12-NEXT: s_lshl_b32 s0, s0, 7
367- ; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
368- ; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
369- ; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
370- ; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v1, s0, v1
371- ; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v2, off scope:SCOPE_SYS
363+ ; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v2, s0 scope:SCOPE_SYS
372364; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
373- ; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS
365+ ; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS
374366; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0
375367; UNALIGNED_GFX12-NEXT: s_endpgm
376368bb:
@@ -937,19 +929,17 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
937929; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
938930; GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
939931; GFX12-NEXT: s_wait_loadcnt 0x0
940- ; GFX12-NEXT: v_mov_b32_e32 v2, 15
941932; GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0
933+ ; GFX12-NEXT: v_mov_b32_e32 v2, 15
942934; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
943- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2 ) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
935+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3 ) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
944936; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1
945937; GFX12-NEXT: s_wait_kmcnt 0x0
946938; GFX12-NEXT: scratch_store_b32 v0, v2, off offset:384 scope:SCOPE_SYS
947939; GFX12-NEXT: s_wait_storecnt 0x0
948940; GFX12-NEXT: s_lshl_b32 s0, s0, 7
949941; GFX12-NEXT: s_add_co_u32 s0, 0x100, s0
950- ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
951- ; GFX12-NEXT: v_add_nc_u32_e32 v1, s0, v1
952- ; GFX12-NEXT: scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS
942+ ; GFX12-NEXT: scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS
953943; GFX12-NEXT: s_wait_loadcnt 0x0
954944; GFX12-NEXT: s_endpgm
955945;
@@ -1048,19 +1038,17 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) {
10481038; UNALIGNED_GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
10491039; UNALIGNED_GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
10501040; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0
1051- ; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v2, 15
10521041; UNALIGNED_GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0
1042+ ; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v2, 15
10531043; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1054- ; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2 ) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
1044+ ; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3 ) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
10551045; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1
10561046; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0
10571047; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v2, off offset:384 scope:SCOPE_SYS
10581048; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
10591049; UNALIGNED_GFX12-NEXT: s_lshl_b32 s0, s0, 7
10601050; UNALIGNED_GFX12-NEXT: s_add_co_u32 s0, 0x100, s0
1061- ; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1062- ; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v1, s0, v1
1063- ; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS
1051+ ; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS
10641052; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0
10651053; UNALIGNED_GFX12-NEXT: s_endpgm
10661054bb:
@@ -1579,19 +1567,17 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
15791567; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
15801568; GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
15811569; GFX12-NEXT: s_wait_loadcnt 0x0
1582- ; GFX12-NEXT: v_mov_b32_e32 v2, 15
15831570; GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0
1571+ ; GFX12-NEXT: v_mov_b32_e32 v2, 15
15841572; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1585- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2 ) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
1573+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3 ) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
15861574; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1
15871575; GFX12-NEXT: s_wait_kmcnt 0x0
15881576; GFX12-NEXT: scratch_store_b32 v0, v2, off offset:16512 scope:SCOPE_SYS
15891577; GFX12-NEXT: s_wait_storecnt 0x0
15901578; GFX12-NEXT: s_lshl_b32 s0, s0, 7
15911579; GFX12-NEXT: s_add_co_u32 s0, 0x4000, s0
1592- ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1593- ; GFX12-NEXT: v_add_nc_u32_e32 v1, s0, v1
1594- ; GFX12-NEXT: scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS
1580+ ; GFX12-NEXT: scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS
15951581; GFX12-NEXT: s_wait_loadcnt 0x0
15961582; GFX12-NEXT: s_endpgm
15971583;
@@ -1692,19 +1678,17 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) {
16921678; UNALIGNED_GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
16931679; UNALIGNED_GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS
16941680; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0
1695- ; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v2, 15
16961681; UNALIGNED_GFX12-NEXT: v_sub_nc_u32_e32 v1, 0, v0
1682+ ; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v2, 15
16971683; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1698- ; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2 ) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
1684+ ; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3 ) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
16991685; UNALIGNED_GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v1
17001686; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0
17011687; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v2, off offset:16512 scope:SCOPE_SYS
17021688; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
17031689; UNALIGNED_GFX12-NEXT: s_lshl_b32 s0, s0, 7
17041690; UNALIGNED_GFX12-NEXT: s_add_co_u32 s0, 0x4000, s0
1705- ; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1706- ; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v1, s0, v1
1707- ; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, off offset:124 scope:SCOPE_SYS
1691+ ; UNALIGNED_GFX12-NEXT: scratch_load_b32 v0, v1, s0 offset:124 scope:SCOPE_SYS
17081692; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0
17091693; UNALIGNED_GFX12-NEXT: s_endpgm
17101694bb:
@@ -4060,9 +4044,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
40604044; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
40614045; GFX12: ; %bb.0: ; %bb
40624046; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4063- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4064- ; GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4065- ; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:65512 scope:SCOPE_SYS
4047+ ; GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS
40664048; GFX12-NEXT: s_wait_storecnt 0x0
40674049; GFX12-NEXT: s_endpgm
40684050;
@@ -4113,9 +4095,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
41134095; UNALIGNED_GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
41144096; UNALIGNED_GFX12: ; %bb.0: ; %bb
41154097; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4116- ; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4117- ; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4118- ; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, off offset:65512 scope:SCOPE_SYS
4098+ ; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS
41194099; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
41204100; UNALIGNED_GFX12-NEXT: s_endpgm
41214101bb:
@@ -4172,9 +4152,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
41724152; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
41734153; GFX12: ; %bb.0: ; %bb
41744154; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4175- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4176- ; GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4177- ; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4155+ ; GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:-16 scope:SCOPE_SYS
41784156; GFX12-NEXT: s_wait_storecnt 0x0
41794157; GFX12-NEXT: s_endpgm
41804158;
@@ -4223,9 +4201,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(pt
42234201; UNALIGNED_GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
42244202; UNALIGNED_GFX12: ; %bb.0: ; %bb
42254203; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4226- ; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4227- ; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4228- ; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4204+ ; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:-16 scope:SCOPE_SYS
42294205; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
42304206; UNALIGNED_GFX12-NEXT: s_endpgm
42314207bb:
0 commit comments