@@ -15,11 +15,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
1515; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1616; GFX9-NEXT: s_lshl_b32 s1, s0, 2
1717; GFX9-NEXT: s_and_b32 s0, s0, 15
18- ; GFX9-NEXT: s_add_i32 s1, s1, 0
1918; GFX9-NEXT: s_lshl_b32 s0, s0, 2
2019; GFX9-NEXT: scratch_store_dword off, v0, s1
2120; GFX9-NEXT: s_waitcnt vmcnt(0)
22- ; GFX9-NEXT: s_add_i32 s0, s0, 0
2321; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
2422; GFX9-NEXT: s_waitcnt vmcnt(0)
2523; GFX9-NEXT: s_endpgm
@@ -36,8 +34,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
3634; GFX10-NEXT: s_and_b32 s1, s0, 15
3735; GFX10-NEXT: s_lshl_b32 s0, s0, 2
3836; GFX10-NEXT: s_lshl_b32 s1, s1, 2
39- ; GFX10-NEXT: s_add_i32 s0, s0, 0
40- ; GFX10-NEXT: s_add_i32 s1, s1, 0
4137; GFX10-NEXT: scratch_store_dword off, v0, s0
4238; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4339; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -51,11 +47,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
5147; GFX940-NEXT: s_waitcnt lgkmcnt(0)
5248; GFX940-NEXT: s_lshl_b32 s1, s0, 2
5349; GFX940-NEXT: s_and_b32 s0, s0, 15
54- ; GFX940-NEXT: s_add_i32 s1, s1, 0
5550; GFX940-NEXT: s_lshl_b32 s0, s0, 2
5651; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
5752; GFX940-NEXT: s_waitcnt vmcnt(0)
58- ; GFX940-NEXT: s_add_i32 s0, s0, 0
5953; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
6054; GFX940-NEXT: s_waitcnt vmcnt(0)
6155; GFX940-NEXT: s_endpgm
@@ -68,8 +62,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
6862; GFX11-NEXT: s_and_b32 s1, s0, 15
6963; GFX11-NEXT: s_lshl_b32 s0, s0, 2
7064; GFX11-NEXT: s_lshl_b32 s1, s1, 2
71- ; GFX11-NEXT: s_add_i32 s0, s0, 0
72- ; GFX11-NEXT: s_add_i32 s1, s1, 0
7365; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
7466; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
7567; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc
@@ -84,8 +76,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
8476; GFX12-NEXT: s_and_b32 s1, s0, 15
8577; GFX12-NEXT: s_lshl_b32 s0, s0, 2
8678; GFX12-NEXT: s_lshl_b32 s1, s1, 2
87- ; GFX12-NEXT: s_add_co_i32 s0, s0, 0
88- ; GFX12-NEXT: s_add_co_i32 s1, s1, 0
8979; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
9080; GFX12-NEXT: s_wait_storecnt 0x0
9181; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
@@ -1042,13 +1032,13 @@ define void @store_load_large_imm_offset_foo() {
10421032; GFX9-LABEL: store_load_large_imm_offset_foo:
10431033; GFX9: ; %bb.0: ; %bb
10441034; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1045- ; GFX9-NEXT: v_mov_b32_e32 v0, 13
10461035; GFX9-NEXT: s_movk_i32 s0, 0x3e80
1047- ; GFX9-NEXT: s_add_i32 s1, s32, 4
1036+ ; GFX9-NEXT: v_mov_b32_e32 v0, 13
1037+ ; GFX9-NEXT: s_add_i32 s0, s32, s0
10481038; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
10491039; GFX9-NEXT: s_waitcnt vmcnt(0)
10501040; GFX9-NEXT: v_mov_b32_e32 v0, 15
1051- ; GFX9-NEXT: s_add_i32 s0, s0, s1
1041+ ; GFX9-NEXT: s_add_i32 s0, s0, 4
10521042; GFX9-NEXT: scratch_store_dword off, v0, s0
10531043; GFX9-NEXT: s_waitcnt vmcnt(0)
10541044; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1059,10 +1049,10 @@ define void @store_load_large_imm_offset_foo() {
10591049; GFX10: ; %bb.0: ; %bb
10601050; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10611051; GFX10-NEXT: v_mov_b32_e32 v0, 13
1062- ; GFX10-NEXT: v_mov_b32_e32 v1, 15
10631052; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1064- ; GFX10-NEXT: s_add_i32 s1, s32, 4
1065- ; GFX10-NEXT: s_add_i32 s0, s0, s1
1053+ ; GFX10-NEXT: v_mov_b32_e32 v1, 15
1054+ ; GFX10-NEXT: s_add_i32 s0, s32, s0
1055+ ; GFX10-NEXT: s_add_i32 s0, s0, 4
10661056; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
10671057; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
10681058; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1074,13 +1064,13 @@ define void @store_load_large_imm_offset_foo() {
10741064; GFX940-LABEL: store_load_large_imm_offset_foo:
10751065; GFX940: ; %bb.0: ; %bb
10761066; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1077- ; GFX940-NEXT: v_mov_b32_e32 v0, 13
10781067; GFX940-NEXT: s_movk_i32 s0, 0x3e80
1079- ; GFX940-NEXT: s_add_i32 s1, s32, 4
1068+ ; GFX940-NEXT: v_mov_b32_e32 v0, 13
1069+ ; GFX940-NEXT: s_add_i32 s0, s32, s0
10801070; GFX940-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
10811071; GFX940-NEXT: s_waitcnt vmcnt(0)
10821072; GFX940-NEXT: v_mov_b32_e32 v0, 15
1083- ; GFX940-NEXT: s_add_i32 s0, s0, s1
1073+ ; GFX940-NEXT: s_add_i32 s0, s0, 4
10841074; GFX940-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
10851075; GFX940-NEXT: s_waitcnt vmcnt(0)
10861076; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1092,9 +1082,9 @@ define void @store_load_large_imm_offset_foo() {
10921082; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10931083; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
10941084; GFX11-NEXT: s_movk_i32 s0, 0x3e80
1095- ; GFX11-NEXT: s_add_i32 s1, s32, 4
1096- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1097- ; GFX11-NEXT: s_add_i32 s0, s0, s1
1085+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1086+ ; GFX11-NEXT: s_add_i32 s0, s32, s0
1087+ ; GFX11-NEXT: s_add_i32 s0, s0, 4
10981088; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
10991089; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
11001090; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
0 commit comments