@@ -15,9 +15,11 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
1515; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1616; GFX9-NEXT: s_lshl_b32 s1, s0, 2
1717; GFX9-NEXT: s_and_b32 s0, s0, 15
18+ ; GFX9-NEXT: s_add_i32 s1, s1, 0
1819; GFX9-NEXT: s_lshl_b32 s0, s0, 2
1920; GFX9-NEXT: scratch_store_dword off, v0, s1
2021; GFX9-NEXT: s_waitcnt vmcnt(0)
22+ ; GFX9-NEXT: s_add_i32 s0, s0, 0
2123; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
2224; GFX9-NEXT: s_waitcnt vmcnt(0)
2325; GFX9-NEXT: s_endpgm
@@ -34,6 +36,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
3436; GFX10-NEXT: s_and_b32 s1, s0, 15
3537; GFX10-NEXT: s_lshl_b32 s0, s0, 2
3638; GFX10-NEXT: s_lshl_b32 s1, s1, 2
39+ ; GFX10-NEXT: s_add_i32 s0, s0, 0
40+ ; GFX10-NEXT: s_add_i32 s1, s1, 0
3741; GFX10-NEXT: scratch_store_dword off, v0, s0
3842; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3943; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -47,9 +51,11 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
4751; GFX940-NEXT: s_waitcnt lgkmcnt(0)
4852; GFX940-NEXT: s_lshl_b32 s1, s0, 2
4953; GFX940-NEXT: s_and_b32 s0, s0, 15
54+ ; GFX940-NEXT: s_add_i32 s1, s1, 0
5055; GFX940-NEXT: s_lshl_b32 s0, s0, 2
5156; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
5257; GFX940-NEXT: s_waitcnt vmcnt(0)
58+ ; GFX940-NEXT: s_add_i32 s0, s0, 0
5359; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
5460; GFX940-NEXT: s_waitcnt vmcnt(0)
5561; GFX940-NEXT: s_endpgm
@@ -62,6 +68,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
6268; GFX11-NEXT: s_and_b32 s1, s0, 15
6369; GFX11-NEXT: s_lshl_b32 s0, s0, 2
6470; GFX11-NEXT: s_lshl_b32 s1, s1, 2
71+ ; GFX11-NEXT: s_add_i32 s0, s0, 0
72+ ; GFX11-NEXT: s_add_i32 s1, s1, 0
6573; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
6674; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
6775; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc
@@ -76,6 +84,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
7684; GFX12-NEXT: s_and_b32 s1, s0, 15
7785; GFX12-NEXT: s_lshl_b32 s0, s0, 2
7886; GFX12-NEXT: s_lshl_b32 s1, s1, 2
87+ ; GFX12-NEXT: s_add_co_i32 s0, s0, 0
88+ ; GFX12-NEXT: s_add_co_i32 s1, s1, 0
7989; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
8090; GFX12-NEXT: s_wait_storecnt 0x0
8191; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
@@ -1032,13 +1042,13 @@ define void @store_load_large_imm_offset_foo() {
10321042; GFX9-LABEL: store_load_large_imm_offset_foo:
10331043; GFX9: ; %bb.0: ; %bb
10341044; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1035- ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
10361045; GFX9-NEXT: v_mov_b32_e32 v0, 13
1037- ; GFX9-NEXT: s_add_i32 s1, s32, s0
1046+ ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
1047+ ; GFX9-NEXT: s_add_i32 s1, s32, 4
10381048; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
10391049; GFX9-NEXT: s_waitcnt vmcnt(0)
10401050; GFX9-NEXT: v_mov_b32_e32 v0, 15
1041- ; GFX9-NEXT: s_add_i32 s0, s1, 4
1051+ ; GFX9-NEXT: s_add_i32 s0, s0, s1
10421052; GFX9-NEXT: scratch_store_dword off, v0, s0
10431053; GFX9-NEXT: s_waitcnt vmcnt(0)
10441054; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1049,10 +1059,10 @@ define void @store_load_large_imm_offset_foo() {
10491059; GFX10: ; %bb.0: ; %bb
10501060; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10511061; GFX10-NEXT: v_mov_b32_e32 v0, 13
1052- ; GFX10-NEXT: s_movk_i32 s0, 0x3e80
10531062; GFX10-NEXT: v_mov_b32_e32 v1, 15
1054- ; GFX10-NEXT: s_add_i32 s1, s32, s0
1055- ; GFX10-NEXT: s_add_i32 s0, s1, 4
1063+ ; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1064+ ; GFX10-NEXT: s_add_i32 s1, s32, 4
1065+ ; GFX10-NEXT: s_add_i32 s0, s0, s1
10561066; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
10571067; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
10581068; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1064,13 +1074,13 @@ define void @store_load_large_imm_offset_foo() {
10641074; GFX940-LABEL: store_load_large_imm_offset_foo:
10651075; GFX940: ; %bb.0: ; %bb
10661076; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1067- ; GFX940-NEXT: s_movk_i32 s0, 0x3e80
10681077; GFX940-NEXT: v_mov_b32_e32 v0, 13
1069- ; GFX940-NEXT: s_add_i32 s1, s32, s0
1078+ ; GFX940-NEXT: s_movk_i32 s0, 0x3e80
1079+ ; GFX940-NEXT: s_add_i32 s1, s32, 4
10701080; GFX940-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
10711081; GFX940-NEXT: s_waitcnt vmcnt(0)
10721082; GFX940-NEXT: v_mov_b32_e32 v0, 15
1073- ; GFX940-NEXT: s_add_i32 s0, s1, 4
1083+ ; GFX940-NEXT: s_add_i32 s0, s0, s1
10741084; GFX940-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
10751085; GFX940-NEXT: s_waitcnt vmcnt(0)
10761086; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1082,9 +1092,9 @@ define void @store_load_large_imm_offset_foo() {
10821092; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10831093; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
10841094; GFX11-NEXT: s_movk_i32 s0, 0x3e80
1085- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1086- ; GFX11-NEXT: s_add_i32 s1, s32, s0
1087- ; GFX11-NEXT: s_add_i32 s0, s1, 4
1095+ ; GFX11-NEXT: s_add_i32 s1, s32, 4
1096+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1097+ ; GFX11-NEXT: s_add_i32 s0, s0, s1
10881098; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
10891099; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
10901100; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
0 commit comments