@@ -21,11 +21,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
2121; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2222; GFX9-NEXT: s_lshl_b32 s1, s0, 2
2323; GFX9-NEXT: s_and_b32 s0, s0, 15
24- ; GFX9-NEXT: s_add_i32 s1, s1, 0
2524; GFX9-NEXT: s_lshl_b32 s0, s0, 2
2625; GFX9-NEXT: scratch_store_dword off, v0, s1
2726; GFX9-NEXT: s_waitcnt vmcnt(0)
28- ; GFX9-NEXT: s_add_i32 s0, s0, 0
2927; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
3028; GFX9-NEXT: s_waitcnt vmcnt(0)
3129; GFX9-NEXT: s_endpgm
@@ -42,8 +40,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
4240; GFX10-NEXT: s_and_b32 s1, s0, 15
4341; GFX10-NEXT: s_lshl_b32 s0, s0, 2
4442; GFX10-NEXT: s_lshl_b32 s1, s1, 2
45- ; GFX10-NEXT: s_add_i32 s0, s0, 0
46- ; GFX10-NEXT: s_add_i32 s1, s1, 0
4743; GFX10-NEXT: scratch_store_dword off, v0, s0
4844; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4945; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -57,7 +53,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
5753; GFX940-NEXT: s_waitcnt lgkmcnt(0)
5854; GFX940-NEXT: s_lshl_b32 s1, s0, 2
5955; GFX940-NEXT: s_and_b32 s0, s0, 15
60- ; GFX940-NEXT: s_add_i32 s1, s1, 0
6156; GFX940-NEXT: s_lshl_b32 s0, s0, 2
6257; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
6358; GFX940-NEXT: s_waitcnt vmcnt(0)
@@ -75,7 +70,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
7570; GFX11-NEXT: s_lshl_b32 s1, s1, 2
7671; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
7772; GFX11-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, s1
78- ; GFX11-NEXT: s_add_i32 s0, s0, 0
7973; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
8074; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
8175; GFX11-NEXT: scratch_load_b32 v0, v1, off glc dlc
@@ -108,11 +102,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
108102; UNALIGNED_GFX9-NEXT: s_waitcnt lgkmcnt(0)
109103; UNALIGNED_GFX9-NEXT: s_lshl_b32 s1, s0, 2
110104; UNALIGNED_GFX9-NEXT: s_and_b32 s0, s0, 15
111- ; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s1, 0
112105; UNALIGNED_GFX9-NEXT: s_lshl_b32 s0, s0, 2
113106; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s1
114107; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
115- ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, 0
116108; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
117109; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
118110; UNALIGNED_GFX9-NEXT: s_endpgm
@@ -129,8 +121,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
129121; UNALIGNED_GFX10-NEXT: s_and_b32 s1, s0, 15
130122; UNALIGNED_GFX10-NEXT: s_lshl_b32 s0, s0, 2
131123; UNALIGNED_GFX10-NEXT: s_lshl_b32 s1, s1, 2
132- ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, 0
133- ; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s1, 0
134124; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s0
135125; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
136126; UNALIGNED_GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -144,7 +134,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
144134; UNALIGNED_GFX940-NEXT: s_waitcnt lgkmcnt(0)
145135; UNALIGNED_GFX940-NEXT: s_lshl_b32 s1, s0, 2
146136; UNALIGNED_GFX940-NEXT: s_and_b32 s0, s0, 15
147- ; UNALIGNED_GFX940-NEXT: s_add_i32 s1, s1, 0
148137; UNALIGNED_GFX940-NEXT: s_lshl_b32 s0, s0, 2
149138; UNALIGNED_GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
150139; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0)
@@ -162,7 +151,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
162151; UNALIGNED_GFX11-NEXT: s_lshl_b32 s1, s1, 2
163152; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
164153; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, s1
165- ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s0, 0
166154; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
167155; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
168156; UNALIGNED_GFX11-NEXT: scratch_load_b32 v0, v1, off glc dlc
@@ -1923,13 +1911,13 @@ define void @store_load_large_imm_offset_foo() {
19231911; GFX9-LABEL: store_load_large_imm_offset_foo:
19241912; GFX9: ; %bb.0: ; %bb
19251913; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1926- ; GFX9-NEXT: v_mov_b32_e32 v0, 13
19271914; GFX9-NEXT: s_movk_i32 s0, 0x3e80
1928- ; GFX9-NEXT: s_add_i32 s1, s32, 4
1915+ ; GFX9-NEXT: v_mov_b32_e32 v0, 13
1916+ ; GFX9-NEXT: s_add_i32 s1, s32, s0
19291917; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
19301918; GFX9-NEXT: s_waitcnt vmcnt(0)
19311919; GFX9-NEXT: v_mov_b32_e32 v0, 15
1932- ; GFX9-NEXT: s_add_i32 s0, s0, s1
1920+ ; GFX9-NEXT: s_add_i32 s0, s1, 4
19331921; GFX9-NEXT: scratch_store_dword off, v0, s0
19341922; GFX9-NEXT: s_waitcnt vmcnt(0)
19351923; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1940,10 +1928,10 @@ define void @store_load_large_imm_offset_foo() {
19401928; GFX10: ; %bb.0: ; %bb
19411929; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19421930; GFX10-NEXT: v_mov_b32_e32 v0, 13
1943- ; GFX10-NEXT: v_mov_b32_e32 v1, 15
19441931; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1945- ; GFX10-NEXT: s_add_i32 s1, s32, 4
1946- ; GFX10-NEXT: s_add_i32 s0, s0, s1
1932+ ; GFX10-NEXT: v_mov_b32_e32 v1, 15
1933+ ; GFX10-NEXT: s_add_i32 s1, s32, s0
1934+ ; GFX10-NEXT: s_add_i32 s0, s1, 4
19471935; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
19481936; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
19491937; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1999,13 +1987,13 @@ define void @store_load_large_imm_offset_foo() {
19991987; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_foo:
20001988; UNALIGNED_GFX9: ; %bb.0: ; %bb
20011989; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2002- ; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 13
20031990; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
2004- ; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, 4
1991+ ; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 13
1992+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, s0
20051993; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
20061994; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
20071995; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
2008- ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, s1
1996+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s1, 4
20091997; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
20101998; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
20111999; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -2016,10 +2004,10 @@ define void @store_load_large_imm_offset_foo() {
20162004; UNALIGNED_GFX10: ; %bb.0: ; %bb
20172005; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20182006; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
2019- ; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
20202007; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
2021- ; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, 4
2022- ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, s1
2008+ ; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
2009+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, s0
2010+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s1, 4
20232011; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
20242012; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
20252013; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
0 commit comments