@@ -21,9 +21,11 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
2121; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2222; GFX9-NEXT: s_lshl_b32 s1, s0, 2
2323; GFX9-NEXT: s_and_b32 s0, s0, 15
24+ ; GFX9-NEXT: s_add_i32 s1, s1, 0
2425; GFX9-NEXT: s_lshl_b32 s0, s0, 2
2526; GFX9-NEXT: scratch_store_dword off, v0, s1
2627; GFX9-NEXT: s_waitcnt vmcnt(0)
28+ ; GFX9-NEXT: s_add_i32 s0, s0, 0
2729; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
2830; GFX9-NEXT: s_waitcnt vmcnt(0)
2931; GFX9-NEXT: s_endpgm
@@ -40,6 +42,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
4042; GFX10-NEXT: s_and_b32 s1, s0, 15
4143; GFX10-NEXT: s_lshl_b32 s0, s0, 2
4244; GFX10-NEXT: s_lshl_b32 s1, s1, 2
45+ ; GFX10-NEXT: s_add_i32 s0, s0, 0
46+ ; GFX10-NEXT: s_add_i32 s1, s1, 0
4347; GFX10-NEXT: scratch_store_dword off, v0, s0
4448; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4549; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -53,6 +57,7 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
5357; GFX940-NEXT: s_waitcnt lgkmcnt(0)
5458; GFX940-NEXT: s_lshl_b32 s1, s0, 2
5559; GFX940-NEXT: s_and_b32 s0, s0, 15
60+ ; GFX940-NEXT: s_add_i32 s1, s1, 0
5661; GFX940-NEXT: s_lshl_b32 s0, s0, 2
5762; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
5863; GFX940-NEXT: s_waitcnt vmcnt(0)
@@ -70,6 +75,7 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
7075; GFX11-NEXT: s_lshl_b32 s1, s1, 2
7176; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
7277; GFX11-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, s1
78+ ; GFX11-NEXT: s_add_i32 s0, s0, 0
7379; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
7480; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
7581; GFX11-NEXT: scratch_load_b32 v0, v1, off glc dlc
@@ -102,9 +108,11 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
102108; UNALIGNED_GFX9-NEXT: s_waitcnt lgkmcnt(0)
103109; UNALIGNED_GFX9-NEXT: s_lshl_b32 s1, s0, 2
104110; UNALIGNED_GFX9-NEXT: s_and_b32 s0, s0, 15
111+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s1, 0
105112; UNALIGNED_GFX9-NEXT: s_lshl_b32 s0, s0, 2
106113; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s1
107114; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
115+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, 0
108116; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
109117; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
110118; UNALIGNED_GFX9-NEXT: s_endpgm
@@ -121,6 +129,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
121129; UNALIGNED_GFX10-NEXT: s_and_b32 s1, s0, 15
122130; UNALIGNED_GFX10-NEXT: s_lshl_b32 s0, s0, 2
123131; UNALIGNED_GFX10-NEXT: s_lshl_b32 s1, s1, 2
132+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, 0
133+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s1, 0
124134; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s0
125135; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
126136; UNALIGNED_GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -134,6 +144,7 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
134144; UNALIGNED_GFX940-NEXT: s_waitcnt lgkmcnt(0)
135145; UNALIGNED_GFX940-NEXT: s_lshl_b32 s1, s0, 2
136146; UNALIGNED_GFX940-NEXT: s_and_b32 s0, s0, 15
147+ ; UNALIGNED_GFX940-NEXT: s_add_i32 s1, s1, 0
137148; UNALIGNED_GFX940-NEXT: s_lshl_b32 s0, s0, 2
138149; UNALIGNED_GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
139150; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0)
@@ -151,6 +162,7 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
151162; UNALIGNED_GFX11-NEXT: s_lshl_b32 s1, s1, 2
152163; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
153164; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, s1
165+ ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s0, 0
154166; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
155167; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
156168; UNALIGNED_GFX11-NEXT: scratch_load_b32 v0, v1, off glc dlc
@@ -1911,13 +1923,13 @@ define void @store_load_large_imm_offset_foo() {
19111923; GFX9-LABEL: store_load_large_imm_offset_foo:
19121924; GFX9: ; %bb.0: ; %bb
19131925; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1914- ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
19151926; GFX9-NEXT: v_mov_b32_e32 v0, 13
1916- ; GFX9-NEXT: s_add_i32 s1, s32, s0
1927+ ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
1928+ ; GFX9-NEXT: s_add_i32 s1, s32, 4
19171929; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
19181930; GFX9-NEXT: s_waitcnt vmcnt(0)
19191931; GFX9-NEXT: v_mov_b32_e32 v0, 15
1920- ; GFX9-NEXT: s_add_i32 s0, s1, 4
1932+ ; GFX9-NEXT: s_add_i32 s0, s0, s1
19211933; GFX9-NEXT: scratch_store_dword off, v0, s0
19221934; GFX9-NEXT: s_waitcnt vmcnt(0)
19231935; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1928,10 +1940,10 @@ define void @store_load_large_imm_offset_foo() {
19281940; GFX10: ; %bb.0: ; %bb
19291941; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19301942; GFX10-NEXT: v_mov_b32_e32 v0, 13
1931- ; GFX10-NEXT: s_movk_i32 s0, 0x3e80
19321943; GFX10-NEXT: v_mov_b32_e32 v1, 15
1933- ; GFX10-NEXT: s_add_i32 s1, s32, s0
1934- ; GFX10-NEXT: s_add_i32 s0, s1, 4
1944+ ; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1945+ ; GFX10-NEXT: s_add_i32 s1, s32, 4
1946+ ; GFX10-NEXT: s_add_i32 s0, s0, s1
19351947; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
19361948; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
19371949; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1987,13 +1999,13 @@ define void @store_load_large_imm_offset_foo() {
19871999; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_foo:
19882000; UNALIGNED_GFX9: ; %bb.0: ; %bb
19892001; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1990- ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
19912002; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 13
1992- ; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, s0
2003+ ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
2004+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, 4
19932005; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
19942006; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
19952007; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
1996- ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s1, 4
2008+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, s1
19972009; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
19982010; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
19992011; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -2004,10 +2016,10 @@ define void @store_load_large_imm_offset_foo() {
20042016; UNALIGNED_GFX10: ; %bb.0: ; %bb
20052017; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20062018; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
2007- ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
20082019; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
2009- ; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, s0
2010- ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s1, 4
2020+ ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
2021+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, 4
2022+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, s1
20112023; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
20122024; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
20132025; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
0 commit comments