@@ -1917,8 +1917,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
19171917; GFX9-NEXT: s_mov_b32 s0, 0
19181918; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
19191919; GFX9-NEXT: s_waitcnt vmcnt(0)
1920+ ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
19201921; GFX9-NEXT: v_mov_b32_e32 v0, 15
1921- ; GFX9-NEXT: s_movk_i32 s0, 0x3e84
1922+ ; GFX9-NEXT: s_add_i32 s0, s0, 4
19221923; GFX9-NEXT: scratch_store_dword off, v0, s0
19231924; GFX9-NEXT: s_waitcnt vmcnt(0)
19241925; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1933,7 +1934,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
19331934; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
19341935; GFX10-NEXT: v_mov_b32_e32 v0, 13
19351936; GFX10-NEXT: v_mov_b32_e32 v1, 15
1936- ; GFX10-NEXT: s_movk_i32 s0, 0x3e84
1937+ ; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1938+ ; GFX10-NEXT: s_add_i32 s0, s0, 4
19371939; GFX10-NEXT: scratch_store_dword off, v0, off offset:4
19381940; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
19391941; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1945,10 +1947,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
19451947; GFX942-LABEL: store_load_large_imm_offset_kernel:
19461948; GFX942: ; %bb.0: ; %bb
19471949; GFX942-NEXT: v_mov_b32_e32 v0, 13
1950+ ; GFX942-NEXT: s_movk_i32 s0, 0x3e80
19481951; GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
19491952; GFX942-NEXT: s_waitcnt vmcnt(0)
19501953; GFX942-NEXT: v_mov_b32_e32 v0, 15
1951- ; GFX942-NEXT: s_movk_i32 s0, 0x3e84
1954+ ; GFX942-NEXT: s_add_i32 s0, s0, 4
19521955; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
19531956; GFX942-NEXT: s_waitcnt vmcnt(0)
19541957; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1958,7 +1961,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
19581961; GFX11-LABEL: store_load_large_imm_offset_kernel:
19591962; GFX11: ; %bb.0: ; %bb
19601963; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1961- ; GFX11-NEXT: s_movk_i32 s0, 0x3e84
1964+ ; GFX11-NEXT: s_movk_i32 s0, 0x3e80
1965+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1966+ ; GFX11-NEXT: s_add_i32 s0, s0, 4
19621967; GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
19631968; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
19641969; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -1986,8 +1991,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
19861991; UNALIGNED_GFX9-NEXT: s_mov_b32 s0, 0
19871992; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
19881993; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
1994+ ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
19891995; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
1990- ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e84
1996+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, 4
19911997; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
19921998; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
19931999; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -2002,7 +2008,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
20022008; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
20032009; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
20042010; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
2005- ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e84
2011+ ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
2012+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, 4
20062013; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, off offset:4
20072014; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
20082015; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -2014,10 +2021,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
20142021; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_kernel:
20152022; UNALIGNED_GFX942: ; %bb.0: ; %bb
20162023; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
2024+ ; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
20172025; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
20182026; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
20192027; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
2020- ; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e84
2028+ ; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s0, 4
20212029; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
20222030; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
20232031; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -2027,7 +2035,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
20272035; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_kernel:
20282036; UNALIGNED_GFX11: ; %bb.0: ; %bb
20292037; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
2030- ; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e84
2038+ ; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
2039+ ; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2040+ ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s0, 4
20312041; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
20322042; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
20332043; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -2061,11 +2071,13 @@ define void @store_load_large_imm_offset_foo() {
20612071; GFX9-LABEL: store_load_large_imm_offset_foo:
20622072; GFX9: ; %bb.0: ; %bb
20632073; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2074+ ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
20642075; GFX9-NEXT: v_mov_b32_e32 v0, 13
2076+ ; GFX9-NEXT: s_add_i32 s1, s32, s0
20652077; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
20662078; GFX9-NEXT: s_waitcnt vmcnt(0)
20672079; GFX9-NEXT: v_mov_b32_e32 v0, 15
2068- ; GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
2080+ ; GFX9-NEXT: s_add_i32 s0, s1, 4
20692081; GFX9-NEXT: scratch_store_dword off, v0, s0
20702082; GFX9-NEXT: s_waitcnt vmcnt(0)
20712083; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -2076,8 +2088,10 @@ define void @store_load_large_imm_offset_foo() {
20762088; GFX10: ; %bb.0: ; %bb
20772089; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20782090; GFX10-NEXT: v_mov_b32_e32 v0, 13
2091+ ; GFX10-NEXT: s_movk_i32 s0, 0x3e80
20792092; GFX10-NEXT: v_mov_b32_e32 v1, 15
2080- ; GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
2093+ ; GFX10-NEXT: s_add_i32 s1, s32, s0
2094+ ; GFX10-NEXT: s_add_i32 s0, s1, 4
20812095; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
20822096; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
20832097; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -2089,11 +2103,13 @@ define void @store_load_large_imm_offset_foo() {
20892103; GFX942-LABEL: store_load_large_imm_offset_foo:
20902104; GFX942: ; %bb.0: ; %bb
20912105; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2106+ ; GFX942-NEXT: s_movk_i32 s0, 0x3e80
20922107; GFX942-NEXT: v_mov_b32_e32 v0, 13
2108+ ; GFX942-NEXT: s_add_i32 s1, s32, s0
20932109; GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
20942110; GFX942-NEXT: s_waitcnt vmcnt(0)
20952111; GFX942-NEXT: v_mov_b32_e32 v0, 15
2096- ; GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
2112+ ; GFX942-NEXT: s_add_i32 s0, s1, 4
20972113; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
20982114; GFX942-NEXT: s_waitcnt vmcnt(0)
20992115; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -2104,7 +2120,10 @@ define void @store_load_large_imm_offset_foo() {
21042120; GFX11: ; %bb.0: ; %bb
21052121; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21062122; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
2107- ; GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
2123+ ; GFX11-NEXT: s_movk_i32 s0, 0x3e80
2124+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
2125+ ; GFX11-NEXT: s_add_i32 s1, s32, s0
2126+ ; GFX11-NEXT: s_add_i32 s0, s1, 4
21082127; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
21092128; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
21102129; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -2133,11 +2152,13 @@ define void @store_load_large_imm_offset_foo() {
21332152; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_foo:
21342153; UNALIGNED_GFX9: ; %bb.0: ; %bb
21352154; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2155+ ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
21362156; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 13
2157+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, s0
21372158; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
21382159; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
21392160; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
2140- ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
2161+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s1, 4
21412162; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
21422163; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
21432164; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -2148,8 +2169,10 @@ define void @store_load_large_imm_offset_foo() {
21482169; UNALIGNED_GFX10: ; %bb.0: ; %bb
21492170; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21502171; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
2172+ ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
21512173; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
2152- ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
2174+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, s0
2175+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s1, 4
21532176; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
21542177; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
21552178; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -2161,11 +2184,13 @@ define void @store_load_large_imm_offset_foo() {
21612184; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_foo:
21622185; UNALIGNED_GFX942: ; %bb.0: ; %bb
21632186; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2187+ ; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
21642188; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
2189+ ; UNALIGNED_GFX942-NEXT: s_add_i32 s1, s32, s0
21652190; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
21662191; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
21672192; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
2168- ; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
2193+ ; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s1, 4
21692194; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
21702195; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
21712196; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -2176,7 +2201,10 @@ define void @store_load_large_imm_offset_foo() {
21762201; UNALIGNED_GFX11: ; %bb.0: ; %bb
21772202; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21782203; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
2179- ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
2204+ ; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
2205+ ; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
2206+ ; UNALIGNED_GFX11-NEXT: s_add_i32 s1, s32, s0
2207+ ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s1, 4
21802208; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
21812209; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
21822210; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
0 commit comments