@@ -1769,8 +1769,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
17691769; GFX9-NEXT: s_mov_b32 s0, 0
17701770; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
17711771; GFX9-NEXT: s_waitcnt vmcnt(0)
1772+ ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
17721773; GFX9-NEXT: v_mov_b32_e32 v0, 15
1773- ; GFX9-NEXT: s_movk_i32 s0, 0x3e84
1774+ ; GFX9-NEXT: s_add_i32 s0, s0, 4
17741775; GFX9-NEXT: scratch_store_dword off, v0, s0
17751776; GFX9-NEXT: s_waitcnt vmcnt(0)
17761777; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1785,7 +1786,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
17851786; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
17861787; GFX10-NEXT: v_mov_b32_e32 v0, 13
17871788; GFX10-NEXT: v_mov_b32_e32 v1, 15
1788- ; GFX10-NEXT: s_movk_i32 s0, 0x3e84
1789+ ; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1790+ ; GFX10-NEXT: s_add_i32 s0, s0, 4
17891791; GFX10-NEXT: scratch_store_dword off, v0, off offset:4
17901792; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
17911793; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1797,10 +1799,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
17971799; GFX942-LABEL: store_load_large_imm_offset_kernel:
17981800; GFX942: ; %bb.0: ; %bb
17991801; GFX942-NEXT: v_mov_b32_e32 v0, 13
1802+ ; GFX942-NEXT: s_movk_i32 s0, 0x3e80
18001803; GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
18011804; GFX942-NEXT: s_waitcnt vmcnt(0)
18021805; GFX942-NEXT: v_mov_b32_e32 v0, 15
1803- ; GFX942-NEXT: s_movk_i32 s0, 0x3e84
1806+ ; GFX942-NEXT: s_add_i32 s0, s0, 4
18041807; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
18051808; GFX942-NEXT: s_waitcnt vmcnt(0)
18061809; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1810,7 +1813,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
18101813; GFX11-LABEL: store_load_large_imm_offset_kernel:
18111814; GFX11: ; %bb.0: ; %bb
18121815; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1813- ; GFX11-NEXT: s_movk_i32 s0, 0x3e84
1816+ ; GFX11-NEXT: s_movk_i32 s0, 0x3e80
1817+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1818+ ; GFX11-NEXT: s_add_i32 s0, s0, 4
18141819; GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
18151820; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
18161821; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -1838,8 +1843,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
18381843; UNALIGNED_GFX9-NEXT: s_mov_b32 s0, 0
18391844; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
18401845; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
1846+ ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
18411847; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
1842- ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e84
1848+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, 4
18431849; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
18441850; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
18451851; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1854,7 +1860,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
18541860; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
18551861; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
18561862; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
1857- ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e84
1863+ ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
1864+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, 4
18581865; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, off offset:4
18591866; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
18601867; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1866,10 +1873,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
18661873; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_kernel:
18671874; UNALIGNED_GFX942: ; %bb.0: ; %bb
18681875; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
1876+ ; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
18691877; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
18701878; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
18711879; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
1872- ; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e84
1880+ ; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s0, 4
18731881; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
18741882; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
18751883; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1879,7 +1887,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
18791887; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_kernel:
18801888; UNALIGNED_GFX11: ; %bb.0: ; %bb
18811889; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1882- ; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e84
1890+ ; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
1891+ ; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1892+ ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s0, 4
18831893; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
18841894; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
18851895; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -1913,11 +1923,13 @@ define void @store_load_large_imm_offset_foo() {
19131923; GFX9-LABEL: store_load_large_imm_offset_foo:
19141924; GFX9: ; %bb.0: ; %bb
19151925; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1926+ ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
19161927; GFX9-NEXT: v_mov_b32_e32 v0, 13
1928+ ; GFX9-NEXT: s_add_i32 s1, s32, s0
19171929; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
19181930; GFX9-NEXT: s_waitcnt vmcnt(0)
19191931; GFX9-NEXT: v_mov_b32_e32 v0, 15
1920- ; GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
1932+ ; GFX9-NEXT: s_add_i32 s0, s1, 4
19211933; GFX9-NEXT: scratch_store_dword off, v0, s0
19221934; GFX9-NEXT: s_waitcnt vmcnt(0)
19231935; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1928,8 +1940,10 @@ define void @store_load_large_imm_offset_foo() {
19281940; GFX10: ; %bb.0: ; %bb
19291941; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19301942; GFX10-NEXT: v_mov_b32_e32 v0, 13
1943+ ; GFX10-NEXT: s_movk_i32 s0, 0x3e80
19311944; GFX10-NEXT: v_mov_b32_e32 v1, 15
1932- ; GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
1945+ ; GFX10-NEXT: s_add_i32 s1, s32, s0
1946+ ; GFX10-NEXT: s_add_i32 s0, s1, 4
19331947; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
19341948; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
19351949; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1941,11 +1955,13 @@ define void @store_load_large_imm_offset_foo() {
19411955; GFX942-LABEL: store_load_large_imm_offset_foo:
19421956; GFX942: ; %bb.0: ; %bb
19431957; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1958+ ; GFX942-NEXT: s_movk_i32 s0, 0x3e80
19441959; GFX942-NEXT: v_mov_b32_e32 v0, 13
1960+ ; GFX942-NEXT: s_add_i32 s1, s32, s0
19451961; GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
19461962; GFX942-NEXT: s_waitcnt vmcnt(0)
19471963; GFX942-NEXT: v_mov_b32_e32 v0, 15
1948- ; GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
1964+ ; GFX942-NEXT: s_add_i32 s0, s1, 4
19491965; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
19501966; GFX942-NEXT: s_waitcnt vmcnt(0)
19511967; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1956,7 +1972,10 @@ define void @store_load_large_imm_offset_foo() {
19561972; GFX11: ; %bb.0: ; %bb
19571973; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19581974; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1959- ; GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
1975+ ; GFX11-NEXT: s_movk_i32 s0, 0x3e80
1976+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1977+ ; GFX11-NEXT: s_add_i32 s1, s32, s0
1978+ ; GFX11-NEXT: s_add_i32 s0, s1, 4
19601979; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
19611980; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
19621981; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -1985,11 +2004,13 @@ define void @store_load_large_imm_offset_foo() {
19852004; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_foo:
19862005; UNALIGNED_GFX9: ; %bb.0: ; %bb
19872006; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2007+ ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
19882008; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 13
2009+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, s0
19892010; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
19902011; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
19912012; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
1992- ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
2013+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s1, 4
19932014; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
19942015; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
19952016; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -2000,8 +2021,10 @@ define void @store_load_large_imm_offset_foo() {
20002021; UNALIGNED_GFX10: ; %bb.0: ; %bb
20012022; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20022023; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
2024+ ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
20032025; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
2004- ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
2026+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, s0
2027+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s1, 4
20052028; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
20062029; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
20072030; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -2013,11 +2036,13 @@ define void @store_load_large_imm_offset_foo() {
20132036; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_foo:
20142037; UNALIGNED_GFX942: ; %bb.0: ; %bb
20152038; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2039+ ; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
20162040; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
2041+ ; UNALIGNED_GFX942-NEXT: s_add_i32 s1, s32, s0
20172042; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
20182043; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
20192044; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
2020- ; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
2045+ ; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s1, 4
20212046; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
20222047; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
20232048; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -2028,7 +2053,10 @@ define void @store_load_large_imm_offset_foo() {
20282053; UNALIGNED_GFX11: ; %bb.0: ; %bb
20292054; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20302055; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
2031- ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
2056+ ; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
2057+ ; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
2058+ ; UNALIGNED_GFX11-NEXT: s_add_i32 s1, s32, s0
2059+ ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s1, 4
20322060; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
20332061; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
20342062; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
0 commit comments