@@ -1769,9 +1769,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
17691769; GFX9-NEXT: s_mov_b32 s0, 0
17701770; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
17711771; GFX9-NEXT: s_waitcnt vmcnt(0)
1772- ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
17731772; GFX9-NEXT: v_mov_b32_e32 v0, 15
1774- ; GFX9-NEXT: s_add_i32 s0, s0, 4
1773+ ; GFX9-NEXT: s_movk_i32 s0, 0x3e84
17751774; GFX9-NEXT: scratch_store_dword off, v0, s0
17761775; GFX9-NEXT: s_waitcnt vmcnt(0)
17771776; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1786,8 +1785,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
17861785; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
17871786; GFX10-NEXT: v_mov_b32_e32 v0, 13
17881787; GFX10-NEXT: v_mov_b32_e32 v1, 15
1789- ; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1790- ; GFX10-NEXT: s_add_i32 s0, s0, 4
1788+ ; GFX10-NEXT: s_movk_i32 s0, 0x3e84
17911789; GFX10-NEXT: scratch_store_dword off, v0, off offset:4
17921790; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
17931791; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1799,11 +1797,10 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
17991797; GFX942-LABEL: store_load_large_imm_offset_kernel:
18001798; GFX942: ; %bb.0: ; %bb
18011799; GFX942-NEXT: v_mov_b32_e32 v0, 13
1802- ; GFX942-NEXT: s_movk_i32 s0, 0x3e80
18031800; GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
18041801; GFX942-NEXT: s_waitcnt vmcnt(0)
18051802; GFX942-NEXT: v_mov_b32_e32 v0, 15
1806- ; GFX942-NEXT: s_add_i32 s0, s0, 4
1803+ ; GFX942-NEXT: s_movk_i32 s0, 0x3e84
18071804; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
18081805; GFX942-NEXT: s_waitcnt vmcnt(0)
18091806; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1813,9 +1810,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
18131810; GFX11-LABEL: store_load_large_imm_offset_kernel:
18141811; GFX11: ; %bb.0: ; %bb
18151812; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1816- ; GFX11-NEXT: s_movk_i32 s0, 0x3e80
1817- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1818- ; GFX11-NEXT: s_add_i32 s0, s0, 4
1813+ ; GFX11-NEXT: s_movk_i32 s0, 0x3e84
18191814; GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
18201815; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
18211816; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -1843,9 +1838,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
18431838; UNALIGNED_GFX9-NEXT: s_mov_b32 s0, 0
18441839; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
18451840; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
1846- ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
18471841; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
1848- ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, 4
1842+ ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e84
18491843; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
18501844; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
18511845; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1860,8 +1854,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
18601854; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
18611855; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
18621856; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
1863- ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
1864- ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, 4
1857+ ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e84
18651858; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, off offset:4
18661859; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
18671860; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1873,11 +1866,10 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
18731866; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_kernel:
18741867; UNALIGNED_GFX942: ; %bb.0: ; %bb
18751868; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
1876- ; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
18771869; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
18781870; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
18791871; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
1880- ; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s0, 4
1872+ ; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e84
18811873; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
18821874; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
18831875; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1887,9 +1879,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
18871879; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_kernel:
18881880; UNALIGNED_GFX11: ; %bb.0: ; %bb
18891881; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1890- ; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
1891- ; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1892- ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s0, 4
1882+ ; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e84
18931883; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
18941884; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
18951885; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -1923,13 +1913,11 @@ define void @store_load_large_imm_offset_foo() {
19231913; GFX9-LABEL: store_load_large_imm_offset_foo:
19241914; GFX9: ; %bb.0: ; %bb
19251915; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1926- ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
19271916; GFX9-NEXT: v_mov_b32_e32 v0, 13
1928- ; GFX9-NEXT: s_add_i32 s1, s32, s0
19291917; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
19301918; GFX9-NEXT: s_waitcnt vmcnt(0)
19311919; GFX9-NEXT: v_mov_b32_e32 v0, 15
1932- ; GFX9-NEXT: s_add_i32 s0, s1, 4
1920+ ; GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
19331921; GFX9-NEXT: scratch_store_dword off, v0, s0
19341922; GFX9-NEXT: s_waitcnt vmcnt(0)
19351923; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1940,10 +1928,8 @@ define void @store_load_large_imm_offset_foo() {
19401928; GFX10: ; %bb.0: ; %bb
19411929; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19421930; GFX10-NEXT: v_mov_b32_e32 v0, 13
1943- ; GFX10-NEXT: s_movk_i32 s0, 0x3e80
19441931; GFX10-NEXT: v_mov_b32_e32 v1, 15
1945- ; GFX10-NEXT: s_add_i32 s1, s32, s0
1946- ; GFX10-NEXT: s_add_i32 s0, s1, 4
1932+ ; GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
19471933; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
19481934; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
19491935; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1955,13 +1941,11 @@ define void @store_load_large_imm_offset_foo() {
19551941; GFX942-LABEL: store_load_large_imm_offset_foo:
19561942; GFX942: ; %bb.0: ; %bb
19571943; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1958- ; GFX942-NEXT: s_movk_i32 s0, 0x3e80
19591944; GFX942-NEXT: v_mov_b32_e32 v0, 13
1960- ; GFX942-NEXT: s_add_i32 s1, s32, s0
19611945; GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
19621946; GFX942-NEXT: s_waitcnt vmcnt(0)
19631947; GFX942-NEXT: v_mov_b32_e32 v0, 15
1964- ; GFX942-NEXT: s_add_i32 s0, s1, 4
1948+ ; GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
19651949; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
19661950; GFX942-NEXT: s_waitcnt vmcnt(0)
19671951; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1972,10 +1956,7 @@ define void @store_load_large_imm_offset_foo() {
19721956; GFX11: ; %bb.0: ; %bb
19731957; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19741958; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1975- ; GFX11-NEXT: s_movk_i32 s0, 0x3e80
1976- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1977- ; GFX11-NEXT: s_add_i32 s1, s32, s0
1978- ; GFX11-NEXT: s_add_i32 s0, s1, 4
1959+ ; GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
19791960; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
19801961; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
19811962; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -2004,13 +1985,11 @@ define void @store_load_large_imm_offset_foo() {
20041985; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_foo:
20051986; UNALIGNED_GFX9: ; %bb.0: ; %bb
20061987; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2007- ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
20081988; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 13
2009- ; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, s0
20101989; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
20111990; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
20121991; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
2013- ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s1, 4
1992+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
20141993; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
20151994; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
20161995; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -2021,10 +2000,8 @@ define void @store_load_large_imm_offset_foo() {
20212000; UNALIGNED_GFX10: ; %bb.0: ; %bb
20222001; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20232002; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
2024- ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
20252003; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
2026- ; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, s0
2027- ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s1, 4
2004+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
20282005; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
20292006; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
20302007; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -2036,13 +2013,11 @@ define void @store_load_large_imm_offset_foo() {
20362013; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_foo:
20372014; UNALIGNED_GFX942: ; %bb.0: ; %bb
20382015; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2039- ; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
20402016; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
2041- ; UNALIGNED_GFX942-NEXT: s_add_i32 s1, s32, s0
20422017; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
20432018; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
20442019; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
2045- ; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s1, 4
2020+ ; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
20462021; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
20472022; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
20482023; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -2053,10 +2028,7 @@ define void @store_load_large_imm_offset_foo() {
20532028; UNALIGNED_GFX11: ; %bb.0: ; %bb
20542029; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20552030; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
2056- ; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
2057- ; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
2058- ; UNALIGNED_GFX11-NEXT: s_add_i32 s1, s32, s0
2059- ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s1, 4
2031+ ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
20602032; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
20612033; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
20622034; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
0 commit comments