@@ -1917,8 +1917,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1917
1917
; GFX9-NEXT: s_mov_b32 s0, 0
1918
1918
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
1919
1919
; GFX9-NEXT: s_waitcnt vmcnt(0)
1920
+ ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
1920
1921
; GFX9-NEXT: v_mov_b32_e32 v0, 15
1921
- ; GFX9-NEXT: s_movk_i32 s0, 0x3e84
1922
+ ; GFX9-NEXT: s_add_i32 s0, s0, 4
1922
1923
; GFX9-NEXT: scratch_store_dword off, v0, s0
1923
1924
; GFX9-NEXT: s_waitcnt vmcnt(0)
1924
1925
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1933,7 +1934,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1933
1934
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
1934
1935
; GFX10-NEXT: v_mov_b32_e32 v0, 13
1935
1936
; GFX10-NEXT: v_mov_b32_e32 v1, 15
1936
- ; GFX10-NEXT: s_movk_i32 s0, 0x3e84
1937
+ ; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1938
+ ; GFX10-NEXT: s_add_i32 s0, s0, 4
1937
1939
; GFX10-NEXT: scratch_store_dword off, v0, off offset:4
1938
1940
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1939
1941
; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1945,10 +1947,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1945
1947
; GFX942-LABEL: store_load_large_imm_offset_kernel:
1946
1948
; GFX942: ; %bb.0: ; %bb
1947
1949
; GFX942-NEXT: v_mov_b32_e32 v0, 13
1950
+ ; GFX942-NEXT: s_movk_i32 s0, 0x3e80
1948
1951
; GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
1949
1952
; GFX942-NEXT: s_waitcnt vmcnt(0)
1950
1953
; GFX942-NEXT: v_mov_b32_e32 v0, 15
1951
- ; GFX942-NEXT: s_movk_i32 s0, 0x3e84
1954
+ ; GFX942-NEXT: s_add_i32 s0, s0, 4
1952
1955
; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
1953
1956
; GFX942-NEXT: s_waitcnt vmcnt(0)
1954
1957
; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1958,7 +1961,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1958
1961
; GFX11-LABEL: store_load_large_imm_offset_kernel:
1959
1962
; GFX11: ; %bb.0: ; %bb
1960
1963
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1961
- ; GFX11-NEXT: s_movk_i32 s0, 0x3e84
1964
+ ; GFX11-NEXT: s_movk_i32 s0, 0x3e80
1965
+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1966
+ ; GFX11-NEXT: s_add_i32 s0, s0, 4
1962
1967
; GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
1963
1968
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1964
1969
; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -1986,8 +1991,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1986
1991
; UNALIGNED_GFX9-NEXT: s_mov_b32 s0, 0
1987
1992
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
1988
1993
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
1994
+ ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
1989
1995
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
1990
- ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e84
1996
+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, 4
1991
1997
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
1992
1998
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
1993
1999
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -2002,7 +2008,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
2002
2008
; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
2003
2009
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
2004
2010
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
2005
- ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e84
2011
+ ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
2012
+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, 4
2006
2013
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, off offset:4
2007
2014
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2008
2015
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -2014,10 +2021,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
2014
2021
; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_kernel:
2015
2022
; UNALIGNED_GFX942: ; %bb.0: ; %bb
2016
2023
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
2024
+ ; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
2017
2025
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
2018
2026
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
2019
2027
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
2020
- ; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e84
2028
+ ; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s0, 4
2021
2029
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
2022
2030
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
2023
2031
; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -2027,7 +2035,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
2027
2035
; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_kernel:
2028
2036
; UNALIGNED_GFX11: ; %bb.0: ; %bb
2029
2037
; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
2030
- ; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e84
2038
+ ; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
2039
+ ; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2040
+ ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s0, 4
2031
2041
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
2032
2042
; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2033
2043
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -2061,11 +2071,13 @@ define void @store_load_large_imm_offset_foo() {
2061
2071
; GFX9-LABEL: store_load_large_imm_offset_foo:
2062
2072
; GFX9: ; %bb.0: ; %bb
2063
2073
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2074
+ ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
2064
2075
; GFX9-NEXT: v_mov_b32_e32 v0, 13
2076
+ ; GFX9-NEXT: s_add_i32 s1, s32, s0
2065
2077
; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
2066
2078
; GFX9-NEXT: s_waitcnt vmcnt(0)
2067
2079
; GFX9-NEXT: v_mov_b32_e32 v0, 15
2068
- ; GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
2080
+ ; GFX9-NEXT: s_add_i32 s0, s1, 4
2069
2081
; GFX9-NEXT: scratch_store_dword off, v0, s0
2070
2082
; GFX9-NEXT: s_waitcnt vmcnt(0)
2071
2083
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -2076,8 +2088,10 @@ define void @store_load_large_imm_offset_foo() {
2076
2088
; GFX10: ; %bb.0: ; %bb
2077
2089
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2078
2090
; GFX10-NEXT: v_mov_b32_e32 v0, 13
2091
+ ; GFX10-NEXT: s_movk_i32 s0, 0x3e80
2079
2092
; GFX10-NEXT: v_mov_b32_e32 v1, 15
2080
- ; GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
2093
+ ; GFX10-NEXT: s_add_i32 s1, s32, s0
2094
+ ; GFX10-NEXT: s_add_i32 s0, s1, 4
2081
2095
; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
2082
2096
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2083
2097
; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -2089,11 +2103,13 @@ define void @store_load_large_imm_offset_foo() {
2089
2103
; GFX942-LABEL: store_load_large_imm_offset_foo:
2090
2104
; GFX942: ; %bb.0: ; %bb
2091
2105
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2106
+ ; GFX942-NEXT: s_movk_i32 s0, 0x3e80
2092
2107
; GFX942-NEXT: v_mov_b32_e32 v0, 13
2108
+ ; GFX942-NEXT: s_add_i32 s1, s32, s0
2093
2109
; GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
2094
2110
; GFX942-NEXT: s_waitcnt vmcnt(0)
2095
2111
; GFX942-NEXT: v_mov_b32_e32 v0, 15
2096
- ; GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
2112
+ ; GFX942-NEXT: s_add_i32 s0, s1, 4
2097
2113
; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
2098
2114
; GFX942-NEXT: s_waitcnt vmcnt(0)
2099
2115
; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -2104,7 +2120,10 @@ define void @store_load_large_imm_offset_foo() {
2104
2120
; GFX11: ; %bb.0: ; %bb
2105
2121
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2106
2122
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
2107
- ; GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
2123
+ ; GFX11-NEXT: s_movk_i32 s0, 0x3e80
2124
+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
2125
+ ; GFX11-NEXT: s_add_i32 s1, s32, s0
2126
+ ; GFX11-NEXT: s_add_i32 s0, s1, 4
2108
2127
; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
2109
2128
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2110
2129
; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -2133,11 +2152,13 @@ define void @store_load_large_imm_offset_foo() {
2133
2152
; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_foo:
2134
2153
; UNALIGNED_GFX9: ; %bb.0: ; %bb
2135
2154
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2155
+ ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
2136
2156
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 13
2157
+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, s0
2137
2158
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
2138
2159
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
2139
2160
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
2140
- ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
2161
+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s1, 4
2141
2162
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
2142
2163
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
2143
2164
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -2148,8 +2169,10 @@ define void @store_load_large_imm_offset_foo() {
2148
2169
; UNALIGNED_GFX10: ; %bb.0: ; %bb
2149
2170
; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2150
2171
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
2172
+ ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
2151
2173
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
2152
- ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
2174
+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, s0
2175
+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s1, 4
2153
2176
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
2154
2177
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2155
2178
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -2161,11 +2184,13 @@ define void @store_load_large_imm_offset_foo() {
2161
2184
; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_foo:
2162
2185
; UNALIGNED_GFX942: ; %bb.0: ; %bb
2163
2186
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2187
+ ; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
2164
2188
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
2189
+ ; UNALIGNED_GFX942-NEXT: s_add_i32 s1, s32, s0
2165
2190
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
2166
2191
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
2167
2192
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
2168
- ; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
2193
+ ; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s1, 4
2169
2194
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
2170
2195
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
2171
2196
; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -2176,7 +2201,10 @@ define void @store_load_large_imm_offset_foo() {
2176
2201
; UNALIGNED_GFX11: ; %bb.0: ; %bb
2177
2202
; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2178
2203
; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
2179
- ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
2204
+ ; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
2205
+ ; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
2206
+ ; UNALIGNED_GFX11-NEXT: s_add_i32 s1, s32, s0
2207
+ ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s1, 4
2180
2208
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
2181
2209
; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2182
2210
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
0 commit comments