Skip to content

Commit 32161e9

Browse files
changpeng and arsenm authored
[AMDGPU] Do not fold an immediate into instructions with frame indexes (#151263)
Do not fold an immediate into an instruction that already has a frame index operand, because the frame index may itself later be resolved to an immediate. Fixes: SWDEV-536263. Co-authored-by: Matt Arsenault <[email protected]>
1 parent 35bd40d commit 32161e9

9 files changed

+220
-50
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6122,10 +6122,11 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
61226122
!Op.isIdenticalTo(*MO))
61236123
return false;
61246124

6125-
// Do not fold a frame index into an instruction that already has a frame
6126-
// index. The frame index handling code doesn't handle fixing up operand
6127-
// constraints if there are multiple indexes.
6128-
if (Op.isFI() && MO->isFI())
6125+
// Do not fold a non-inlineable and non-register operand into an
6126+
// instruction that already has a frame index. The frame index handling
6127+
// code does not cope well when a frame index co-exists with another
6128+
// non-register operand, unless that operand is an inlineable immediate.
6129+
if (Op.isFI())
61296130
return false;
61306131
}
61316132
} else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&

llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll

Lines changed: 44 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1917,8 +1917,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
19171917
; GFX9-NEXT: s_mov_b32 s0, 0
19181918
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
19191919
; GFX9-NEXT: s_waitcnt vmcnt(0)
1920+
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
19201921
; GFX9-NEXT: v_mov_b32_e32 v0, 15
1921-
; GFX9-NEXT: s_movk_i32 s0, 0x3e84
1922+
; GFX9-NEXT: s_add_i32 s0, s0, 4
19221923
; GFX9-NEXT: scratch_store_dword off, v0, s0
19231924
; GFX9-NEXT: s_waitcnt vmcnt(0)
19241925
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1933,7 +1934,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
19331934
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
19341935
; GFX10-NEXT: v_mov_b32_e32 v0, 13
19351936
; GFX10-NEXT: v_mov_b32_e32 v1, 15
1936-
; GFX10-NEXT: s_movk_i32 s0, 0x3e84
1937+
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1938+
; GFX10-NEXT: s_add_i32 s0, s0, 4
19371939
; GFX10-NEXT: scratch_store_dword off, v0, off offset:4
19381940
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
19391941
; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1945,10 +1947,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
19451947
; GFX942-LABEL: store_load_large_imm_offset_kernel:
19461948
; GFX942: ; %bb.0: ; %bb
19471949
; GFX942-NEXT: v_mov_b32_e32 v0, 13
1950+
; GFX942-NEXT: s_movk_i32 s0, 0x3e80
19481951
; GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
19491952
; GFX942-NEXT: s_waitcnt vmcnt(0)
19501953
; GFX942-NEXT: v_mov_b32_e32 v0, 15
1951-
; GFX942-NEXT: s_movk_i32 s0, 0x3e84
1954+
; GFX942-NEXT: s_add_i32 s0, s0, 4
19521955
; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
19531956
; GFX942-NEXT: s_waitcnt vmcnt(0)
19541957
; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1958,7 +1961,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
19581961
; GFX11-LABEL: store_load_large_imm_offset_kernel:
19591962
; GFX11: ; %bb.0: ; %bb
19601963
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1961-
; GFX11-NEXT: s_movk_i32 s0, 0x3e84
1964+
; GFX11-NEXT: s_movk_i32 s0, 0x3e80
1965+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1966+
; GFX11-NEXT: s_add_i32 s0, s0, 4
19621967
; GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
19631968
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
19641969
; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -1986,8 +1991,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
19861991
; UNALIGNED_GFX9-NEXT: s_mov_b32 s0, 0
19871992
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
19881993
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
1994+
; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
19891995
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
1990-
; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e84
1996+
; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, 4
19911997
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
19921998
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
19931999
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -2002,7 +2008,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
20022008
; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
20032009
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
20042010
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
2005-
; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e84
2011+
; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
2012+
; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, 4
20062013
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, off offset:4
20072014
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
20082015
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -2014,10 +2021,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
20142021
; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_kernel:
20152022
; UNALIGNED_GFX942: ; %bb.0: ; %bb
20162023
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
2024+
; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
20172025
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
20182026
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
20192027
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
2020-
; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e84
2028+
; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s0, 4
20212029
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
20222030
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
20232031
; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -2027,7 +2035,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
20272035
; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_kernel:
20282036
; UNALIGNED_GFX11: ; %bb.0: ; %bb
20292037
; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
2030-
; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e84
2038+
; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
2039+
; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2040+
; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s0, 4
20312041
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
20322042
; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
20332043
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -2061,11 +2071,13 @@ define void @store_load_large_imm_offset_foo() {
20612071
; GFX9-LABEL: store_load_large_imm_offset_foo:
20622072
; GFX9: ; %bb.0: ; %bb
20632073
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2074+
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
20642075
; GFX9-NEXT: v_mov_b32_e32 v0, 13
2076+
; GFX9-NEXT: s_add_i32 s1, s32, s0
20652077
; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
20662078
; GFX9-NEXT: s_waitcnt vmcnt(0)
20672079
; GFX9-NEXT: v_mov_b32_e32 v0, 15
2068-
; GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
2080+
; GFX9-NEXT: s_add_i32 s0, s1, 4
20692081
; GFX9-NEXT: scratch_store_dword off, v0, s0
20702082
; GFX9-NEXT: s_waitcnt vmcnt(0)
20712083
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -2076,8 +2088,10 @@ define void @store_load_large_imm_offset_foo() {
20762088
; GFX10: ; %bb.0: ; %bb
20772089
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20782090
; GFX10-NEXT: v_mov_b32_e32 v0, 13
2091+
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
20792092
; GFX10-NEXT: v_mov_b32_e32 v1, 15
2080-
; GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
2093+
; GFX10-NEXT: s_add_i32 s1, s32, s0
2094+
; GFX10-NEXT: s_add_i32 s0, s1, 4
20812095
; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
20822096
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
20832097
; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -2089,11 +2103,13 @@ define void @store_load_large_imm_offset_foo() {
20892103
; GFX942-LABEL: store_load_large_imm_offset_foo:
20902104
; GFX942: ; %bb.0: ; %bb
20912105
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2106+
; GFX942-NEXT: s_movk_i32 s0, 0x3e80
20922107
; GFX942-NEXT: v_mov_b32_e32 v0, 13
2108+
; GFX942-NEXT: s_add_i32 s1, s32, s0
20932109
; GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
20942110
; GFX942-NEXT: s_waitcnt vmcnt(0)
20952111
; GFX942-NEXT: v_mov_b32_e32 v0, 15
2096-
; GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
2112+
; GFX942-NEXT: s_add_i32 s0, s1, 4
20972113
; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
20982114
; GFX942-NEXT: s_waitcnt vmcnt(0)
20992115
; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -2104,7 +2120,10 @@ define void @store_load_large_imm_offset_foo() {
21042120
; GFX11: ; %bb.0: ; %bb
21052121
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21062122
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
2107-
; GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
2123+
; GFX11-NEXT: s_movk_i32 s0, 0x3e80
2124+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
2125+
; GFX11-NEXT: s_add_i32 s1, s32, s0
2126+
; GFX11-NEXT: s_add_i32 s0, s1, 4
21082127
; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
21092128
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
21102129
; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -2133,11 +2152,13 @@ define void @store_load_large_imm_offset_foo() {
21332152
; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_foo:
21342153
; UNALIGNED_GFX9: ; %bb.0: ; %bb
21352154
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2155+
; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
21362156
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 13
2157+
; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, s0
21372158
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
21382159
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
21392160
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
2140-
; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
2161+
; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s1, 4
21412162
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
21422163
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
21432164
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -2148,8 +2169,10 @@ define void @store_load_large_imm_offset_foo() {
21482169
; UNALIGNED_GFX10: ; %bb.0: ; %bb
21492170
; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21502171
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
2172+
; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
21512173
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
2152-
; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
2174+
; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, s0
2175+
; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s1, 4
21532176
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
21542177
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
21552178
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -2161,11 +2184,13 @@ define void @store_load_large_imm_offset_foo() {
21612184
; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_foo:
21622185
; UNALIGNED_GFX942: ; %bb.0: ; %bb
21632186
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2187+
; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
21642188
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
2189+
; UNALIGNED_GFX942-NEXT: s_add_i32 s1, s32, s0
21652190
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
21662191
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
21672192
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
2168-
; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
2193+
; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s1, 4
21692194
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
21702195
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
21712196
; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -2176,7 +2201,10 @@ define void @store_load_large_imm_offset_foo() {
21762201
; UNALIGNED_GFX11: ; %bb.0: ; %bb
21772202
; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21782203
; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
2179-
; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
2204+
; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
2205+
; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
2206+
; UNALIGNED_GFX11-NEXT: s_add_i32 s1, s32, s0
2207+
; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s1, 4
21802208
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
21812209
; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
21822210
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc

llvm/test/CodeGen/AMDGPU/flat-scratch.ll

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3621,7 +3621,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
36213621
; GFX9-NEXT: s_mov_b32 s0, 0
36223622
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
36233623
; GFX9-NEXT: s_waitcnt vmcnt(0)
3624-
; GFX9-NEXT: s_movk_i32 s0, 0x3004
3624+
; GFX9-NEXT: s_movk_i32 s0, 0x3000
3625+
; GFX9-NEXT: s_add_i32 s0, s0, 4
36253626
; GFX9-NEXT: v_mov_b32_e32 v0, 15
36263627
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712
36273628
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -3637,7 +3638,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
36373638
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
36383639
; GFX10-NEXT: v_mov_b32_e32 v0, 13
36393640
; GFX10-NEXT: v_mov_b32_e32 v1, 15
3640-
; GFX10-NEXT: s_movk_i32 s0, 0x3804
3641+
; GFX10-NEXT: s_movk_i32 s0, 0x3800
3642+
; GFX10-NEXT: s_add_i32 s0, s0, 4
36413643
; GFX10-NEXT: scratch_store_dword off, v0, off offset:4
36423644
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
36433645
; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664
@@ -3682,7 +3684,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
36823684
; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
36833685
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:4
36843686
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
3685-
; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3004
3687+
; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3000
3688+
; GFX9-PAL-NEXT: s_add_i32 s0, s0, 4
36863689
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15
36873690
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712
36883691
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
@@ -3716,8 +3719,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
37163719
; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
37173720
; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 13
37183721
; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, 15
3722+
; GFX1010-PAL-NEXT: s_movk_i32 s0, 0x3800
37193723
; GFX1010-PAL-NEXT: s_mov_b32 s1, 0
3720-
; GFX1010-PAL-NEXT: s_movk_i32 s0, 0x3804
3724+
; GFX1010-PAL-NEXT: s_add_i32 s0, s0, 4
37213725
; GFX1010-PAL-NEXT: scratch_store_dword off, v0, s1 offset:4
37223726
; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0
37233727
; GFX1010-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664
@@ -3739,7 +3743,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
37393743
; GFX1030-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
37403744
; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 13
37413745
; GFX1030-PAL-NEXT: v_mov_b32_e32 v1, 15
3742-
; GFX1030-PAL-NEXT: s_movk_i32 s0, 0x3804
3746+
; GFX1030-PAL-NEXT: s_movk_i32 s0, 0x3800
3747+
; GFX1030-PAL-NEXT: s_add_i32 s0, s0, 4
37433748
; GFX1030-PAL-NEXT: scratch_store_dword off, v0, off offset:4
37443749
; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0
37453750
; GFX1030-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664
@@ -3785,10 +3790,12 @@ define void @store_load_large_imm_offset_foo() {
37853790
; GFX9-LABEL: store_load_large_imm_offset_foo:
37863791
; GFX9: ; %bb.0: ; %bb
37873792
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3793+
; GFX9-NEXT: s_movk_i32 s0, 0x3000
37883794
; GFX9-NEXT: v_mov_b32_e32 v0, 13
3795+
; GFX9-NEXT: s_add_i32 s1, s32, s0
37893796
; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
37903797
; GFX9-NEXT: s_waitcnt vmcnt(0)
3791-
; GFX9-NEXT: s_add_i32 s0, s32, 0x3004
3798+
; GFX9-NEXT: s_add_i32 s0, s1, 4
37923799
; GFX9-NEXT: v_mov_b32_e32 v0, 15
37933800
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712
37943801
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -3800,8 +3807,10 @@ define void @store_load_large_imm_offset_foo() {
38003807
; GFX10: ; %bb.0: ; %bb
38013808
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38023809
; GFX10-NEXT: v_mov_b32_e32 v0, 13
3810+
; GFX10-NEXT: s_movk_i32 s0, 0x3800
38033811
; GFX10-NEXT: v_mov_b32_e32 v1, 15
3804-
; GFX10-NEXT: s_add_i32 s0, s32, 0x3804
3812+
; GFX10-NEXT: s_add_i32 s1, s32, s0
3813+
; GFX10-NEXT: s_add_i32 s0, s1, 4
38053814
; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
38063815
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
38073816
; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664
@@ -3843,10 +3852,12 @@ define void @store_load_large_imm_offset_foo() {
38433852
; GFX9-PAL-LABEL: store_load_large_imm_offset_foo:
38443853
; GFX9-PAL: ; %bb.0: ; %bb
38453854
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3855+
; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3000
38463856
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 13
3857+
; GFX9-PAL-NEXT: s_add_i32 s1, s32, s0
38473858
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4
38483859
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
3849-
; GFX9-PAL-NEXT: s_add_i32 s0, s32, 0x3004
3860+
; GFX9-PAL-NEXT: s_add_i32 s0, s1, 4
38503861
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15
38513862
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712
38523863
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
@@ -3872,8 +3883,10 @@ define void @store_load_large_imm_offset_foo() {
38723883
; GFX10-PAL: ; %bb.0: ; %bb
38733884
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38743885
; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 13
3886+
; GFX10-PAL-NEXT: s_movk_i32 s0, 0x3800
38753887
; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
3876-
; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x3804
3888+
; GFX10-PAL-NEXT: s_add_i32 s1, s32, s0
3889+
; GFX10-PAL-NEXT: s_add_i32 s0, s1, 4
38773890
; GFX10-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4
38783891
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
38793892
; GFX10-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664

llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ stack:
7575
body: |
7676
bb.0:
7777
; CHECK-LABEL: name: fold_frame_index__s_add_i32__fi_materializedconst_0
78-
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, 256, implicit-def $scc
78+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 256
79+
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[S_MOV_B32_]], implicit-def $scc
7980
; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
8081
; CHECK-NEXT: SI_RETURN implicit $sgpr4
8182
%0:sreg_32 = S_MOV_B32 %stack.0

llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ body: |
4646
%2:sreg_32 = S_LSHL2_ADD_U32 %0, %1, implicit-def $scc
4747
...
4848
# GCN-LABEL: name: test_frameindex{{$}}
49-
# GCN: %1:sreg_32 = S_ADD_I32 %stack.0, 70
49+
# GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 70
50+
# GCN-NEXT: %1:sreg_32 = S_ADD_I32 %stack.0, [[S_MOV_B32_]]
5051
---
5152
name: test_frameindex
5253
tracksRegLiveness: true

llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,8 @@ entry:
360360
; s_add_i32.
361361

362362
; GCN-LABEL: {{^}}fi_sop2_s_add_u32_literal_error:
363-
; GCN: s_add_u32 [[ADD_LO:s[0-9]+]], 0, 0x2010
363+
; GCN: s_movk_i32 [[S_MOVK_I32_:s[0-9]+]], 0x1000
364+
; GCN: s_add_u32 [[ADD_LO:s[0-9]+]], 0x1010, [[S_MOVK_I32_]]
364365
; GCN: s_addc_u32 [[ADD_HI:s[0-9]+]], s{{[0-9]+}}, 0
365366
define amdgpu_kernel void @fi_sop2_s_add_u32_literal_error() #0 {
366367
entry:

llvm/test/CodeGen/AMDGPU/issue130120-eliminate-frame-index.ll

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,24 @@ define amdgpu_gfx [13 x i32] @issue130120() {
66
; CHECK: ; %bb.0: ; %bb
77
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88
; CHECK-NEXT: v_mov_b32_e32 v0, 0
9-
; CHECK-NEXT: s_add_i32 s0, s32, 0xf0
10-
; CHECK-NEXT: s_add_i32 s1, s32, 0xf4
11-
; CHECK-NEXT: s_add_i32 s2, s32, 0xf8
12-
; CHECK-NEXT: s_add_i32 s3, s32, 0xfc
9+
; CHECK-NEXT: s_movk_i32 s1, 0xf4
10+
; CHECK-NEXT: s_movk_i32 s2, 0xf8
11+
; CHECK-NEXT: s_movk_i32 s3, 0xfc
12+
; CHECK-NEXT: s_movk_i32 s34, 0x100
1313
; CHECK-NEXT: v_mov_b32_e32 v1, v0
14-
; CHECK-NEXT: s_add_i32 s34, s32, 0x100
15-
; CHECK-NEXT: s_add_i32 s35, s32, 0x104
16-
; CHECK-NEXT: s_add_i32 s36, s32, 0x108
17-
; CHECK-NEXT: s_add_i32 s37, s32, 0x110
18-
; CHECK-NEXT: s_add_i32 s38, s32, 0x120
14+
; CHECK-NEXT: s_movk_i32 s35, 0x104
15+
; CHECK-NEXT: s_movk_i32 s36, 0x108
16+
; CHECK-NEXT: s_movk_i32 s37, 0x110
17+
; CHECK-NEXT: s_movk_i32 s38, 0x120
18+
; CHECK-NEXT: s_add_i32 s0, s32, 0xf0
19+
; CHECK-NEXT: s_add_i32 s1, s32, s1
20+
; CHECK-NEXT: s_add_i32 s2, s32, s2
21+
; CHECK-NEXT: s_add_i32 s3, s32, s3
22+
; CHECK-NEXT: s_add_i32 s34, s32, s34
23+
; CHECK-NEXT: s_add_i32 s35, s32, s35
24+
; CHECK-NEXT: s_add_i32 s36, s32, s36
25+
; CHECK-NEXT: s_add_i32 s37, s32, s37
26+
; CHECK-NEXT: s_add_i32 s38, s32, s38
1927
; CHECK-NEXT: s_or_b32 s39, s32, 4
2028
; CHECK-NEXT: s_or_b32 s40, s32, 8
2129
; CHECK-NEXT: s_or_b32 s41, s32, 12

0 commit comments

Comments
 (0)