Skip to content

Commit ef91cd3

Browse files
authored
AMDGPU: Handle folding frame indexes into add with immediate (#110738)
1 parent f87f3ad commit ef91cd3

8 files changed

+99
-159
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 17 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -194,6 +194,23 @@ bool SIFoldOperandsImpl::frameIndexMayFold(
194194
return false;
195195

196196
const unsigned Opc = UseMI.getOpcode();
197+
switch (Opc) {
198+
case AMDGPU::S_ADD_I32:
199+
case AMDGPU::V_ADD_U32_e32:
200+
case AMDGPU::V_ADD_CO_U32_e32:
201+
// TODO: Possibly relax hasOneUse. It matters more for mubuf, since we have
202+
// to insert the wave size shift at every point we use the index.
203+
// TODO: Fix depending on visit order to fold immediates into the operand
204+
return UseMI.getOperand(OpNo == 1 ? 2 : 1).isImm() &&
205+
MRI->hasOneNonDBGUse(UseMI.getOperand(OpNo).getReg());
206+
case AMDGPU::V_ADD_U32_e64:
207+
case AMDGPU::V_ADD_CO_U32_e64:
208+
return UseMI.getOperand(OpNo == 2 ? 3 : 2).isImm() &&
209+
MRI->hasOneNonDBGUse(UseMI.getOperand(OpNo).getReg());
210+
default:
211+
break;
212+
}
213+
197214
if (TII->isMUBUF(UseMI))
198215
return OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
199216
if (!TII->isFLATScratch(UseMI))

llvm/test/CodeGen/AMDGPU/flat-scratch.ll

Lines changed: 2 additions & 4 deletions
Original file line number · Diff line number · Diff line change
@@ -4705,8 +4705,7 @@ define amdgpu_ps void @large_offset() {
47054705
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
47064706
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
47074707
; GFX10-NEXT: v_mov_b32_e32 v0, 0
4708-
; GFX10-NEXT: s_movk_i32 s0, 0x810
4709-
; GFX10-NEXT: s_addk_i32 s0, 0x3c0
4708+
; GFX10-NEXT: s_movk_i32 s0, 0xbd0
47104709
; GFX10-NEXT: v_mov_b32_e32 v1, v0
47114710
; GFX10-NEXT: v_mov_b32_e32 v2, v0
47124711
; GFX10-NEXT: v_mov_b32_e32 v3, v0
@@ -4823,8 +4822,7 @@ define amdgpu_ps void @large_offset() {
48234822
; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
48244823
; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
48254824
; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 0
4826-
; GFX10-PAL-NEXT: s_movk_i32 s0, 0x810
4827-
; GFX10-PAL-NEXT: s_addk_i32 s0, 0x3c0
4825+
; GFX10-PAL-NEXT: s_movk_i32 s0, 0xbd0
48284826
; GFX10-PAL-NEXT: v_mov_b32_e32 v1, v0
48294827
; GFX10-PAL-NEXT: v_mov_b32_e32 v2, v0
48304828
; GFX10-PAL-NEXT: v_mov_b32_e32 v3, v0

llvm/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir

Lines changed: 1 addition & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -183,8 +183,7 @@ body: |
183183
bb.0:
184184
185185
; GCN-LABEL: name: shrink_vgpr_imm_vgpr_fi_v_add_i32_e64_no_carry_out_use
186-
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
187-
; GCN-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 16, [[V_MOV_B32_e32_]], 0, implicit $exec
186+
; GCN: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 16, %stack.0, 0, implicit $exec
188187
; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
189188
%0:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
190189
%1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec

llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.gfx10.mir

Lines changed: 5 additions & 10 deletions
Original file line number · Diff line number · Diff line change
@@ -13,8 +13,7 @@ stack:
1313
body: |
1414
bb.0:
1515
; CHECK-LABEL: name: fold_frame_index__v_add_u32_e32__const_v_fi
16-
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
17-
; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, [[V_MOV_B32_e32_]], implicit $exec
16+
; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, %stack.0, implicit $exec
1817
; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_U32_e32_]]
1918
; CHECK-NEXT: SI_RETURN implicit $vgpr0
2019
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -34,8 +33,7 @@ stack:
3433
body: |
3534
bb.0:
3635
; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_const
37-
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
38-
; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], 128, 0, implicit $exec
36+
; CHECK: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 %stack.0, 128, 0, implicit $exec
3937
; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
4038
; CHECK-NEXT: SI_RETURN implicit $vgpr0
4139
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -57,8 +55,7 @@ stack:
5755
body: |
5856
bb.0:
5957
; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64__const_v_fi
60-
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
61-
; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 128, [[V_MOV_B32_e32_]], 0, implicit $exec
58+
; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 128, %stack.0, 0, implicit $exec
6259
; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
6360
; CHECK-NEXT: SI_RETURN implicit $sgpr4
6461
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -78,8 +75,7 @@ stack:
7875
body: |
7976
bb.0:
8077
; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_const
81-
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
82-
; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], 128, 0, implicit $exec
78+
; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 128, 0, implicit $exec
8379
; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
8480
; CHECK-NEXT: SI_RETURN implicit $sgpr4
8581
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -99,8 +95,7 @@ stack:
9995
body: |
10096
bb.0:
10197
; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64___fi_const_v
102-
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
103-
; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 128, [[V_MOV_B32_e32_]], 0, implicit $exec
98+
; CHECK: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 128, %stack.0, 0, implicit $exec
10499
; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
105100
; CHECK-NEXT: SI_RETURN implicit $vgpr0
106101
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec

llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir

Lines changed: 22 additions & 72 deletions
Original file line number · Diff line number · Diff line change
@@ -14,8 +14,7 @@ stack:
1414
body: |
1515
bb.0:
1616
; CHECK-LABEL: name: fold_frame_index__s_add_i32__fi_const
17-
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
18-
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_MOV_B32_]], 128, implicit-def $scc
17+
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, 128, implicit-def $scc
1918
; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
2019
; CHECK-NEXT: SI_RETURN implicit $sgpr4
2120
%0:sreg_32 = S_MOV_B32 %stack.0
@@ -35,8 +34,7 @@ stack:
3534
body: |
3635
bb.0:
3736
; CHECK-LABEL: name: fold_frame_index__s_add_i32__const_fi
38-
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
39-
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 128, [[S_MOV_B32_]], implicit-def $scc
37+
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 128, %stack.0, implicit-def $scc
4038
; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
4139
; CHECK-NEXT: SI_RETURN implicit $sgpr4
4240
%0:sreg_32 = S_MOV_B32 %stack.0
@@ -56,8 +54,7 @@ stack:
5654
body: |
5755
bb.0:
5856
; CHECK-LABEL: name: fold_frame_index__s_add_i32__materializedconst_fi
59-
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
60-
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, [[S_MOV_B32_]], implicit-def $scc
57+
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def $scc
6158
; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
6259
; CHECK-NEXT: SI_RETURN implicit $sgpr4
6360
%0:sreg_32 = S_MOV_B32 256
@@ -101,8 +98,7 @@ stack:
10198
body: |
10299
bb.0:
103100
; CHECK-LABEL: name: fold_frame_index__s_add_i32__fi_materializedconst_1
104-
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
105-
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, [[S_MOV_B32_]], implicit-def $scc
101+
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def $scc
106102
; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
107103
; CHECK-NEXT: SI_RETURN implicit $sgpr4
108104
%0:sreg_32 = S_MOV_B32 256
@@ -173,8 +169,7 @@ stack:
173169
body: |
174170
bb.0:
175171
; CHECK-LABEL: name: fold_frame_index__v_add_u32_e32__const_v_fi
176-
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
177-
; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, [[V_MOV_B32_e32_]], implicit $exec
172+
; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, %stack.0, implicit $exec
178173
; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e32_]]
179174
; CHECK-NEXT: SI_RETURN implicit $sgpr4
180175
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -215,21 +210,10 @@ stack:
215210
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
216211
body: |
217212
bb.0:
218-
; GFX9-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi
219-
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
220-
; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, [[V_MOV_B32_e32_]], 0, implicit $exec
221-
; GFX9-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
222-
; GFX9-NEXT: SI_RETURN implicit $sgpr4
223-
;
224-
; GFX10-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi
225-
; GFX10: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, %stack.0, 0, implicit $exec
226-
; GFX10-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
227-
; GFX10-NEXT: SI_RETURN implicit $sgpr4
228-
;
229-
; GFX12-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi
230-
; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, %stack.0, 0, implicit $exec
231-
; GFX12-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
232-
; GFX12-NEXT: SI_RETURN implicit $sgpr4
213+
; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi
214+
; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, %stack.0, 0, implicit $exec
215+
; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
216+
; CHECK-NEXT: SI_RETURN implicit $sgpr4
233217
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
234218
%1:vgpr_32 = V_ADD_U32_e64 64, %0, 0, implicit $exec
235219
$sgpr4 = COPY %1
@@ -246,21 +230,10 @@ stack:
246230
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
247231
body: |
248232
bb.0:
249-
; GFX9-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm
250-
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
251-
; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], 64, 0, implicit $exec
252-
; GFX9-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
253-
; GFX9-NEXT: SI_RETURN implicit $sgpr4
254-
;
255-
; GFX10-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm
256-
; GFX10: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 64, 0, implicit $exec
257-
; GFX10-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
258-
; GFX10-NEXT: SI_RETURN implicit $sgpr4
259-
;
260-
; GFX12-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm
261-
; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 64, 0, implicit $exec
262-
; GFX12-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
263-
; GFX12-NEXT: SI_RETURN implicit $sgpr4
233+
; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm
234+
; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 64, 0, implicit $exec
235+
; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
236+
; CHECK-NEXT: SI_RETURN implicit $sgpr4
264237
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
265238
%1:vgpr_32 = V_ADD_U32_e64 %0, 64, 0, implicit $exec
266239
$sgpr4 = COPY %1
@@ -278,8 +251,7 @@ stack:
278251
body: |
279252
bb.0:
280253
; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e32__const_v_fi
281-
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
282-
; CHECK-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 128, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
254+
; CHECK: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 128, %stack.0, implicit-def $vcc, implicit $exec
283255
; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e32_]]
284256
; CHECK-NEXT: SI_RETURN implicit $vgpr0
285257
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -298,21 +270,10 @@ stack:
298270
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
299271
body: |
300272
bb.0:
301-
; GFX9-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
302-
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
303-
; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], 64, 0, implicit $exec
304-
; GFX9-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
305-
; GFX9-NEXT: SI_RETURN implicit $vgpr0
306-
;
307-
; GFX10-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
308-
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, 64, 0, implicit $exec
309-
; GFX10-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
310-
; GFX10-NEXT: SI_RETURN implicit $vgpr0
311-
;
312-
; GFX12-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
313-
; GFX12: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, 64, 0, implicit $exec
314-
; GFX12-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
315-
; GFX12-NEXT: SI_RETURN implicit $vgpr0
273+
; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
274+
; CHECK: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, 64, 0, implicit $exec
275+
; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
276+
; CHECK-NEXT: SI_RETURN implicit $vgpr0
316277
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
317278
%1:vgpr_32, %2:sreg_64 = V_ADD_CO_U32_e64 %0, 64, 0, implicit $exec
318279
$vgpr0 = COPY %1
@@ -329,21 +290,10 @@ stack:
329290
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
330291
body: |
331292
bb.0:
332-
; GFX9-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi
333-
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
334-
; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, [[V_MOV_B32_e32_]], 0, implicit $exec
335-
; GFX9-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
336-
; GFX9-NEXT: SI_RETURN implicit $vgpr0
337-
;
338-
; GFX10-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi
339-
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, %stack.0, 0, implicit $exec
340-
; GFX10-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
341-
; GFX10-NEXT: SI_RETURN implicit $vgpr0
342-
;
343-
; GFX12-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi
344-
; GFX12: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, %stack.0, 0, implicit $exec
345-
; GFX12-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
346-
; GFX12-NEXT: SI_RETURN implicit $vgpr0
293+
; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi
294+
; CHECK: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, %stack.0, 0, implicit $exec
295+
; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
296+
; CHECK-NEXT: SI_RETURN implicit $vgpr0
347297
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
348298
%1:vgpr_32, %2:sreg_64 = V_ADD_CO_U32_e64 64, %0, 0, implicit $exec
349299
$vgpr0 = COPY %1

llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll

Lines changed: 6 additions & 6 deletions
Original file line number · Diff line number · Diff line change
@@ -64,8 +64,8 @@ define void @func_mov_fi_i32_offset() #0 {
6464
; GFX9-MUBUF: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32
6565
; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 4, [[SCALED]]
6666

67-
; GFX9-FLATSCR: v_mov_b32_e32 [[ADD:v[0-9]+]], s32
68-
; GFX9-FLATSCR-NEXT: v_add_u32_e32 v0, 4, [[ADD]]
67+
; FIXME: Should commute and shrink
68+
; GFX9-FLATSCR: v_add_u32_e64 v0, 4, s32
6969

7070
; GCN-NOT: v_mov
7171
; GCN: ds_write_b32 v0, v0
@@ -164,12 +164,12 @@ define void @void_func_byval_struct_i8_i32_ptr_value(ptr addrspace(5) byval({ i8
164164
; GFX9-FLATSCR: scratch_load_dword v{{[0-9]+}}, off, s32 offset:4 glc{{$}}
165165

166166
; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], s32, 6
167-
; CI: v_add_i32_e32 [[GEP:v[0-9]+]], vcc, 4, [[SHIFT]]
167+
; CI: v_add_i32_e64 [[GEP:v[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 4, [[SHIFT]]
168168

169-
; GFX9-MUBUF: v_lshrrev_b32_e64 [[SP:v[0-9]+]], 6, s32
170-
; GFX9-FLATSCR: v_mov_b32_e32 [[SP:v[0-9]+]], s32
169+
; GFX9-MUBUF: v_lshrrev_b32_e64 [[SP:v[0-9]+]], 6, s32
170+
; GFX9-MUBUF: v_add_u32_e32 [[GEP:v[0-9]+]], 4, [[SP]]
171171

172-
; GFX9: v_add_u32_e32 [[GEP:v[0-9]+]], 4, [[SP]]
172+
; GFX9-FLATSCR: v_add_u32_e64 [[GEP:v[0-9]+]], 4, s32
173173

174174
; GCN: ds_write_b32 v{{[0-9]+}}, [[GEP]]
175175
define void @void_func_byval_struct_i8_i32_ptr_nonentry_block(ptr addrspace(5) byval({ i8, i32 }) %arg0, i32 %arg2) #0 {

0 commit comments

Comments (0)