
Commit f01ab1a

AMDGPU: Handle folding frame indexes into add with immediate
Frame index materialization can fold the constant offset into adds with immediates. The mubuf expansion is more complicated because we also have to insert the shift, so restrict this to a single use for now. This is preparation to avoid regressions in a future patch. Some cases are still missed due to visitation order: the fold depends on the immediate already having been folded into the instruction.
1 parent: 6f20c30
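To make the transform concrete, here is a minimal before/after sketch in MIR, adapted from the fold-operands-frame-index.mir test below (an illustration of the fold, not an exact pass trace):

  ; Before: the frame index is materialized, then offset by an add with an immediate.
  %0:sreg_32 = S_MOV_B32 %stack.0
  %1:sreg_32 = S_ADD_I32 %0, 128, implicit-def $scc

  ; After: since %0 has a single non-debug use, the frame index folds
  ; directly into the add, and the S_MOV_B32 becomes dead.
  %1:sreg_32 = S_ADD_I32 %stack.0, 128, implicit-def $scc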

File tree: 8 files changed, +99 −159 lines

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 17 additions & 0 deletions
@@ -194,6 +194,23 @@ bool SIFoldOperandsImpl::frameIndexMayFold(
     return false;
 
   const unsigned Opc = UseMI.getOpcode();
+  switch (Opc) {
+  case AMDGPU::S_ADD_I32:
+  case AMDGPU::V_ADD_U32_e32:
+  case AMDGPU::V_ADD_CO_U32_e32:
+    // TODO: Possibly relax hasOneUse. It matters more for mubuf, since we have
+    // to insert the wave size shift at every point we use the index.
+    // TODO: Fix depending on visit order to fold immediates into the operand
+    return UseMI.getOperand(OpNo == 1 ? 2 : 1).isImm() &&
+           MRI->hasOneNonDBGUse(UseMI.getOperand(OpNo).getReg());
+  case AMDGPU::V_ADD_U32_e64:
+  case AMDGPU::V_ADD_CO_U32_e64:
+    return UseMI.getOperand(OpNo == 2 ? 3 : 2).isImm() &&
+           MRI->hasOneNonDBGUse(UseMI.getOperand(OpNo).getReg());
+  default:
+    break;
+  }
+
   if (TII->isMUBUF(UseMI))
     return OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
   if (!TII->isFLATScratch(UseMI))
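For reference, the operand layouts the index arithmetic above assumes can be read off the MIR tests in this commit; the carry-out e64 variant has a second def, which shifts its source operands right by one:

  ; e32: def at operand 0; sources at operands 1 and 2
  %1:vgpr_32 = V_ADD_U32_e32 128, %0, implicit $exec
  ; e64 with carry-out: defs at operands 0 and 1; sources at operands 2 and 3
  %1:vgpr_32, %2:sreg_64 = V_ADD_CO_U32_e64 %0, 64, 0, implicit $exec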

llvm/test/CodeGen/AMDGPU/flat-scratch.ll

Lines changed: 2 additions & 4 deletions
@@ -4705,8 +4705,7 @@ define amdgpu_ps void @large_offset() {
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10-NEXT:    s_movk_i32 s0, 0x810
-; GFX10-NEXT:    s_addk_i32 s0, 0x3c0
+; GFX10-NEXT:    s_movk_i32 s0, 0xbd0
 ; GFX10-NEXT:    v_mov_b32_e32 v1, v0
 ; GFX10-NEXT:    v_mov_b32_e32 v2, v0
 ; GFX10-NEXT:    v_mov_b32_e32 v3, v0
@@ -4823,8 +4822,7 @@ define amdgpu_ps void @large_offset() {
 ; GFX10-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
 ; GFX10-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
 ; GFX10-PAL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10-PAL-NEXT:    s_movk_i32 s0, 0x810
-; GFX10-PAL-NEXT:    s_addk_i32 s0, 0x3c0
+; GFX10-PAL-NEXT:    s_movk_i32 s0, 0xbd0
 ; GFX10-PAL-NEXT:    v_mov_b32_e32 v1, v0
 ; GFX10-PAL-NEXT:    v_mov_b32_e32 v2, v0
 ; GFX10-PAL-NEXT:    v_mov_b32_e32 v3, v0
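The folded constant checks out: 0x810 + 0x3c0 = 0xbd0 (2064 + 960 = 3024), so the materialize-then-add pair collapses into a single s_movk_i32 of the combined frame offset.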

llvm/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir

Lines changed: 1 addition & 2 deletions
@@ -183,8 +183,7 @@ body: |
   bb.0:
 
     ; GCN-LABEL: name: shrink_vgpr_imm_vgpr_fi_v_add_i32_e64_no_carry_out_use
-    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; GCN-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 16, [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GCN: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 16, %stack.0, 0, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
     %0:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
     %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec

llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.gfx10.mir

Lines changed: 5 additions & 10 deletions
@@ -13,8 +13,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e32__const_v_fi
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, [[V_MOV_B32_e32_]], implicit $exec
+    ; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, %stack.0, implicit $exec
     ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_U32_e32_]]
     ; CHECK-NEXT: SI_RETURN implicit $vgpr0
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -34,8 +33,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_const
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], 128, 0, implicit $exec
+    ; CHECK: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 %stack.0, 128, 0, implicit $exec
     ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
     ; CHECK-NEXT: SI_RETURN implicit $vgpr0
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -57,8 +55,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64__const_v_fi
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 128, [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 128, %stack.0, 0, implicit $exec
     ; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
     ; CHECK-NEXT: SI_RETURN implicit $sgpr4
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -78,8 +75,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_const
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], 128, 0, implicit $exec
+    ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 128, 0, implicit $exec
     ; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
     ; CHECK-NEXT: SI_RETURN implicit $sgpr4
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -99,8 +95,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64___fi_const_v
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 128, [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; CHECK: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 128, %stack.0, 0, implicit $exec
     ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
     ; CHECK-NEXT: SI_RETURN implicit $vgpr0
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec

llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir

Lines changed: 22 additions & 72 deletions
@@ -14,8 +14,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__s_add_i32__fi_const
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
-    ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_MOV_B32_]], 128, implicit-def $scc
+    ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, 128, implicit-def $scc
     ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
     ; CHECK-NEXT: SI_RETURN implicit $sgpr4
     %0:sreg_32 = S_MOV_B32 %stack.0
@@ -35,8 +34,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__s_add_i32__const_fi
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
-    ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 128, [[S_MOV_B32_]], implicit-def $scc
+    ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 128, %stack.0, implicit-def $scc
     ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
     ; CHECK-NEXT: SI_RETURN implicit $sgpr4
     %0:sreg_32 = S_MOV_B32 %stack.0
@@ -56,8 +54,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__s_add_i32__materializedconst_fi
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
-    ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, [[S_MOV_B32_]], implicit-def $scc
+    ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def $scc
     ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
     ; CHECK-NEXT: SI_RETURN implicit $sgpr4
     %0:sreg_32 = S_MOV_B32 256
@@ -101,8 +98,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__s_add_i32__fi_materializedconst_1
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
-    ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, [[S_MOV_B32_]], implicit-def $scc
+    ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def $scc
     ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
     ; CHECK-NEXT: SI_RETURN implicit $sgpr4
     %0:sreg_32 = S_MOV_B32 256
@@ -173,8 +169,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e32__const_v_fi
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, [[V_MOV_B32_e32_]], implicit $exec
+    ; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, %stack.0, implicit $exec
     ; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e32_]]
     ; CHECK-NEXT: SI_RETURN implicit $sgpr4
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -215,21 +210,10 @@ stack:
   - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
 body: |
   bb.0:
-    ; GFX9-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi
-    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, [[V_MOV_B32_e32_]], 0, implicit $exec
-    ; GFX9-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
-    ; GFX9-NEXT: SI_RETURN implicit $sgpr4
-    ;
-    ; GFX10-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi
-    ; GFX10: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, %stack.0, 0, implicit $exec
-    ; GFX10-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
-    ; GFX10-NEXT: SI_RETURN implicit $sgpr4
-    ;
-    ; GFX12-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi
-    ; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, %stack.0, 0, implicit $exec
-    ; GFX12-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
-    ; GFX12-NEXT: SI_RETURN implicit $sgpr4
+    ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi
+    ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, %stack.0, 0, implicit $exec
+    ; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
+    ; CHECK-NEXT: SI_RETURN implicit $sgpr4
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
     %1:vgpr_32 = V_ADD_U32_e64 64, %0, 0, implicit $exec
     $sgpr4 = COPY %1
@@ -246,21 +230,10 @@ stack:
   - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
 body: |
   bb.0:
-    ; GFX9-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm
-    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], 64, 0, implicit $exec
-    ; GFX9-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
-    ; GFX9-NEXT: SI_RETURN implicit $sgpr4
-    ;
-    ; GFX10-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm
-    ; GFX10: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 64, 0, implicit $exec
-    ; GFX10-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
-    ; GFX10-NEXT: SI_RETURN implicit $sgpr4
-    ;
-    ; GFX12-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm
-    ; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 64, 0, implicit $exec
-    ; GFX12-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
-    ; GFX12-NEXT: SI_RETURN implicit $sgpr4
+    ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm
+    ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 64, 0, implicit $exec
+    ; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
+    ; CHECK-NEXT: SI_RETURN implicit $sgpr4
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
     %1:vgpr_32 = V_ADD_U32_e64 %0, 64, 0, implicit $exec
     $sgpr4 = COPY %1
@@ -278,8 +251,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e32__const_v_fi
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; CHECK-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 128, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
+    ; CHECK: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 128, %stack.0, implicit-def $vcc, implicit $exec
     ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e32_]]
     ; CHECK-NEXT: SI_RETURN implicit $vgpr0
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -298,21 +270,10 @@ stack:
   - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
 body: |
   bb.0:
-    ; GFX9-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
-    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], 64, 0, implicit $exec
-    ; GFX9-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
-    ; GFX9-NEXT: SI_RETURN implicit $vgpr0
-    ;
-    ; GFX10-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
-    ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, 64, 0, implicit $exec
-    ; GFX10-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
-    ; GFX10-NEXT: SI_RETURN implicit $vgpr0
-    ;
-    ; GFX12-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
-    ; GFX12: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, 64, 0, implicit $exec
-    ; GFX12-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
-    ; GFX12-NEXT: SI_RETURN implicit $vgpr0
+    ; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
+    ; CHECK: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, 64, 0, implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
     %1:vgpr_32, %2:sreg_64 = V_ADD_CO_U32_e64 %0, 64, 0, implicit $exec
     $vgpr0 = COPY %1
@@ -329,21 +290,10 @@ stack:
   - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
 body: |
   bb.0:
-    ; GFX9-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi
-    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, [[V_MOV_B32_e32_]], 0, implicit $exec
-    ; GFX9-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
-    ; GFX9-NEXT: SI_RETURN implicit $vgpr0
-    ;
-    ; GFX10-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi
-    ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, %stack.0, 0, implicit $exec
-    ; GFX10-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
-    ; GFX10-NEXT: SI_RETURN implicit $vgpr0
-    ;
-    ; GFX12-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi
-    ; GFX12: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, %stack.0, 0, implicit $exec
-    ; GFX12-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
-    ; GFX12-NEXT: SI_RETURN implicit $vgpr0
+    ; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi
+    ; CHECK: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, %stack.0, 0, implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
     %1:vgpr_32, %2:sreg_64 = V_ADD_CO_U32_e64 64, %0, 0, implicit $exec
     $vgpr0 = COPY %1
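After the fold, GFX9 produces the same output as GFX10 and GFX12 for these functions, which is why the per-target GFX9/GFX10/GFX12 check blocks above collapse into a single shared CHECK prefix.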

llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll

Lines changed: 6 additions & 6 deletions
@@ -64,8 +64,8 @@ define void @func_mov_fi_i32_offset() #0 {
 ; GFX9-MUBUF: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32
 ; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 4, [[SCALED]]
 
-; GFX9-FLATSCR: v_mov_b32_e32 [[ADD:v[0-9]+]], s32
-; GFX9-FLATSCR-NEXT: v_add_u32_e32 v0, 4, [[ADD]]
+; FIXME: Should commute and shrink
+; GFX9-FLATSCR: v_add_u32_e64 v0, 4, s32
 
 ; GCN-NOT: v_mov
 ; GCN: ds_write_b32 v0, v0
@@ -164,12 +164,12 @@ define void @void_func_byval_struct_i8_i32_ptr_value(ptr addrspace(5) byval({ i8
 ; GFX9-FLATSCR: scratch_load_dword v{{[0-9]+}}, off, s32 offset:4 glc{{$}}
 
 ; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], s32, 6
-; CI: v_add_i32_e32 [[GEP:v[0-9]+]], vcc, 4, [[SHIFT]]
+; CI: v_add_i32_e64 [[GEP:v[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 4, [[SHIFT]]
 
-; GFX9-MUBUF: v_lshrrev_b32_e64 [[SP:v[0-9]+]], 6, s32
-; GFX9-FLATSCR: v_mov_b32_e32 [[SP:v[0-9]+]], s32
+; GFX9-MUBUF: v_lshrrev_b32_e64 [[SP:v[0-9]+]], 6, s32
+; GFX9-MUBUF: v_add_u32_e32 [[GEP:v[0-9]+]], 4, [[SP]]
 
-; GFX9: v_add_u32_e32 [[GEP:v[0-9]+]], 4, [[SP]]
+; GFX9-FLATSCR: v_add_u32_e64 [[GEP:v[0-9]+]], 4, s32
 
 ; GCN: ds_write_b32 v{{[0-9]+}}, [[GEP]]
 define void @void_func_byval_struct_i8_i32_ptr_nonentry_block(ptr addrspace(5) byval({ i8, i32 }) %arg0, i32 %arg2) #0 {
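The FIXME reflects a VOP2 encoding constraint: the e32 add forms only accept a VGPR in src1, so an add of the inline constant 4 and the SGPR stack register s32 cannot use the shorter e32 encoding as emitted here.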
