Skip to content

Commit eafc801

Browse files
committed
Handle vop3 cases
1 parent 295561a commit eafc801

File tree

7 files changed

+73
-123
lines changed

7 files changed

+73
-123
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,12 +198,15 @@ bool SIFoldOperandsImpl::frameIndexMayFold(
198198
case AMDGPU::S_ADD_I32:
199199
case AMDGPU::V_ADD_U32_e32:
200200
case AMDGPU::V_ADD_CO_U32_e32:
201-
// TODO: Handle e64 variants
202201
// TODO: Possibly relax hasOneUse. It matters more for mubuf, since we have
203202
// to insert the wave size shift at every point we use the index.
204203
// TODO: Remove the dependence on visit order for folding immediates into the operand
205204
return UseMI.getOperand(OpNo == 1 ? 2 : 1).isImm() &&
206205
MRI->hasOneNonDBGUse(UseMI.getOperand(OpNo).getReg());
206+
case AMDGPU::V_ADD_U32_e64:
207+
case AMDGPU::V_ADD_CO_U32_e64:
208+
return UseMI.getOperand(OpNo == 2 ? 3 : 2).isImm() &&
209+
MRI->hasOneNonDBGUse(UseMI.getOperand(OpNo).getReg());
207210
default:
208211
break;
209212
}

llvm/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,7 @@ body: |
183183
bb.0:
184184
185185
; GCN-LABEL: name: shrink_vgpr_imm_vgpr_fi_v_add_i32_e64_no_carry_out_use
186-
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
187-
; GCN-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 16, [[V_MOV_B32_e32_]], 0, implicit $exec
186+
; GCN: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 16, %stack.0, 0, implicit $exec
188187
; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
189188
%0:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
190189
%1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec

llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.gfx10.mir

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,7 @@ stack:
3333
body: |
3434
bb.0:
3535
; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_const
36-
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
37-
; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], 128, 0, implicit $exec
36+
; CHECK: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 %stack.0, 128, 0, implicit $exec
3837
; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
3938
; CHECK-NEXT: SI_RETURN implicit $vgpr0
4039
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -56,8 +55,7 @@ stack:
5655
body: |
5756
bb.0:
5857
; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64__const_v_fi
59-
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
60-
; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 128, [[V_MOV_B32_e32_]], 0, implicit $exec
58+
; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 128, %stack.0, 0, implicit $exec
6159
; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
6260
; CHECK-NEXT: SI_RETURN implicit $sgpr4
6361
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -77,8 +75,7 @@ stack:
7775
body: |
7876
bb.0:
7977
; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_const
80-
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
81-
; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], 128, 0, implicit $exec
78+
; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 128, 0, implicit $exec
8279
; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
8380
; CHECK-NEXT: SI_RETURN implicit $sgpr4
8481
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -98,8 +95,7 @@ stack:
9895
body: |
9996
bb.0:
10097
; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64___fi_const_v
101-
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
102-
; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 128, [[V_MOV_B32_e32_]], 0, implicit $exec
98+
; CHECK: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 128, %stack.0, 0, implicit $exec
10399
; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
104100
; CHECK-NEXT: SI_RETURN implicit $vgpr0
105101
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec

llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir

Lines changed: 16 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -210,21 +210,10 @@ stack:
210210
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
211211
body: |
212212
bb.0:
213-
; GFX9-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi
214-
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
215-
; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, [[V_MOV_B32_e32_]], 0, implicit $exec
216-
; GFX9-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
217-
; GFX9-NEXT: SI_RETURN implicit $sgpr4
218-
;
219-
; GFX10-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi
220-
; GFX10: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, %stack.0, 0, implicit $exec
221-
; GFX10-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
222-
; GFX10-NEXT: SI_RETURN implicit $sgpr4
223-
;
224-
; GFX12-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi
225-
; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, %stack.0, 0, implicit $exec
226-
; GFX12-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
227-
; GFX12-NEXT: SI_RETURN implicit $sgpr4
213+
; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi
214+
; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, %stack.0, 0, implicit $exec
215+
; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
216+
; CHECK-NEXT: SI_RETURN implicit $sgpr4
228217
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
229218
%1:vgpr_32 = V_ADD_U32_e64 64, %0, 0, implicit $exec
230219
$sgpr4 = COPY %1
@@ -241,21 +230,10 @@ stack:
241230
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
242231
body: |
243232
bb.0:
244-
; GFX9-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm
245-
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
246-
; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], 64, 0, implicit $exec
247-
; GFX9-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
248-
; GFX9-NEXT: SI_RETURN implicit $sgpr4
249-
;
250-
; GFX10-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm
251-
; GFX10: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 64, 0, implicit $exec
252-
; GFX10-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
253-
; GFX10-NEXT: SI_RETURN implicit $sgpr4
254-
;
255-
; GFX12-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm
256-
; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 64, 0, implicit $exec
257-
; GFX12-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
258-
; GFX12-NEXT: SI_RETURN implicit $sgpr4
233+
; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm
234+
; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 64, 0, implicit $exec
235+
; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
236+
; CHECK-NEXT: SI_RETURN implicit $sgpr4
259237
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
260238
%1:vgpr_32 = V_ADD_U32_e64 %0, 64, 0, implicit $exec
261239
$sgpr4 = COPY %1
@@ -292,21 +270,10 @@ stack:
292270
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
293271
body: |
294272
bb.0:
295-
; GFX9-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
296-
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
297-
; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], 64, 0, implicit $exec
298-
; GFX9-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
299-
; GFX9-NEXT: SI_RETURN implicit $vgpr0
300-
;
301-
; GFX10-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
302-
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, 64, 0, implicit $exec
303-
; GFX10-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
304-
; GFX10-NEXT: SI_RETURN implicit $vgpr0
305-
;
306-
; GFX12-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
307-
; GFX12: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, 64, 0, implicit $exec
308-
; GFX12-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
309-
; GFX12-NEXT: SI_RETURN implicit $vgpr0
273+
; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
274+
; CHECK: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, 64, 0, implicit $exec
275+
; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
276+
; CHECK-NEXT: SI_RETURN implicit $vgpr0
310277
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
311278
%1:vgpr_32, %2:sreg_64 = V_ADD_CO_U32_e64 %0, 64, 0, implicit $exec
312279
$vgpr0 = COPY %1
@@ -323,21 +290,10 @@ stack:
323290
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
324291
body: |
325292
bb.0:
326-
; GFX9-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi
327-
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
328-
; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, [[V_MOV_B32_e32_]], 0, implicit $exec
329-
; GFX9-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
330-
; GFX9-NEXT: SI_RETURN implicit $vgpr0
331-
;
332-
; GFX10-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi
333-
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, %stack.0, 0, implicit $exec
334-
; GFX10-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
335-
; GFX10-NEXT: SI_RETURN implicit $vgpr0
336-
;
337-
; GFX12-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi
338-
; GFX12: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, %stack.0, 0, implicit $exec
339-
; GFX12-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
340-
; GFX12-NEXT: SI_RETURN implicit $vgpr0
293+
; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi
294+
; CHECK: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, %stack.0, 0, implicit $exec
295+
; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
296+
; CHECK-NEXT: SI_RETURN implicit $vgpr0
341297
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
342298
%1:vgpr_32, %2:sreg_64 = V_ADD_CO_U32_e64 64, %0, 0, implicit $exec
343299
$vgpr0 = COPY %1

llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@ define void @func_mov_fi_i32_offset() #0 {
6464
; GFX9-MUBUF: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32
6565
; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 4, [[SCALED]]
6666

67-
; GFX9-FLATSCR: v_mov_b32_e32 [[ADD:v[0-9]+]], s32
68-
; GFX9-FLATSCR-NEXT: v_add_u32_e32 v0, 4, [[ADD]]
67+
; FIXME: Should commute and shrink
68+
; GFX9-FLATSCR: v_add_u32_e64 v0, 4, s32
6969

7070
; GCN-NOT: v_mov
7171
; GCN: ds_write_b32 v0, v0
@@ -164,12 +164,12 @@ define void @void_func_byval_struct_i8_i32_ptr_value(ptr addrspace(5) byval({ i8
164164
; GFX9-FLATSCR: scratch_load_dword v{{[0-9]+}}, off, s32 offset:4 glc{{$}}
165165

166166
; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], s32, 6
167-
; CI: v_add_i32_e32 [[GEP:v[0-9]+]], vcc, 4, [[SHIFT]]
167+
; CI: v_add_i32_e64 [[GEP:v[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 4, [[SHIFT]]
168168

169-
; GFX9-MUBUF: v_lshrrev_b32_e64 [[SP:v[0-9]+]], 6, s32
170-
; GFX9-FLATSCR: v_mov_b32_e32 [[SP:v[0-9]+]], s32
169+
; GFX9-MUBUF: v_lshrrev_b32_e64 [[SP:v[0-9]+]], 6, s32
170+
; GFX9-MUBUF: v_add_u32_e32 [[GEP:v[0-9]+]], 4, [[SP]]
171171

172-
; GFX9: v_add_u32_e32 [[GEP:v[0-9]+]], 4, [[SP]]
172+
; GFX9-FLATSCR: v_add_u32_e64 [[GEP:v[0-9]+]], 4, s32
173173

174174
; GCN: ds_write_b32 v{{[0-9]+}}, [[GEP]]
175175
define void @void_func_byval_struct_i8_i32_ptr_nonentry_block(ptr addrspace(5) byval({ i8, i32 }) %arg0, i32 %arg2) #0 {

llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll

Lines changed: 25 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1426,17 +1426,17 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
14261426
; GFX10_1-NEXT: buffer_store_dword v2, off, s[0:3], s5 ; 4-byte Folded Spill
14271427
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
14281428
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
1429-
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
1429+
; GFX10_1-NEXT: v_lshrrev_b32_e64 v3, 5, s32
14301430
; GFX10_1-NEXT: v_writelane_b32 v2, s59, 0
1431-
; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s32
1431+
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
14321432
; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
1433-
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0
1434-
; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1
1433+
; GFX10_1-NEXT: v_add_nc_u32_e32 v3, 0x4040, v3
1434+
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
14351435
; GFX10_1-NEXT: ;;#ASMSTART
1436-
; GFX10_1-NEXT: ; use alloca0 v1
1436+
; GFX10_1-NEXT: ; use alloca0 v0
14371437
; GFX10_1-NEXT: ;;#ASMEND
1438-
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 0x3ec, v0
1439-
; GFX10_1-NEXT: v_readfirstlane_b32 s59, v0
1438+
; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 0x3ec, v3
1439+
; GFX10_1-NEXT: v_readfirstlane_b32 s59, v1
14401440
; GFX10_1-NEXT: ;;#ASMSTART
14411441
; GFX10_1-NEXT: ; use s59, scc
14421442
; GFX10_1-NEXT: ;;#ASMEND
@@ -1456,17 +1456,17 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
14561456
; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800
14571457
; GFX10_3-NEXT: buffer_store_dword v2, off, s[0:3], s5 ; 4-byte Folded Spill
14581458
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
1459-
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
1459+
; GFX10_3-NEXT: v_lshrrev_b32_e64 v3, 5, s32
14601460
; GFX10_3-NEXT: v_writelane_b32 v2, s59, 0
1461-
; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s32
1461+
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
14621462
; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
1463-
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0
1464-
; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1
1463+
; GFX10_3-NEXT: v_add_nc_u32_e32 v3, 0x4040, v3
1464+
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
14651465
; GFX10_3-NEXT: ;;#ASMSTART
1466-
; GFX10_3-NEXT: ; use alloca0 v1
1466+
; GFX10_3-NEXT: ; use alloca0 v0
14671467
; GFX10_3-NEXT: ;;#ASMEND
1468-
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 0x3ec, v0
1469-
; GFX10_3-NEXT: v_readfirstlane_b32 s59, v0
1468+
; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 0x3ec, v3
1469+
; GFX10_3-NEXT: v_readfirstlane_b32 s59, v1
14701470
; GFX10_3-NEXT: ;;#ASMSTART
14711471
; GFX10_3-NEXT: ; use s59, scc
14721472
; GFX10_3-NEXT: ;;#ASMEND
@@ -1485,19 +1485,17 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
14851485
; GFX11-NEXT: s_add_i32 s1, s32, 0x8040
14861486
; GFX11-NEXT: scratch_store_b32 off, v2, s1 ; 4-byte Folded Spill
14871487
; GFX11-NEXT: s_mov_b32 exec_lo, s0
1488-
; GFX11-NEXT: s_add_i32 s0, s32, 0x4040
1488+
; GFX11-NEXT: s_add_i32 s0, s32, 64
14891489
; GFX11-NEXT: v_writelane_b32 v2, s59, 0
14901490
; GFX11-NEXT: v_mov_b32_e32 v0, s0
1491-
; GFX11-NEXT: s_add_i32 s0, s32, 64
1492-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1493-
; GFX11-NEXT: v_mov_b32_e32 v1, s0
1494-
; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
1495-
; GFX11-NEXT: v_add_nc_u32_e32 v0, 0x3ec, v0
1491+
; GFX11-NEXT: s_add_i32 s0, s32, 0x4040
14961492
; GFX11-NEXT: ;;#ASMSTART
1497-
; GFX11-NEXT: ; use alloca0 v1
1493+
; GFX11-NEXT: ; use alloca0 v0
14981494
; GFX11-NEXT: ;;#ASMEND
1495+
; GFX11-NEXT: v_add_nc_u32_e64 v1, 0x3ec, s0
1496+
; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
14991497
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1500-
; GFX11-NEXT: v_readfirstlane_b32 s59, v0
1498+
; GFX11-NEXT: v_readfirstlane_b32 s59, v1
15011499
; GFX11-NEXT: ;;#ASMSTART
15021500
; GFX11-NEXT: ; use s59, scc
15031501
; GFX11-NEXT: ;;#ASMEND
@@ -1523,14 +1521,14 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
15231521
; GFX12-NEXT: s_add_co_i32 s0, s32, 0x4000
15241522
; GFX12-NEXT: v_writelane_b32 v2, s59, 0
15251523
; GFX12-NEXT: s_wait_alu 0xfffe
1526-
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s32
1524+
; GFX12-NEXT: v_add_nc_u32_e64 v1, 0x3ec, s0
1525+
; GFX12-NEXT: v_mov_b32_e32 v0, s32
15271526
; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
15281527
; GFX12-NEXT: ;;#ASMSTART
1529-
; GFX12-NEXT: ; use alloca0 v1
1528+
; GFX12-NEXT: ; use alloca0 v0
15301529
; GFX12-NEXT: ;;#ASMEND
1531-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1532-
; GFX12-NEXT: v_add_nc_u32_e32 v0, 0x3ec, v0
1533-
; GFX12-NEXT: v_readfirstlane_b32 s59, v0
1530+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
1531+
; GFX12-NEXT: v_readfirstlane_b32 s59, v1
15341532
; GFX12-NEXT: ;;#ASMSTART
15351533
; GFX12-NEXT: ; use s59, scc
15361534
; GFX12-NEXT: ;;#ASMEND

llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1983,17 +1983,17 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
19831983
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
19841984
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
19851985
; GFX10_1-NEXT: v_writelane_b32 v23, s30, 0
1986-
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
19871986
; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s32
1987+
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
19881988
; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
19891989
; GFX10_1-NEXT: v_writelane_b32 v23, s31, 1
1990-
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0
1991-
; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1
1990+
; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 0x4040, v1
1991+
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
19921992
; GFX10_1-NEXT: ;;#ASMSTART
1993-
; GFX10_1-NEXT: ; use alloca0 v1
1993+
; GFX10_1-NEXT: ; use alloca0 v0
19941994
; GFX10_1-NEXT: ;;#ASMEND
19951995
; GFX10_1-NEXT: v_writelane_b32 v23, s33, 2
1996-
; GFX10_1-NEXT: v_add_nc_u32_e32 v22, 0x200, v0
1996+
; GFX10_1-NEXT: v_add_nc_u32_e32 v22, 0x200, v1
19971997
; GFX10_1-NEXT: v_writelane_b32 v23, s34, 3
19981998
; GFX10_1-NEXT: v_writelane_b32 v23, s35, 4
19991999
; GFX10_1-NEXT: v_writelane_b32 v23, s36, 5
@@ -2070,17 +2070,17 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
20702070
; GFX10_3-NEXT: buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill
20712071
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
20722072
; GFX10_3-NEXT: v_writelane_b32 v23, s30, 0
2073-
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
20742073
; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s32
2074+
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
20752075
; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
20762076
; GFX10_3-NEXT: v_writelane_b32 v23, s31, 1
2077-
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0
2078-
; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1
2077+
; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 0x4040, v1
2078+
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
20792079
; GFX10_3-NEXT: ;;#ASMSTART
2080-
; GFX10_3-NEXT: ; use alloca0 v1
2080+
; GFX10_3-NEXT: ; use alloca0 v0
20812081
; GFX10_3-NEXT: ;;#ASMEND
20822082
; GFX10_3-NEXT: v_writelane_b32 v23, s33, 2
2083-
; GFX10_3-NEXT: v_add_nc_u32_e32 v22, 0x200, v0
2083+
; GFX10_3-NEXT: v_add_nc_u32_e32 v22, 0x200, v1
20842084
; GFX10_3-NEXT: v_writelane_b32 v23, s34, 3
20852085
; GFX10_3-NEXT: v_writelane_b32 v23, s35, 4
20862086
; GFX10_3-NEXT: v_writelane_b32 v23, s36, 5
@@ -2156,16 +2156,15 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
21562156
; GFX11-NEXT: scratch_store_b32 off, v23, s1 ; 4-byte Folded Spill
21572157
; GFX11-NEXT: s_mov_b32 exec_lo, s0
21582158
; GFX11-NEXT: v_writelane_b32 v23, s30, 0
2159-
; GFX11-NEXT: s_add_i32 s0, s32, 0x4040
2159+
; GFX11-NEXT: s_add_i32 s0, s32, 64
21602160
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
21612161
; GFX11-NEXT: v_mov_b32_e32 v0, s0
2162-
; GFX11-NEXT: s_add_i32 s0, s32, 64
2162+
; GFX11-NEXT: s_add_i32 s0, s32, 0x4040
21632163
; GFX11-NEXT: v_writelane_b32 v23, s31, 1
2164-
; GFX11-NEXT: v_mov_b32_e32 v1, s0
2164+
; GFX11-NEXT: v_add_nc_u32_e64 v22, 0x200, s0
21652165
; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
2166-
; GFX11-NEXT: v_add_nc_u32_e32 v22, 0x200, v0
21672166
; GFX11-NEXT: ;;#ASMSTART
2168-
; GFX11-NEXT: ; use alloca0 v1
2167+
; GFX11-NEXT: ; use alloca0 v0
21692168
; GFX11-NEXT: ;;#ASMEND
21702169
; GFX11-NEXT: v_writelane_b32 v23, s33, 2
21712170
; GFX11-NEXT: v_writelane_b32 v23, s34, 3
@@ -2249,15 +2248,14 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
22492248
; GFX12-NEXT: s_mov_b32 exec_lo, s0
22502249
; GFX12-NEXT: v_writelane_b32 v23, s30, 0
22512250
; GFX12-NEXT: s_add_co_i32 s0, s32, 0x4000
2251+
; GFX12-NEXT: v_mov_b32_e32 v0, s32
22522252
; GFX12-NEXT: s_wait_alu 0xfffe
2253-
; GFX12-NEXT: v_dual_mov_b32 v1, s32 :: v_dual_mov_b32 v0, s0
2253+
; GFX12-NEXT: v_add_nc_u32_e64 v22, 0x200, s0
22542254
; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
22552255
; GFX12-NEXT: v_writelane_b32 v23, s31, 1
22562256
; GFX12-NEXT: ;;#ASMSTART
2257-
; GFX12-NEXT: ; use alloca0 v1
2257+
; GFX12-NEXT: ; use alloca0 v0
22582258
; GFX12-NEXT: ;;#ASMEND
2259-
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
2260-
; GFX12-NEXT: v_add_nc_u32_e32 v22, 0x200, v0
22612259
; GFX12-NEXT: v_writelane_b32 v23, s33, 2
22622260
; GFX12-NEXT: v_writelane_b32 v23, s34, 3
22632261
; GFX12-NEXT: v_writelane_b32 v23, s35, 4

0 commit comments

Comments
 (0)