Skip to content

Commit cf89dea

Browse files
committed
Tidy up and add tests with frame-ptr attr
1 parent eda3870 commit cf89dea

File tree

3 files changed, with 37 additions and 9 deletions.

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -714,8 +714,6 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
714714
assert(hasFP(MF));
715715
Register FPReg = MFI->getFrameOffsetReg();
716716
assert(FPReg != AMDGPU::FP_REG);
717-
Register SPReg = MFI->getStackPtrOffsetReg();
718-
assert(SPReg != AMDGPU::SP_REG);
719717
unsigned VGPRSize =
720718
llvm::alignTo((ST.getAddressableNumVGPRs() -
721719
AMDGPU::IsaInfo::getVGPRAllocGranule(&ST)) *
@@ -732,6 +730,9 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
732730
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CMP_LG_U32)).addImm(0).addReg(FPReg);
733731
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CMOVK_I32), FPReg).addImm(VGPRSize);
734732
if (requiresStackPointerReference(MF)) {
733+
Register SPReg = MFI->getStackPtrOffsetReg();
734+
assert(SPReg != AMDGPU::SP_REG);
735+
735736
// If at least one of the constants can be inlined, then we can use
736737
// s_cselect. Otherwise, use a mov and cmovk.
737738
if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm()) ||

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -512,13 +512,6 @@ Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
512512
const SIFrameLowering *TFI = ST.getFrameLowering();
513513
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
514514

515-
// If we need to reserve scratch space for saving the VGPRs, then we should
516-
// use the frame register for accessing our own frame (which may start at a
517-
// non-zero offset).
518-
if (TFI->mayReserveScratchForCWSR(MF))
519-
return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()
520-
: FuncInfo->getStackPtrOffsetReg();
521-
522515
// During ISel lowering we always reserve the stack pointer in entry and chain
523516
// functions, but never actually want to reference it when accessing our own
524517
// frame. If we need a frame pointer we use it, but otherwise we can just use

llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -134,6 +134,38 @@ define amdgpu_cs void @realign_stack(<32 x i32> %x) #0 {
134134
ret void
135135
}
136136

137+
define amdgpu_cs void @frame_pointer_none() #1 { ; amdgpu_cs function using attribute group #1 ("frame-pointer"="none")
138+
; CHECK-LABEL: frame_pointer_none:
139+
; CHECK: ; %bb.0:
140+
; CHECK-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
141+
; CHECK-NEXT: v_mov_b32_e32 v0, 13
142+
; CHECK-NEXT: s_cmp_lg_u32 0, s33
143+
; CHECK-NEXT: s_cmovk_i32 s33, 0x1c0
144+
; CHECK-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
145+
; CHECK-NEXT: s_wait_storecnt 0x0
146+
; CHECK-NEXT: s_alloc_vgpr 0
147+
; CHECK-NEXT: s_endpgm
148+
%local = alloca i32, addrspace(5) ; the only stack object in this function: one i32 slot in addrspace(5)
149+
store volatile i8 13, ptr addrspace(5) %local ; expected as the scratch_store_b8 of v0 (13) addressed via s33 in the checks above; NOTE(review): volatile presumably keeps the store from being dropped - confirm
150+
ret void
151+
}
152+
153+
define amdgpu_cs void @frame_pointer_all() #2 { ; amdgpu_cs function using attribute group #2 ("frame-pointer"="all"); the expected instruction sequence is the same as for @frame_pointer_none
154+
; CHECK-LABEL: frame_pointer_all:
155+
; CHECK: ; %bb.0:
156+
; CHECK-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
157+
; CHECK-NEXT: v_mov_b32_e32 v0, 13
158+
; CHECK-NEXT: s_cmp_lg_u32 0, s33
159+
; CHECK-NEXT: s_cmovk_i32 s33, 0x1c0
160+
; CHECK-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
161+
; CHECK-NEXT: s_wait_storecnt 0x0
162+
; CHECK-NEXT: s_alloc_vgpr 0
163+
; CHECK-NEXT: s_endpgm
164+
%local = alloca i32, addrspace(5) ; the only stack object in this function: one i32 slot in addrspace(5)
165+
store volatile i8 13, ptr addrspace(5) %local ; expected as the scratch_store_b8 of v0 (13) addressed via s33 in the checks above; NOTE(review): volatile presumably keeps the store from being dropped - confirm
166+
ret void
167+
}
168+
137169
; Non-entry functions and graphics shaders don't need to worry about CWSR.
138170
define amdgpu_gs void @amdgpu_gs() #0 {
139171
; CHECK-LABEL: amdgpu_gs:
@@ -215,3 +247,5 @@ define void @default() #0 {
215247
declare amdgpu_gfx void @callee(i32) #0
216248

217249
attributes #0 = { nounwind }
250+
attributes #1 = { nounwind "frame-pointer"="none" } ; used by @frame_pointer_none
251+
attributes #2 = { nounwind "frame-pointer"="all" } ; used by @frame_pointer_all

0 commit comments

Comments
 (0)