Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1813,9 +1813,22 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
if (MI->getOpcode() == RISCV::ADJCALLSTACKDOWN)
Amount = -Amount;

const RISCVRegisterInfo &RI = *STI.getRegisterInfo();
RI.adjustReg(MBB, MI, DL, SPReg, SPReg, StackOffset::getFixed(Amount),
MachineInstr::NoFlags, getStackAlign());
const RISCVTargetLowering *TLI =
MF.getSubtarget<RISCVSubtarget>().getTargetLowering();
int64_t ProbeSize = TLI->getStackProbeSize(MF, getStackAlign());
if (TLI->hasInlineStackProbe(MF) && -Amount >= ProbeSize) {
// When stack probing is enabled, the decrement of SP may need to be
// probed. We can handle both the decrement and the probing in
// allocateStack.
bool DynAllocation =
MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation();
allocateStack(MBB, MI, MF, -Amount, -Amount, !hasFP(MF),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

allocateStack() sets the FrameSetup flag on the instructions it creates. That flag should not be set when expanding call frame pseudo-instructions, since those instructions are not part of the prologue.

/*NeedProbe=*/true, ProbeSize, DynAllocation);
} else {
const RISCVRegisterInfo &RI = *STI.getRegisterInfo();
RI.adjustReg(MBB, MI, DL, SPReg, SPReg, StackOffset::getFixed(Amount),
MachineInstr::NoFlags, getStackAlign());
}
}
}

Expand Down
82 changes: 58 additions & 24 deletions llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
; If a function has variable-sized stack objects, then any function calls which
; need to pass arguments on the stack must allocate the stack space for them
; dynamically, to ensure they are at the bottom of the frame.
define void @no_reserved_call_frame(i64 %n, i32 %dummy) #0 {
define void @no_reserved_call_frame(i64 %n) #0 {
; RV64I-LABEL: no_reserved_call_frame:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: addi sp, sp, -16
Expand All @@ -377,15 +377,20 @@ define void @no_reserved_call_frame(i64 %n, i32 %dummy) #0 {
; RV64I-NEXT: addi a0, a0, 15
; RV64I-NEXT: andi a0, a0, -16
; RV64I-NEXT: sub a0, sp, a0
; RV64I-NEXT: lui a2, 1
; RV64I-NEXT: lui a1, 1
; RV64I-NEXT: .LBB4_1: # %entry
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: sub sp, sp, a2
; RV64I-NEXT: sub sp, sp, a1
; RV64I-NEXT: sd zero, 0(sp)
; RV64I-NEXT: blt a0, sp, .LBB4_1
; RV64I-NEXT: # %bb.2: # %entry
; RV64I-NEXT: mv sp, a0
; RV64I-NEXT: lui a1, 1
; RV64I-NEXT: sub sp, sp, a1
; RV64I-NEXT: sd zero, 0(sp)
; RV64I-NEXT: call callee_stack_args
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: add sp, sp, a0
; RV64I-NEXT: addi sp, s0, -16
; RV64I-NEXT: .cfi_def_cfa sp, 16
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
Expand All @@ -407,20 +412,27 @@ define void @no_reserved_call_frame(i64 %n, i32 %dummy) #0 {
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: addi s0, sp, 16
; RV32I-NEXT: .cfi_def_cfa s0, 0
; RV32I-NEXT: mv a1, a2
; RV32I-NEXT: slli a0, a0, 2
; RV32I-NEXT: addi a0, a0, 15
; RV32I-NEXT: andi a0, a0, -16
; RV32I-NEXT: sub a0, sp, a0
; RV32I-NEXT: lui a2, 1
; RV32I-NEXT: lui a1, 1
; RV32I-NEXT: .LBB4_1: # %entry
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: sub sp, sp, a2
; RV32I-NEXT: sub sp, sp, a1
; RV32I-NEXT: sw zero, 0(sp)
; RV32I-NEXT: blt a0, sp, .LBB4_1
; RV32I-NEXT: # %bb.2: # %entry
; RV32I-NEXT: mv sp, a0
; RV32I-NEXT: lui a1, 1
; RV32I-NEXT: sub sp, sp, a1
; RV32I-NEXT: sw zero, 0(sp)
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: sw zero, 0(sp)
; RV32I-NEXT: call callee_stack_args
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a0, a0, 32
; RV32I-NEXT: add sp, sp, a0
; RV32I-NEXT: addi sp, s0, -16
; RV32I-NEXT: .cfi_def_cfa sp, 16
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
Expand All @@ -432,48 +444,70 @@ define void @no_reserved_call_frame(i64 %n, i32 %dummy) #0 {
; RV32I-NEXT: ret
entry:
%v = alloca i32, i64 %n
call void @callee_stack_args(ptr %v, i32 %dummy)
call void @callee_stack_args(ptr %v, [518 x i64] poison)
ret void
}

; Same as above but without a variable-sized allocation, so the reserved call
; frame can be folded into the fixed-size allocation in the prologue.
define void @reserved_call_frame(i64 %n, i32 %dummy) #0 {
define void @reserved_call_frame(i64 %n) #0 {
; RV64I-LABEL: reserved_call_frame:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: addi sp, sp, -416
; RV64I-NEXT: .cfi_def_cfa_offset 416
; RV64I-NEXT: sd ra, 408(sp) # 8-byte Folded Spill
; RV64I-NEXT: addi sp, sp, -2032
; RV64I-NEXT: .cfi_def_cfa_offset 2032
; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: addi a0, sp, 8
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: sub sp, sp, a0
; RV64I-NEXT: sd zero, 0(sp)
; RV64I-NEXT: .cfi_def_cfa_offset 4096
; RV64I-NEXT: addi sp, sp, -48
; RV64I-NEXT: .cfi_def_cfa_offset 4144
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: add a0, sp, a0
; RV64I-NEXT: call callee_stack_args
; RV64I-NEXT: ld ra, 408(sp) # 8-byte Folded Reload
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a0, a0, 48
; RV64I-NEXT: add sp, sp, a0
; RV64I-NEXT: .cfi_def_cfa_offset 2032
; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
; RV64I-NEXT: .cfi_restore ra
; RV64I-NEXT: addi sp, sp, 416
; RV64I-NEXT: addi sp, sp, 2032
; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV32I-LABEL: reserved_call_frame:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: addi sp, sp, -416
; RV32I-NEXT: .cfi_def_cfa_offset 416
; RV32I-NEXT: sw ra, 412(sp) # 4-byte Folded Spill
; RV32I-NEXT: addi sp, sp, -2032
; RV32I-NEXT: .cfi_def_cfa_offset 2032
; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: mv a1, a2
; RV32I-NEXT: addi a0, sp, 12
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: sub sp, sp, a0
; RV32I-NEXT: sw zero, 0(sp)
; RV32I-NEXT: .cfi_def_cfa_offset 4096
; RV32I-NEXT: addi sp, sp, -80
; RV32I-NEXT: .cfi_def_cfa_offset 4176
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a0, a0, 36
; RV32I-NEXT: add a0, sp, a0
; RV32I-NEXT: call callee_stack_args
; RV32I-NEXT: lw ra, 412(sp) # 4-byte Folded Reload
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a0, a0, 80
; RV32I-NEXT: add sp, sp, a0
; RV32I-NEXT: .cfi_def_cfa_offset 2032
; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
; RV32I-NEXT: .cfi_restore ra
; RV32I-NEXT: addi sp, sp, 416
; RV32I-NEXT: addi sp, sp, 2032
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
entry:
%v = alloca i32, i64 100
call void @callee_stack_args(ptr %v, i32 %dummy)
%v = alloca i32, i64 518
call void @callee_stack_args(ptr %v, [518 x i64] poison)
ret void
}

declare void @callee_stack_args(ptr, i32)
declare void @callee_stack_args(ptr, [518 x i64])

; Dynamic allocation of vectors
define void @dynamic_vector(i64 %size, ptr %out) #0 {
Expand Down