Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 1 addition & 7 deletions llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2373,13 +2373,6 @@ void AArch64FrameLowering::determineStackHazardSlot(
return;
}

const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (MFI.hasVarSizedObjects() || TRI->hasStackRealignment(MF)) {
LLVM_DEBUG(dbgs() << "SplitSVEObjects is not supported with variable "
"sized objects or realignment\n");
return;
}

// If another calling convention is explicitly set FPRs can't be promoted to
// ZPR callee-saves.
if (!is_contained({CallingConv::C, CallingConv::Fast,
Expand All @@ -2395,6 +2388,7 @@ void AArch64FrameLowering::determineStackHazardSlot(
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Expected SVE to be available for PPRs");

const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
// With SplitSVEObjects the CS hazard padding is placed between the
// PPRs and ZPRs. If there are any FPR CS there would be a hazard between
// them and the CS GRPs. Avoid this by promoting all FPR CS to ZPRs.
Expand Down
70 changes: 43 additions & 27 deletions llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -793,10 +793,9 @@ void AArch64PrologueEmitter::emitPrologue() {
CFAOffset += AllocateBeforePPRs;
assert(PPRRange.End == ZPRRange.Begin &&
"Expected ZPR callee saves after PPR locals");
allocateStackSpace(PPRRange.End, RealignmentPadding, AllocateAfterPPRs,
EmitAsyncCFI && !HasFP, CFAOffset,
MFI.hasVarSizedObjects() || ZPR.LocalsSize ||
NonSVELocalsSize);
allocateStackSpace(
PPRRange.End, 0, AllocateAfterPPRs, EmitAsyncCFI && !HasFP, CFAOffset,
MFI.hasVarSizedObjects() || ZPR.LocalsSize || NonSVELocalsSize);
CFAOffset += AllocateAfterPPRs;
} else {
assert(SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord);
Expand Down Expand Up @@ -1308,6 +1307,26 @@ AArch64EpilogueEmitter::AArch64EpilogueEmitter(MachineFunction &MF,
SEHEpilogueStartI = MBB.end();
}

// Moves SP to FP + Offset (where Offset is negative), applying any fixed
// portion of the offset through a scratch register so that the stack range
// [FP + Offset, FP) is never momentarily above SP (i.e. deallocated).
void AArch64EpilogueEmitter::moveSPBelowFP(MachineBasicBlock::iterator MBBI,
                                           StackOffset Offset) {
  // Other combinations could be supported, but are not currently needed.
  assert(Offset.getScalable() < 0 && Offset.getFixed() <= 0 &&
         "expected negative offset (with optional fixed portion)");
  Register Base = AArch64::FP;
  if (int64_t FixedOffset = Offset.getFixed()) {
    // If we have a negative fixed offset, we first apply it to a temporary
    // register (rather than to SP directly), to avoid briefly deallocating
    // the scalable portion of the offset.
    Base = MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
    emitFrameOffset(MBB, MBBI, DL, Base, AArch64::FP,
                    StackOffset::getFixed(FixedOffset), TII,
                    MachineInstr::FrameDestroy);
  }
  // Finally, move SP below the (possibly adjusted) base by the scalable
  // portion of the offset.
  emitFrameOffset(MBB, MBBI, DL, AArch64::SP, Base,
                  StackOffset::getScalable(Offset.getScalable()), TII,
                  MachineInstr::FrameDestroy);
}

void AArch64EpilogueEmitter::emitEpilogue() {
MachineBasicBlock::iterator EpilogueEndI = MBB.getLastNonDebugInstr();
if (MBB.end() != EpilogueEndI) {
Expand Down Expand Up @@ -1408,6 +1427,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
AfterCSRPopSize += ProloguePopSize;
}
}

// Move past the restores of the callee-saved registers.
// If we plan on combining the sp bump of the local stack size and the callee
// save stack size, we might need to adjust the CSR save and restore offsets.
Expand Down Expand Up @@ -1474,8 +1494,6 @@ void AArch64EpilogueEmitter::emitEpilogue() {
StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
StackOffset SVEStackSize =
SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize;
MachineBasicBlock::iterator RestoreBegin = ZPRRange.Begin;
MachineBasicBlock::iterator RestoreEnd = PPRRange.End;

// Deallocate the SVE area.
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
Expand All @@ -1490,7 +1508,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
}

// Deallocate callee-save SVE registers.
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false,
NeedsWinCFI, &HasWinCFI);
} else if (AFI->hasSVEStackSize()) {
Expand All @@ -1501,28 +1519,26 @@ void AArch64EpilogueEmitter::emitEpilogue() {
(AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
: AArch64::SP;
if (SVECalleeSavesSize && BaseForSVEDealloc == AArch64::FP) {
// TODO: Support stack realignment and variable-sized objects.
assert(
SVELayout != SVEStackLayout::Split &&
"unexpected stack realignment or variable sized objects with split "
"SVE stack objects");

Register CalleeSaveBase = AArch64::FP;
if (int64_t CalleeSaveBaseOffset =
AFI->getCalleeSaveBaseToFrameRecordOffset()) {
// If we have a non-zero offset to the non-SVE CS base we need to
// compute the base address by subtracting the offset in a temporary
// register first (to avoid briefly deallocating the SVE CS).
CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
&AArch64::GPR64RegClass);
emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
// The offset from the frame-pointer to the start of the ZPR/PPR CSRs.
StackOffset FPOffsetZPRCSRs =
-SVECalleeSavesSize -
StackOffset::getFixed(AFI->getCalleeSaveBaseToFrameRecordOffset());
StackOffset FPOffsetPPRCSRs = FPOffsetZPRCSRs + ZPR.CalleeSavesSize;

// With split SVE, the PPR locals are above the ZPR callee-saves.
if (ZPR.CalleeSavesSize && SVELayout == SVEStackLayout::Split)
FPOffsetZPRCSRs -= PPR.LocalsSize;

// The code below will deallocate the stack space by moving the SP
// to the start of the ZPR/PPR callee-save area.
moveSPBelowFP(ZPRRange.Begin, FPOffsetZPRCSRs);

if (PPR.CalleeSavesSize && SVELayout == SVEStackLayout::Split) {
// Move to the start of the PPR area (this offset may be zero).
emitFrameOffset(MBB, ZPRRange.End, DL, AArch64::SP, AArch64::SP,
FPOffsetPPRCSRs - FPOffsetZPRCSRs, TII,
MachineInstr::FrameDestroy);
}
// The code below will deallocate the stack space space by moving the SP
// to the start of the SVE callee-save area.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
-SVECalleeSavesSize, TII, MachineInstr::FrameDestroy);
} else if (BaseForSVEDealloc == AArch64::SP) {
auto CFAOffset =
SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize);
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,10 @@ class AArch64EpilogueEmitter final : public AArch64PrologueEpilogueCommon {
private:
bool shouldCombineCSRLocalStackBump(uint64_t StackBumpBytes) const;

/// A helper for moving the SP to a negative offset from the FP, without
/// deallocating any stack in the range FP to FP + Offset.
void moveSPBelowFP(MachineBasicBlock::iterator MBBI, StackOffset Offset);

void emitSwiftAsyncContextFramePointer(MachineBasicBlock::iterator MBBI,
const DebugLoc &DL) const;

Expand Down
49 changes: 21 additions & 28 deletions llvm/test/CodeGen/AArch64/framelayout-split-sve.mir
Original file line number Diff line number Diff line change
Expand Up @@ -182,63 +182,56 @@ body: |
RET_ReallyLR

# CHECK-LABEL: name: test_allocate_split_sve_realigned
# CHECK: stackSize: 2080
# CHECK: stackSize: 1056

# CHECK: bb.0.entry:
# CHECK: liveins: $z0, $p0, $lr
# CHECK: $sp = frame-setup SUBXri $sp, 1040, 0
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 1040
# CHECK-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.5)
# CHECK-NEXT: frame-setup STRXui killed $lr, $sp, 129 :: (store (s64) into %stack.4)
# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 1024, 0
# CHECK: early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.5), (store (s64) into %stack.4)
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 16
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
# CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 1040, 0
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
# CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 1040, 0
# CHECK-NEXT: $[[TMP]] = frame-setup ADDVL_XXI $[[TMP]], -2, implicit $vg
# CHECK-NEXT: $sp = frame-setup ANDXri killed $x9, 7930
# CHECK-NEXT: $sp = frame-setup ANDXri killed $[[TMP]], 7930
#
# CHECK-NEXT: $x8 = SUBXri $fp, 1024, 0
# CHECK-NEXT: $x8 = ADDPL_XXI $x8, -1, implicit $vg
# CHECK-NEXT: STR_ZXI $z0, killed $x8, -1 :: (store (<vscale x 1 x s128>) into %stack.0)
# CHECK-NEXT: $x8 = SUBXri $fp, 1024, 0
# CHECK-NEXT: STR_PXI $p0, killed $x8, -15 :: (store (<vscale x 1 x s16>) into %stack.1)
# CHECK-NEXT: STR_ZXI $z0, killed $x8, -2 :: (store (<vscale x 1 x s128>) into %stack.0)
# CHECK-NEXT: STR_PXI $p0, $fp, -6 :: (store (<vscale x 1 x s16>) into %stack.1)
#
# CHECK-NEXT: $sp = frame-destroy SUBXri $fp, 1024, 0
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 1040
# CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 129 :: (load (s64) from %stack.4)
# CHECK-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.5)
# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0
# CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16
# CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.5), (load (s64) from %stack.4)
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
# CHECK-NEXT: RET_ReallyLR

# ASM-LABEL: test_allocate_split_sve_realigned
# ASM: sub sp, sp, #1040
# ASM-NEXT: .cfi_def_cfa_offset 1040
# ASM-NEXT: str x29, [sp, #1024]
# ASM-NEXT: str x30, [sp, #1032]
# ASM-NEXT: add x29, sp, #1024
# ASM: stp x29, x30, [sp, #-16]!
# ASM-NEXT: .cfi_def_cfa_offset 16
# ASM-NEXT: mov x29, sp
# ASM-NEXT: .cfi_def_cfa w29, 16
# ASM-NEXT: .cfi_offset w30, -8
# ASM-NEXT: .cfi_offset w29, -16
#
# ASM: sub sp, x29, #1024
# ASM-NEXT: .cfi_def_cfa wsp, 1040
# ASM-NEXT: ldr x30, [sp, #1032]
# ASM-NEXT: ldr x29, [sp, #1024]
# ASM-NEXT: add sp, sp, #1040
# ASM: mov sp, x29
# ASM-NEXT: .cfi_def_cfa wsp, 16
# ASM-NEXT: ldp x29, x30, [sp], #16
# ASM-NEXT: .cfi_def_cfa_offset 0
# ASM-NEXT: .cfi_restore w30
# ASM-NEXT: .cfi_restore w29

# UNWINDINFO: DW_CFA_def_cfa_offset: +1040
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
# UNWINDINFO: DW_CFA_def_cfa: reg29 +16
# UNWINDINFO-NEXT: DW_CFA_offset: reg30 -8
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
#
# UNWINDINFO: DW_CFA_def_cfa: reg31 +1040
# UNWINDINFO: DW_CFA_def_cfa: reg31 +16
# UNWINDINFO: DW_CFA_def_cfa_offset: +0
# UNWINDINFO-NEXT: DW_CFA_restore: reg30
# UNWINDINFO-NEXT: DW_CFA_restore: reg29
Expand Down
Loading
Loading