Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 1 addition & 7 deletions llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2373,13 +2373,6 @@ void AArch64FrameLowering::determineStackHazardSlot(
return;
}

const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (MFI.hasVarSizedObjects() || TRI->hasStackRealignment(MF)) {
LLVM_DEBUG(dbgs() << "SplitSVEObjects is not supported with variable "
"sized objects or realignment\n");
return;
}

// If another calling convention is explicitly set FPRs can't be promoted to
// ZPR callee-saves.
if (!is_contained({CallingConv::C, CallingConv::Fast,
Expand All @@ -2395,6 +2388,7 @@ void AArch64FrameLowering::determineStackHazardSlot(
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Expected SVE to be available for PPRs");

const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
// With SplitSVEObjects the CS hazard padding is placed between the
// PPRs and ZPRs. If there are any FPR CS there would be a hazard between
// them and the CS GRPs. Avoid this by promoting all FPR CS to ZPRs.
Expand Down
70 changes: 43 additions & 27 deletions llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -793,10 +793,9 @@ void AArch64PrologueEmitter::emitPrologue() {
CFAOffset += AllocateBeforePPRs;
assert(PPRRange.End == ZPRRange.Begin &&
"Expected ZPR callee saves after PPR locals");
allocateStackSpace(PPRRange.End, RealignmentPadding, AllocateAfterPPRs,
EmitAsyncCFI && !HasFP, CFAOffset,
MFI.hasVarSizedObjects() || ZPR.LocalsSize ||
NonSVELocalsSize);
allocateStackSpace(
PPRRange.End, 0, AllocateAfterPPRs, EmitAsyncCFI && !HasFP, CFAOffset,
MFI.hasVarSizedObjects() || ZPR.LocalsSize || NonSVELocalsSize);
CFAOffset += AllocateAfterPPRs;
} else {
assert(SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord);
Expand Down Expand Up @@ -1308,6 +1307,26 @@ AArch64EpilogueEmitter::AArch64EpilogueEmitter(MachineFunction &MF,
SEHEpilogueStartI = MBB.end();
}

/// Moves SP to \p Offset below FP without ever transiently deallocating any
/// stack between FP + Offset and FP. The fixed portion (if any) is applied
/// first via a temporary register; the scalable portion is applied last,
/// directly to SP.
void AArch64EpilogueEmitter::moveSPBelowFP(MachineBasicBlock::iterator MBBI,
                                           StackOffset Offset) {
  // Other combinations could be supported, but are not currently needed.
  assert(Offset.getScalable() < 0 && Offset.getFixed() <= 0 &&
         "expected negative offset (with optional fixed portion)");
  Register Base = AArch64::FP;
  if (int64_t FixedOffset = Offset.getFixed()) {
    // If we have a negative fixed offset, subtract it in a temporary
    // register first (to avoid briefly deallocating the scalable
    // portion of the offset).
    Base = MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
    emitFrameOffset(MBB, MBBI, DL, Base, AArch64::FP,
                    StackOffset::getFixed(FixedOffset), TII,
                    MachineInstr::FrameDestroy);
  }
  // Finally apply the scalable part relative to Base (FP, or the temporary
  // that already accounts for the fixed part).
  emitFrameOffset(MBB, MBBI, DL, AArch64::SP, Base,
                  StackOffset::getScalable(Offset.getScalable()), TII,
                  MachineInstr::FrameDestroy);
}

void AArch64EpilogueEmitter::emitEpilogue() {
MachineBasicBlock::iterator EpilogueEndI = MBB.getLastNonDebugInstr();
if (MBB.end() != EpilogueEndI) {
Expand Down Expand Up @@ -1408,6 +1427,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
AfterCSRPopSize += ProloguePopSize;
}
}

// Move past the restores of the callee-saved registers.
// If we plan on combining the sp bump of the local stack size and the callee
// save stack size, we might need to adjust the CSR save and restore offsets.
Expand Down Expand Up @@ -1474,8 +1494,6 @@ void AArch64EpilogueEmitter::emitEpilogue() {
StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
StackOffset SVEStackSize =
SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize;
MachineBasicBlock::iterator RestoreBegin = ZPRRange.Begin;
MachineBasicBlock::iterator RestoreEnd = PPRRange.End;

// Deallocate the SVE area.
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
Expand All @@ -1490,7 +1508,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
}

// Deallocate callee-save SVE registers.
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false,
NeedsWinCFI, &HasWinCFI);
} else if (AFI->hasSVEStackSize()) {
Expand All @@ -1501,28 +1519,26 @@ void AArch64EpilogueEmitter::emitEpilogue() {
(AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
: AArch64::SP;
if (SVECalleeSavesSize && BaseForSVEDealloc == AArch64::FP) {
// TODO: Support stack realignment and variable-sized objects.
assert(
SVELayout != SVEStackLayout::Split &&
"unexpected stack realignment or variable sized objects with split "
"SVE stack objects");

Register CalleeSaveBase = AArch64::FP;
if (int64_t CalleeSaveBaseOffset =
AFI->getCalleeSaveBaseToFrameRecordOffset()) {
// If we have a non-zero offset to the non-SVE CS base we need to
// compute the base address by subtracting the offset in a temporary
// register first (to avoid briefly deallocating the SVE CS).
CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
&AArch64::GPR64RegClass);
emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
// The offset from the frame-pointer to the start of the ZPR saves.
StackOffset FPOffsetZPR =
-SVECalleeSavesSize -
StackOffset::getFixed(AFI->getCalleeSaveBaseToFrameRecordOffset());

// With split SVE, the PPR locals are above the ZPR callee-saves.
if (ZPR.CalleeSavesSize && SVELayout == SVEStackLayout::Split)
FPOffsetZPR -= PPR.LocalsSize;

// Deallocate the stack space by moving the SP to the start of the
// ZPR/PPR callee-save area.
moveSPBelowFP(ZPRRange.Begin, FPOffsetZPR);

if (PPR.CalleeSavesSize && SVELayout == SVEStackLayout::Split) {
// Move to the start of the PPR area (this offset may be zero).
StackOffset FPOffsetPPR = -PPR.CalleeSavesSize;
emitFrameOffset(MBB, ZPRRange.End, DL, AArch64::SP, AArch64::SP,
FPOffsetPPR - FPOffsetZPR, TII,
MachineInstr::FrameDestroy);
}
// The code below will deallocate the stack space by moving the SP
// to the start of the SVE callee-save area.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
-SVECalleeSavesSize, TII, MachineInstr::FrameDestroy);
} else if (BaseForSVEDealloc == AArch64::SP) {
auto CFAOffset =
SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize);
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,10 @@ class AArch64EpilogueEmitter final : public AArch64PrologueEpilogueCommon {
private:
bool shouldCombineCSRLocalStackBump(uint64_t StackBumpBytes) const;

/// A helper for moving the SP to a negative offset from the FP, without
/// deallocating any stack in the range FP to FP + Offset.
void moveSPBelowFP(MachineBasicBlock::iterator MBBI, StackOffset Offset);

void emitSwiftAsyncContextFramePointer(MachineBasicBlock::iterator MBBI,
const DebugLoc &DL) const;

Expand Down
49 changes: 21 additions & 28 deletions llvm/test/CodeGen/AArch64/framelayout-split-sve.mir
Original file line number Diff line number Diff line change
Expand Up @@ -182,63 +182,56 @@ body: |
RET_ReallyLR

# CHECK-LABEL: name: test_allocate_split_sve_realigned
# CHECK: stackSize: 2080
# CHECK: stackSize: 1056

# CHECK: bb.0.entry:
# CHECK: liveins: $z0, $p0, $lr
# CHECK: $sp = frame-setup SUBXri $sp, 1040, 0
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 1040
# CHECK-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.5)
# CHECK-NEXT: frame-setup STRXui killed $lr, $sp, 129 :: (store (s64) into %stack.4)
# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 1024, 0
# CHECK: early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.5), (store (s64) into %stack.4)
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 16
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8
# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
# CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 1040, 0
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg
# CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 1040, 0
# CHECK-NEXT: $[[TMP]] = frame-setup ADDVL_XXI $[[TMP]], -2, implicit $vg
# CHECK-NEXT: $sp = frame-setup ANDXri killed $x9, 7930
# CHECK-NEXT: $sp = frame-setup ANDXri killed $[[TMP]], 7930
#
# CHECK-NEXT: $x8 = SUBXri $fp, 1024, 0
# CHECK-NEXT: $x8 = ADDPL_XXI $x8, -1, implicit $vg
# CHECK-NEXT: STR_ZXI $z0, killed $x8, -1 :: (store (<vscale x 1 x s128>) into %stack.0)
# CHECK-NEXT: $x8 = SUBXri $fp, 1024, 0
# CHECK-NEXT: STR_PXI $p0, killed $x8, -15 :: (store (<vscale x 1 x s16>) into %stack.1)
# CHECK-NEXT: STR_ZXI $z0, killed $x8, -2 :: (store (<vscale x 1 x s128>) into %stack.0)
# CHECK-NEXT: STR_PXI $p0, $fp, -6 :: (store (<vscale x 1 x s16>) into %stack.1)
#
# CHECK-NEXT: $sp = frame-destroy SUBXri $fp, 1024, 0
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 1040
# CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 129 :: (load (s64) from %stack.4)
# CHECK-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.5)
# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0
# CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16
# CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.5), (load (s64) from %stack.4)
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29
# CHECK-NEXT: RET_ReallyLR

# ASM-LABEL: test_allocate_split_sve_realigned
# ASM: sub sp, sp, #1040
# ASM-NEXT: .cfi_def_cfa_offset 1040
# ASM-NEXT: str x29, [sp, #1024]
# ASM-NEXT: str x30, [sp, #1032]
# ASM-NEXT: add x29, sp, #1024
# ASM: stp x29, x30, [sp, #-16]!
# ASM-NEXT: .cfi_def_cfa_offset 16
# ASM-NEXT: mov x29, sp
# ASM-NEXT: .cfi_def_cfa w29, 16
# ASM-NEXT: .cfi_offset w30, -8
# ASM-NEXT: .cfi_offset w29, -16
#
# ASM: sub sp, x29, #1024
# ASM-NEXT: .cfi_def_cfa wsp, 1040
# ASM-NEXT: ldr x30, [sp, #1032]
# ASM-NEXT: ldr x29, [sp, #1024]
# ASM-NEXT: add sp, sp, #1040
# ASM: mov sp, x29
# ASM-NEXT: .cfi_def_cfa wsp, 16
# ASM-NEXT: ldp x29, x30, [sp], #16
# ASM-NEXT: .cfi_def_cfa_offset 0
# ASM-NEXT: .cfi_restore w30
# ASM-NEXT: .cfi_restore w29

# UNWINDINFO: DW_CFA_def_cfa_offset: +1040
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
# UNWINDINFO: DW_CFA_def_cfa: reg29 +16
# UNWINDINFO-NEXT: DW_CFA_offset: reg30 -8
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
#
# UNWINDINFO: DW_CFA_def_cfa: reg31 +1040
# UNWINDINFO: DW_CFA_def_cfa: reg31 +16
# UNWINDINFO: DW_CFA_def_cfa_offset: +0
# UNWINDINFO-NEXT: DW_CFA_restore: reg30
# UNWINDINFO-NEXT: DW_CFA_restore: reg29
Expand Down
Loading