Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 38 additions & 29 deletions llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1360,14 +1360,24 @@ void AArch64EpilogueEmitter::emitEpilogue() {
}

bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes);
// Assume we can't combine the last pop with the sp restore.
bool CombineAfterCSRBump = false;

unsigned ProloguePopSize = PrologueSaveSize;
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
// With CalleeSavesAboveFrameRecord, ProloguePopSize is the amount of stack
// that needs to be popped until we reach the start of the SVE save area.
// The "FixedObject" stack occurs after the SVE area and must be popped
// later.
ProloguePopSize -= FixedObject;
AfterCSRPopSize += FixedObject;
} else if (!CombineSPBump && PrologueSaveSize != 0) {
}

// Assume we can't combine the last pop with the sp restore.
if (!CombineSPBump && ProloguePopSize != 0) {
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
AArch64InstrInfo::isSEHInstruction(*Pop))
AArch64InstrInfo::isSEHInstruction(*Pop) ||
(SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord &&
isPartOfSVECalleeSaves(Pop)))
Pop = std::prev(Pop);
// Converting the last ldp to a post-index ldp is valid only if the last
// ldp's offset is 0.
Expand All @@ -1377,18 +1387,24 @@ void AArch64EpilogueEmitter::emitEpilogue() {
// may clobber), convert it to a post-index ldp.
if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
convertCalleeSaveRestoreToSPPrePostIncDec(
Pop, DL, PrologueSaveSize, EmitCFI, MachineInstr::FrameDestroy,
PrologueSaveSize);
Pop, DL, ProloguePopSize, EmitCFI, MachineInstr::FrameDestroy,
ProloguePopSize);
} else if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
// If not, and CalleeSavesAboveFrameRecord is enabled, deallocate
// callee-save non-SVE registers to move the stack pointer to the start of
// the SVE area.
emitFrameOffset(MBB, std::next(Pop), DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(ProloguePopSize), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI,
&HasWinCFI);
} else {
// If not, make sure to emit an add after the last ldp.
// Otherwise, make sure to emit an add after the last ldp.
// We're doing this by transferring the size to be restored from the
// adjustment *before* the CSR pops to the adjustment *after* the CSR
// pops.
AfterCSRPopSize += PrologueSaveSize;
CombineAfterCSRBump = true;
AfterCSRPopSize += ProloguePopSize;
}
}

// Move past the restores of the callee-saved registers.
// If we plan on combining the sp bump of the local stack size and the callee
// save stack size, we might need to adjust the CSR save and restore offsets.
Expand Down Expand Up @@ -1419,6 +1435,17 @@ void AArch64EpilogueEmitter::emitEpilogue() {
--SEHEpilogueStartI;
}

// Determine the ranges of SVE callee-saves. This is done before emitting any
// code at the end of the epilogue (for Swift async), which can get in the way
// of finding SVE callee-saves with CalleeSavesAboveFrameRecord.
auto [PPR, ZPR] = getSVEStackFrameSizes();
auto [PPRRange, ZPRRange] = partitionSVECS(
MBB,
SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord
? MBB.getFirstTerminator()
: FirstGPRRestoreI,
PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true);

if (HasFP && AFI->hasSwiftAsyncContext())
emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);

Expand All @@ -1441,14 +1468,6 @@ void AArch64EpilogueEmitter::emitEpilogue() {
NumBytes -= PrologueSaveSize;
assert(NumBytes >= 0 && "Negative stack allocation size!?");

auto [PPR, ZPR] = getSVEStackFrameSizes();
auto [PPRRange, ZPRRange] = partitionSVECS(
MBB,
SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord
? MBB.getFirstTerminator()
: FirstGPRRestoreI,
PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true);

StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
StackOffset SVEStackSize =
SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize;
Expand All @@ -1467,16 +1486,6 @@ void AArch64EpilogueEmitter::emitEpilogue() {
NeedsWinCFI, &HasWinCFI);
}

// Deallocate callee-save non-SVE registers.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);

// Deallocate fixed objects.
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(FixedObject), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);

// Deallocate callee-save SVE registers.
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false,
Expand Down Expand Up @@ -1619,7 +1628,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
false, NeedsWinCFI, &HasWinCFI, EmitCFI,
StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0));
StackOffset::getFixed(AfterCSRPopSize - ArgumentStackToRestore));
}
}

Expand Down
30 changes: 10 additions & 20 deletions llvm/test/CodeGen/AArch64/framelayout-sve-win.mir
Original file line number Diff line number Diff line change
Expand Up @@ -380,10 +380,8 @@ body: |
; CHECK-NEXT: frame-destroy SEH_EpilogStart
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
; CHECK-NEXT: frame-destroy SEH_SaveReg 30, 0
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.1)
; CHECK-NEXT: frame-destroy SEH_SaveReg_X 30, -16
; CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 0 :: (load (s16) from %stack.4)
; CHECK-NEXT: frame-destroy SEH_SavePReg 4, 0
; CHECK-NEXT: $p5 = frame-destroy LDR_PXI $sp, 1 :: (load (s16) from %stack.3)
Expand Down Expand Up @@ -430,10 +428,8 @@ body: |
; CHECK-NEXT: frame-destroy SEH_EpilogStart
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
; CHECK-NEXT: frame-destroy SEH_SaveReg 30, 0
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.1)
; CHECK-NEXT: frame-destroy SEH_SaveReg_X 30, -16
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.4)
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 0
; CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.3)
Expand Down Expand Up @@ -557,10 +553,8 @@ body: |
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
; CHECK-NEXT: $x21, $lr = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.3)
; CHECK-NEXT: frame-destroy SEH_SaveRegP 21, 30, 16
; CHECK-NEXT: $x19, $x20 = frame-destroy LDPXi $sp, 0 :: (load (s64) from %stack.4), (load (s64) from %stack.5)
; CHECK-NEXT: frame-destroy SEH_SaveRegP 19, 20, 0
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
; CHECK-NEXT: early-clobber $sp, $x19, $x20 = frame-destroy LDPXpost $sp, 4 :: (load (s64) from %stack.4), (load (s64) from %stack.5)
; CHECK-NEXT: frame-destroy SEH_SaveRegP_X 19, 20, -32
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.21)
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 2
; CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.20)
Expand Down Expand Up @@ -745,10 +739,8 @@ body: |
; CHECK-NEXT: frame-destroy SEH_EpilogStart
; CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
; CHECK-NEXT: frame-destroy SEH_SetFP
; CHECK-NEXT: $fp, $lr = frame-destroy LDPXi $sp, 0 :: (load (s64) from %stack.2), (load (s64) from %stack.3)
; CHECK-NEXT: frame-destroy SEH_SaveFPLR 0
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.3)
; CHECK-NEXT: frame-destroy SEH_SaveFPLR_X -16
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.19)
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 2
; CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.18)
Expand Down Expand Up @@ -869,10 +861,8 @@ body: |
; CHECK-NEXT: frame-destroy SEH_EpilogStart
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 7, implicit $vg
; CHECK-NEXT: frame-destroy SEH_AllocZ 7
; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.6)
; CHECK-NEXT: frame-destroy SEH_SaveReg 30, 0
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.6)
; CHECK-NEXT: frame-destroy SEH_SaveReg_X 30, -16
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.8)
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 1
; CHECK-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.7)
Expand Down
Loading