Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 73 additions & 77 deletions llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,22 @@ SVEFrameSizes AArch64PrologueEpilogueCommon::getSVEStackFrameSizes() const {
{ZPRCalleeSavesSize, PPRLocalsSize + ZPRLocalsSize}};
}

SVEStackAllocations AArch64PrologueEpilogueCommon::getSVEStackAllocations(
SVEFrameSizes const &SVE) {
StackOffset AfterZPRs = SVE.ZPR.LocalsSize;
StackOffset BeforePPRs = SVE.ZPR.CalleeSavesSize + SVE.PPR.CalleeSavesSize;
StackOffset AfterPPRs = {};
if (SVELayout == SVEStackLayout::Split) {
BeforePPRs = SVE.PPR.CalleeSavesSize;
// If there are no ZPR CSRs, place all local allocations after the ZPRs.
if (SVE.ZPR.CalleeSavesSize)
AfterPPRs += SVE.PPR.LocalsSize + SVE.ZPR.CalleeSavesSize;
else
AfterZPRs += SVE.PPR.LocalsSize; // Group allocation of locals.
}
return {BeforePPRs, AfterPPRs, AfterZPRs};
}

struct SVEPartitions {
struct {
MachineBasicBlock::iterator Begin, End;
Expand Down Expand Up @@ -687,16 +703,19 @@ void AArch64PrologueEmitter::emitPrologue() {
// All of the remaining stack allocations are for locals.
determineLocalsStackSize(NumBytes, PrologueSaveSize);

auto [PPR, ZPR] = getSVEStackFrameSizes();
SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});

MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
assert(!SVEAllocs.AfterPPRs &&
"unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
// If we're doing SVE saves first, we need to immediately allocate space
// for fixed objects, then space for the SVE callee saves.
//
// Windows unwind requires that the scalable size is a multiple of 16;
// that's handled when the callee-saved size is computed.
auto SaveSize =
StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) +
StackOffset::getFixed(FixedObject);
auto SaveSize = SVEAllocs.BeforePPRs + StackOffset::getFixed(FixedObject);
allocateStackSpace(PrologueBeginI, 0, SaveSize, false, StackOffset{},
/*FollowupAllocs=*/true);
NumBytes -= FixedObject;
Expand Down Expand Up @@ -764,12 +783,11 @@ void AArch64PrologueEmitter::emitPrologue() {
if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);

auto [PPR, ZPR] = getSVEStackFrameSizes();
StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
StackOffset NonSVELocalsSize = StackOffset::getFixed(NumBytes);
SVEAllocs.AfterZPRs += NonSVELocalsSize;

StackOffset CFAOffset =
StackOffset::getFixed(MFI.getStackSize()) - NonSVELocalsSize;

MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
// Allocate space for the callee saves and PPR locals (if any).
if (SVELayout != SVEStackLayout::CalleeSavesAboveFrameRecord) {
Expand All @@ -780,31 +798,23 @@ void AArch64PrologueEmitter::emitPrologue() {
if (EmitAsyncCFI)
emitCalleeSavedSVELocations(AfterSVESavesI);

StackOffset AllocateBeforePPRs = SVECalleeSavesSize;
StackOffset AllocateAfterPPRs = PPR.LocalsSize;
if (SVELayout == SVEStackLayout::Split) {
AllocateBeforePPRs = PPR.CalleeSavesSize;
AllocateAfterPPRs = PPR.LocalsSize + ZPR.CalleeSavesSize;
}
allocateStackSpace(PPRRange.Begin, 0, AllocateBeforePPRs,
allocateStackSpace(PPRRange.Begin, 0, SVEAllocs.BeforePPRs,
EmitAsyncCFI && !HasFP, CFAOffset,
MFI.hasVarSizedObjects() || AllocateAfterPPRs ||
ZPR.LocalsSize || NonSVELocalsSize);
CFAOffset += AllocateBeforePPRs;
MFI.hasVarSizedObjects() || SVEAllocs.AfterPPRs ||
SVEAllocs.AfterZPRs);
CFAOffset += SVEAllocs.BeforePPRs;
assert(PPRRange.End == ZPRRange.Begin &&
"Expected ZPR callee saves after PPR locals");
allocateStackSpace(PPRRange.End, RealignmentPadding, AllocateAfterPPRs,
allocateStackSpace(PPRRange.End, RealignmentPadding, SVEAllocs.AfterPPRs,
EmitAsyncCFI && !HasFP, CFAOffset,
MFI.hasVarSizedObjects() || ZPR.LocalsSize ||
NonSVELocalsSize);
CFAOffset += AllocateAfterPPRs;
MFI.hasVarSizedObjects() || SVEAllocs.AfterZPRs);
CFAOffset += SVEAllocs.AfterPPRs;
} else {
assert(SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord);
// Note: With CalleeSavesAboveFrameRecord, the SVE CS have already been
// allocated (and separate PPR locals are not supported, all SVE locals,
// both PPR and ZPR, are within the ZPR locals area).
assert(!PPR.LocalsSize && "Unexpected PPR locals!");
CFAOffset += SVECalleeSavesSize;
// Note: With CalleeSavesAboveFrameRecord, the SVE CS (BeforePPRs) have
// already been allocated. PPR locals (included in AfterPPRs) are not
// supported (note: this is asserted above).
CFAOffset += SVEAllocs.BeforePPRs;
}

// Allocate space for the rest of the frame including ZPR locals. Align the
Expand All @@ -815,9 +825,9 @@ void AArch64PrologueEmitter::emitPrologue() {
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have the
// correct value here, as NumBytes also includes padding bytes, which
// shouldn't be counted here.
allocateStackSpace(
AfterSVESavesI, RealignmentPadding, ZPR.LocalsSize + NonSVELocalsSize,
EmitAsyncCFI && !HasFP, CFAOffset, MFI.hasVarSizedObjects());
allocateStackSpace(AfterSVESavesI, RealignmentPadding, SVEAllocs.AfterZPRs,
EmitAsyncCFI && !HasFP, CFAOffset,
MFI.hasVarSizedObjects());
}

// If we need a base pointer, set it up here. It's whatever the value of the
Expand Down Expand Up @@ -1472,27 +1482,26 @@ void AArch64EpilogueEmitter::emitEpilogue() {
assert(NumBytes >= 0 && "Negative stack allocation size!?");

StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
StackOffset SVEStackSize =
SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize;
SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});
MachineBasicBlock::iterator RestoreBegin = ZPRRange.Begin;
MachineBasicBlock::iterator RestoreEnd = PPRRange.End;

// Deallocate the SVE area.
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
StackOffset SVELocalsSize = ZPR.LocalsSize + PPR.LocalsSize;
assert(!SVEAllocs.AfterPPRs &&
"unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
// If the callee-save area is before FP, restoring the FP implicitly
// deallocates non-callee-save SVE allocations. Otherwise, deallocate them
// deallocates non-callee-save SVE allocations. Otherwise, deallocate them
// explicitly.
if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
SVELocalsSize, TII, MachineInstr::FrameDestroy, false,
NeedsWinCFI, &HasWinCFI);
SVEAllocs.AfterZPRs, TII, MachineInstr::FrameDestroy,
false, NeedsWinCFI, &HasWinCFI);
}

// Deallocate callee-save SVE registers.
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false,
NeedsWinCFI, &HasWinCFI);
emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
SVEAllocs.BeforePPRs, TII, MachineInstr::FrameDestroy,
false, NeedsWinCFI, &HasWinCFI);
} else if (AFI->hasSVEStackSize()) {
// If we have stack realignment or variable-sized objects we must use the FP
// to restore SVE callee saves (as there is an unknown amount of
Expand Down Expand Up @@ -1524,46 +1533,33 @@ void AArch64EpilogueEmitter::emitEpilogue() {
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
-SVECalleeSavesSize, TII, MachineInstr::FrameDestroy);
} else if (BaseForSVEDealloc == AArch64::SP) {
auto CFAOffset =
SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize);

if (SVECalleeSavesSize) {
// Deallocate the non-SVE locals first before we can deallocate (and
// restore callee saves) from the SVE area.
auto NonSVELocals = StackOffset::getFixed(NumBytes);
emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
NonSVELocals, TII, MachineInstr::FrameDestroy, false,
NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
CFAOffset -= NonSVELocals;
NumBytes = 0;
}

if (ZPR.LocalsSize) {
emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
ZPR.LocalsSize, TII, MachineInstr::FrameDestroy, false,
NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
CFAOffset -= ZPR.LocalsSize;
auto NonSVELocals = StackOffset::getFixed(NumBytes);
auto CFAOffset = NonSVELocals + StackOffset::getFixed(PrologueSaveSize) +
SVEAllocs.totalSize();

if (SVECalleeSavesSize || SVELayout == SVEStackLayout::Split) {
// Deallocate non-SVE locals now. This is needed to reach the SVE callee
// saves, but may also allow combining stack hazard bumps for split SVE.
SVEAllocs.AfterZPRs += NonSVELocals;
NumBytes -= NonSVELocals.getFixed();
}

StackOffset SVECalleeSavesToDealloc = SVECalleeSavesSize;
if (SVELayout == SVEStackLayout::Split &&
(PPR.LocalsSize || ZPR.CalleeSavesSize)) {
assert(PPRRange.Begin == ZPRRange.End &&
"Expected PPR restores after ZPR");
emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP,
PPR.LocalsSize + ZPR.CalleeSavesSize, TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI,
&HasWinCFI, EmitCFI && !HasFP, CFAOffset);
CFAOffset -= PPR.LocalsSize + ZPR.CalleeSavesSize;
SVECalleeSavesToDealloc -= ZPR.CalleeSavesSize;
}

// If split SVE is on, this dealloc PPRs, otherwise, deallocs ZPRs + PPRs:
if (SVECalleeSavesToDealloc)
emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
SVECalleeSavesToDealloc, TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI,
&HasWinCFI, EmitCFI && !HasFP, CFAOffset);
// To deallocate the SVE stack adjust by the allocations in reverse.
emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
SVEAllocs.AfterZPRs, TII, MachineInstr::FrameDestroy,
false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
CFAOffset);
CFAOffset -= SVEAllocs.AfterZPRs;
assert(PPRRange.Begin == ZPRRange.End &&
"Expected PPR restores after ZPR");
emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP,
SVEAllocs.AfterPPRs, TII, MachineInstr::FrameDestroy,
false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
CFAOffset);
CFAOffset -= SVEAllocs.AfterPPRs;
emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
SVEAllocs.BeforePPRs, TII, MachineInstr::FrameDestroy,
false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
CFAOffset);
}

if (EmitCFI)
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ struct SVEFrameSizes {
} PPR, ZPR;
};

struct SVEStackAllocations {
StackOffset BeforePPRs, AfterPPRs, AfterZPRs;
StackOffset totalSize() const { return BeforePPRs + AfterPPRs + AfterZPRs; }
};

class AArch64PrologueEpilogueCommon {
public:
AArch64PrologueEpilogueCommon(MachineFunction &MF, MachineBasicBlock &MBB,
Expand Down Expand Up @@ -66,6 +71,7 @@ class AArch64PrologueEpilogueCommon {
bool shouldCombineCSRLocalStackBump(uint64_t StackBumpBytes) const;

SVEFrameSizes getSVEStackFrameSizes() const;
SVEStackAllocations getSVEStackAllocations(SVEFrameSizes const &);

MachineFunction &MF;
MachineBasicBlock &MBB;
Expand Down
Loading