Skip to content

Commit 9a5c681

Browse files
committed
[AArch64][SME] Reshuffle emit[prologue|epilogue]() for splitSVEObjects (NFCI)
Requested in #142392 (comment) Change-Id: I842faddea1bd54c5e30a9985782baf5dce37e5bb
1 parent 0dfb072 commit 9a5c681

File tree

2 files changed

+145
-142
lines changed

2 files changed

+145
-142
lines changed

llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,8 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
481481
StackHazardCSRSlotIndex = Index;
482482
}
483483

484+
// Placeholder query: currently always returns false. The branches in
// emitPrologue()/emitEpilogue() that are taken when this returns true report
// a fatal "not implemented yet" error.
bool hasSplitSVEObjects() const { return false; }
485+
484486
SMEAttrs getSMEFnAttrs() const { return SMEFnAttrs; }
485487

486488
unsigned getSRetReturnReg() const { return SRetReturnReg; }

llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp

Lines changed: 143 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -708,85 +708,75 @@ void AArch64PrologueEmitter::emitPrologue() {
708708
if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
709709
emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
710710

711-
MachineBasicBlock::iterator CalleeSavesEnd = AfterGPRSavesI;
712-
713711
StackOffset PPRCalleeSavesSize =
714712
StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
715713
StackOffset ZPRCalleeSavesSize =
716714
StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
717715
StackOffset SVECalleeSavesSize = PPRCalleeSavesSize + ZPRCalleeSavesSize;
718716
StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize;
719717
StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize;
718+
std::optional<MachineBasicBlock::iterator> ZPRCalleeSavesBegin,
719+
ZPRCalleeSavesEnd, PPRCalleeSavesBegin, PPRCalleeSavesEnd;
720720

721721
StackOffset CFAOffset =
722722
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
723723
MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
724-
725724
if (!FPAfterSVECalleeSaves) {
726-
MachineBasicBlock::iterator ZPRCalleeSavesBegin = AfterGPRSavesI,
727-
ZPRCalleeSavesEnd = AfterGPRSavesI;
728-
MachineBasicBlock::iterator PPRCalleeSavesBegin = AfterGPRSavesI,
729-
PPRCalleeSavesEnd = AfterGPRSavesI;
730-
731-
// Process the SVE callee-saves to determine what space needs to be
732-
// allocated.
733-
725+
// Process the SVE callee-saves to find the starts/ends of the ZPR and PPR
726+
// areas.
727+
PPRCalleeSavesBegin = AfterGPRSavesI;
734728
if (PPRCalleeSavesSize) {
735729
LLVM_DEBUG(dbgs() << "PPRCalleeSavedStackSize = "
736730
<< PPRCalleeSavesSize.getScalable() << "\n");
737731

738-
PPRCalleeSavesBegin = AfterSVESavesI;
739-
assert(isPartOfPPRCalleeSaves(PPRCalleeSavesBegin) &&
732+
assert(isPartOfPPRCalleeSaves(*PPRCalleeSavesBegin) &&
740733
"Unexpected instruction");
741734
while (isPartOfPPRCalleeSaves(AfterSVESavesI) &&
742735
AfterSVESavesI != MBB.getFirstTerminator())
743736
++AfterSVESavesI;
744-
PPRCalleeSavesEnd = AfterSVESavesI;
745737
}
746-
738+
PPRCalleeSavesEnd = ZPRCalleeSavesBegin = AfterSVESavesI;
747739
if (ZPRCalleeSavesSize) {
748740
LLVM_DEBUG(dbgs() << "ZPRCalleeSavedStackSize = "
749741
<< ZPRCalleeSavesSize.getScalable() << "\n");
750-
ZPRCalleeSavesBegin = AfterSVESavesI;
751-
assert(isPartOfZPRCalleeSaves(ZPRCalleeSavesBegin) &&
742+
assert(isPartOfZPRCalleeSaves(*ZPRCalleeSavesBegin) &&
752743
"Unexpected instruction");
753744
while (isPartOfZPRCalleeSaves(AfterSVESavesI) &&
754745
AfterSVESavesI != MBB.getFirstTerminator())
755746
++AfterSVESavesI;
756-
ZPRCalleeSavesEnd = AfterSVESavesI;
757747
}
748+
ZPRCalleeSavesEnd = AfterSVESavesI;
749+
}
750+
751+
if (EmitAsyncCFI)
752+
emitCalleeSavedSVELocations(AfterSVESavesI);
758753

754+
if (AFI->hasSplitSVEObjects()) {
755+
reportFatalInternalError("not implemented yet");
756+
} else {
759757
// Allocate space for the callee saves (if any).
760758
StackOffset LocalsSize =
761759
PPRLocalsSize + ZPRLocalsSize + StackOffset::getFixed(NumBytes);
762-
MachineBasicBlock::iterator CalleeSavesBegin =
763-
AFI->getPPRCalleeSavedStackSize() ? PPRCalleeSavesBegin
764-
: ZPRCalleeSavesBegin;
765-
allocateStackSpace(CalleeSavesBegin, 0, SVECalleeSavesSize,
766-
EmitAsyncCFI && !HasFP, CFAOffset,
767-
MFI.hasVarSizedObjects() || LocalsSize);
768-
769-
CalleeSavesEnd = AFI->getZPRCalleeSavedStackSize() ? ZPRCalleeSavesEnd
770-
: PPRCalleeSavesEnd;
771-
}
772-
CFAOffset += SVECalleeSavesSize;
773-
774-
if (EmitAsyncCFI)
775-
emitCalleeSavedSVELocations(CalleeSavesEnd);
776-
777-
// Allocate space for the rest of the frame including SVE locals. Align the
778-
// stack as necessary.
779-
assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
780-
"Cannot use redzone with stack realignment");
781-
if (!AFL.canUseRedZone(MF)) {
782-
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
783-
// the correct value here, as NumBytes also includes padding bytes,
784-
// which shouldn't be counted here.
785-
StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
786-
allocateStackSpace(CalleeSavesEnd, RealignmentPadding,
787-
SVELocalsSize + StackOffset::getFixed(NumBytes),
788-
EmitAsyncCFI && !HasFP, CFAOffset,
789-
MFI.hasVarSizedObjects());
760+
if (!FPAfterSVECalleeSaves)
761+
allocateStackSpace(AfterGPRSavesI, 0, SVECalleeSavesSize,
762+
EmitAsyncCFI && !HasFP, CFAOffset,
763+
MFI.hasVarSizedObjects() || LocalsSize);
764+
CFAOffset += SVECalleeSavesSize;
765+
766+
// Allocate space for the rest of the frame including SVE locals. Align the
767+
// stack as necessary.
768+
assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
769+
"Cannot use redzone with stack realignment");
770+
if (!AFL.canUseRedZone(MF)) {
771+
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
772+
// the correct value here, as NumBytes also includes padding bytes,
773+
// which shouldn't be counted here.
774+
StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
775+
allocateStackSpace(AfterSVESavesI, RealignmentPadding,
776+
SVELocalsSize + StackOffset::getFixed(NumBytes),
777+
EmitAsyncCFI && !HasFP, CFAOffset,
778+
MFI.hasVarSizedObjects());
779+
}
790780
}
791781

792782
// If we need a base pointer, set it up here. It's whatever the value of the
@@ -1384,7 +1374,9 @@ void AArch64EpilogueEmitter::emitEpilogue() {
13841374
if (HasFP && AFI->hasSwiftAsyncContext())
13851375
emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
13861376

1387-
StackOffset SVEStackSize = AFL.getSVEStackSize(MF);
1377+
StackOffset ZPRStackSize = AFL.getZPRStackSize(MF);
1378+
StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
1379+
StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
13881380

13891381
// If there is a single SP update, insert it before the ret and we're done.
13901382
if (CombineSPBump) {
@@ -1405,111 +1397,120 @@ void AArch64EpilogueEmitter::emitEpilogue() {
14051397
NumBytes -= PrologueSaveSize;
14061398
assert(NumBytes >= 0 && "Negative stack allocation size!?");
14071399

1408-
// Process the SVE callee-saves to determine what space needs to be
1409-
// deallocated.
1410-
StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
1411-
MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
1412-
RestoreEnd = FirstGPRRestoreI;
1413-
int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize();
1414-
int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize();
1415-
int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize;
1416-
1417-
if (SVECalleeSavedSize) {
1418-
if (FPAfterSVECalleeSaves)
1419-
RestoreEnd = MBB.getFirstTerminator();
1420-
1421-
RestoreBegin = std::prev(RestoreEnd);
1422-
while (RestoreBegin != MBB.begin() &&
1423-
isPartOfSVECalleeSaves(std::prev(RestoreBegin)))
1424-
--RestoreBegin;
1425-
1426-
assert(isPartOfSVECalleeSaves(RestoreBegin) &&
1427-
isPartOfSVECalleeSaves(std::prev(RestoreEnd)) &&
1428-
"Unexpected instruction");
1429-
1430-
StackOffset CalleeSavedSizeAsOffset =
1431-
StackOffset::getScalable(SVECalleeSavedSize);
1432-
DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
1433-
DeallocateAfter = CalleeSavedSizeAsOffset;
1434-
}
1400+
if (!AFI->hasSplitSVEObjects()) {
1401+
// Process the SVE callee-saves to determine what space needs to be
1402+
// deallocated.
1403+
StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
1404+
MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
1405+
RestoreEnd = FirstGPRRestoreI;
1406+
int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize();
1407+
int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize();
1408+
int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize;
1409+
1410+
if (SVECalleeSavedSize) {
1411+
if (FPAfterSVECalleeSaves)
1412+
RestoreEnd = MBB.getFirstTerminator();
1413+
1414+
RestoreBegin = std::prev(RestoreEnd);
1415+
while (RestoreBegin != MBB.begin() &&
1416+
isPartOfSVECalleeSaves(std::prev(RestoreBegin)))
1417+
--RestoreBegin;
1418+
1419+
assert(isPartOfSVECalleeSaves(RestoreBegin) &&
1420+
isPartOfSVECalleeSaves(std::prev(RestoreEnd)) &&
1421+
"Unexpected instruction");
14351422

1436-
// Deallocate the SVE area.
1437-
if (FPAfterSVECalleeSaves) {
1438-
// If the callee-save area is before FP, restoring the FP implicitly
1439-
// deallocates non-callee-save SVE allocations. Otherwise, deallocate
1440-
// them explicitly.
1441-
if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
1442-
emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1443-
DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
1444-
NeedsWinCFI, &HasWinCFI);
1423+
StackOffset CalleeSavedSizeAsOffset =
1424+
StackOffset::getScalable(SVECalleeSavedSize);
1425+
DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
1426+
DeallocateAfter = CalleeSavedSizeAsOffset;
14451427
}
14461428

1447-
// Deallocate callee-save non-SVE registers.
1448-
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1449-
StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII,
1450-
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1451-
1452-
// Deallocate fixed objects.
1453-
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1454-
StackOffset::getFixed(FixedObject), TII,
1455-
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1456-
1457-
// Deallocate callee-save SVE registers.
1458-
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1459-
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
1460-
NeedsWinCFI, &HasWinCFI);
1461-
} else if (SVEStackSize) {
1462-
int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
1463-
// If we have stack realignment or variable-sized objects we must use the
1464-
// FP to restore SVE callee saves (as there is an unknown amount of
1465-
// data/padding between the SP and SVE CS area).
1466-
Register BaseForSVEDealloc =
1467-
(AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
1468-
: AArch64::SP;
1469-
if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
1470-
Register CalleeSaveBase = AArch64::FP;
1471-
if (int64_t CalleeSaveBaseOffset =
1472-
AFI->getCalleeSaveBaseToFrameRecordOffset()) {
1473-
// If we have a non-zero offset to the non-SVE CS base we need to
1474-
// compute the base address by subtracting the offset in a temporary
1475-
// register first (to avoid briefly deallocating the SVE CS).
1476-
CalleeSaveBase =
1477-
MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
1478-
emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
1479-
StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
1480-
MachineInstr::FrameDestroy);
1481-
}
1482-
// The code below will deallocate the stack space by moving the
1483-
// SP to the start of the SVE callee-save area.
1484-
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
1485-
StackOffset::getScalable(-SVECalleeSavedSize), TII,
1486-
MachineInstr::FrameDestroy);
1487-
} else if (BaseForSVEDealloc == AArch64::SP) {
1488-
if (SVECalleeSavedSize) {
1489-
// Deallocate the non-SVE locals first before we can deallocate (and
1490-
// restore callee saves) from the SVE area.
1491-
emitFrameOffset(
1492-
MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1493-
StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy,
1494-
false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1495-
SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize));
1496-
NumBytes = 0;
1429+
// Deallocate the SVE area.
1430+
if (FPAfterSVECalleeSaves) {
1431+
// If the callee-save area is before FP, restoring the FP implicitly
1432+
// deallocates non-callee-save SVE allocations. Otherwise, deallocate
1433+
// them explicitly.
1434+
if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
1435+
emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1436+
DeallocateBefore, TII, MachineInstr::FrameDestroy,
1437+
false, NeedsWinCFI, &HasWinCFI);
14971438
}
14981439

1440+
// Deallocate callee-save non-SVE registers.
14991441
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1500-
DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
1501-
NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1502-
SVEStackSize +
1503-
StackOffset::getFixed(NumBytes + PrologueSaveSize));
1442+
StackOffset::getFixed(AFI->getCalleeSavedStackSize()),
1443+
TII, MachineInstr::FrameDestroy, false, NeedsWinCFI,
1444+
&HasWinCFI);
1445+
1446+
// Deallocate fixed objects.
1447+
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1448+
StackOffset::getFixed(FixedObject), TII,
1449+
MachineInstr::FrameDestroy, false, NeedsWinCFI,
1450+
&HasWinCFI);
15041451

1452+
// Deallocate callee-save SVE registers.
15051453
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
15061454
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
1507-
NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1508-
DeallocateAfter +
1509-
StackOffset::getFixed(NumBytes + PrologueSaveSize));
1455+
NeedsWinCFI, &HasWinCFI);
1456+
} else if (SVEStackSize) {
1457+
int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
1458+
// If we have stack realignment or variable-sized objects we must use the
1459+
// FP to restore SVE callee saves (as there is an unknown amount of
1460+
// data/padding between the SP and SVE CS area).
1461+
Register BaseForSVEDealloc =
1462+
(AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
1463+
: AArch64::SP;
1464+
if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
1465+
Register CalleeSaveBase = AArch64::FP;
1466+
if (int64_t CalleeSaveBaseOffset =
1467+
AFI->getCalleeSaveBaseToFrameRecordOffset()) {
1468+
// If we have a non-zero offset to the non-SVE CS base we need
1469+
// to compute the base address by subtracting the offset in a
1470+
// temporary register first (to avoid briefly deallocating the SVE
1471+
// CS).
1472+
CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
1473+
&AArch64::GPR64RegClass);
1474+
emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
1475+
StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
1476+
MachineInstr::FrameDestroy);
1477+
}
1478+
// The code below will deallocate the stack space by moving the
1479+
// SP to the start of the SVE callee-save area.
1480+
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
1481+
StackOffset::getScalable(-SVECalleeSavedSize), TII,
1482+
MachineInstr::FrameDestroy);
1483+
} else if (BaseForSVEDealloc == AArch64::SP) {
1484+
if (SVECalleeSavedSize) {
1485+
// Deallocate the non-SVE locals first before we can deallocate (and
1486+
// restore callee saves) from the SVE area.
1487+
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1488+
StackOffset::getFixed(NumBytes), TII,
1489+
MachineInstr::FrameDestroy, false, NeedsWinCFI,
1490+
&HasWinCFI, EmitCFI && !HasFP,
1491+
SVEStackSize + StackOffset::getFixed(
1492+
NumBytes + PrologueSaveSize));
1493+
NumBytes = 0;
1494+
}
1495+
1496+
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1497+
DeallocateBefore, TII, MachineInstr::FrameDestroy,
1498+
false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1499+
SVEStackSize +
1500+
StackOffset::getFixed(NumBytes + PrologueSaveSize));
1501+
1502+
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1503+
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
1504+
NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1505+
DeallocateAfter +
1506+
StackOffset::getFixed(NumBytes + PrologueSaveSize));
1507+
}
1508+
1509+
if (EmitCFI)
1510+
emitCalleeSavedSVERestores(RestoreEnd);
15101511
}
1511-
if (EmitCFI)
1512-
emitCalleeSavedSVERestores(RestoreEnd);
1512+
} else if (AFI->hasSplitSVEObjects() && SVEStackSize) {
1513+
reportFatalInternalError("not implemented yet");
15131514
}
15141515

15151516
if (!HasFP) {

0 commit comments

Comments
 (0)