Skip to content

Commit 758fd7a

Browse files
authored
[AArch64][SME] Reshuffle emit[prologue|epilogue]() for splitSVEObjects (NFCI) (#161217)
Requested in #142392 (comment)
1 parent e394df3 commit 758fd7a

File tree

2 files changed

+140
-142
lines changed

2 files changed

+140
-142
lines changed

llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,8 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
481481
StackHazardCSRSlotIndex = Index;
482482
}
483483

484+
// Reports whether this function uses split SVE objects. Currently hard-wired
// to return false.
bool hasSplitSVEObjects() const { return false; }
485+
484486
// Returns a copy of the stored SMEFnAttrs member.
SMEAttrs getSMEFnAttrs() const { return SMEFnAttrs; }
485487

486488
// Returns the stored SRetReturnReg member.
unsigned getSRetReturnReg() const { return SRetReturnReg; }

llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp

Lines changed: 138 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -715,8 +715,6 @@ void AArch64PrologueEmitter::emitPrologue() {
715715
if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
716716
emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
717717

718-
MachineBasicBlock::iterator CalleeSavesEnd = AfterGPRSavesI;
719-
720718
StackOffset PPRCalleeSavesSize =
721719
StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
722720
StackOffset ZPRCalleeSavesSize =
@@ -728,72 +726,59 @@ void AArch64PrologueEmitter::emitPrologue() {
728726
StackOffset CFAOffset =
729727
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
730728
MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
731-
732729
if (!FPAfterSVECalleeSaves) {
733-
MachineBasicBlock::iterator ZPRCalleeSavesBegin = AfterGPRSavesI,
734-
ZPRCalleeSavesEnd = AfterGPRSavesI;
735-
MachineBasicBlock::iterator PPRCalleeSavesBegin = AfterGPRSavesI,
736-
PPRCalleeSavesEnd = AfterGPRSavesI;
737-
738-
// Process the SVE callee-saves to determine what space needs to be
739-
// allocated.
740-
730+
// Process the SVE callee-saves to find the starts/ends of the ZPR and PPR
731+
// areas.
741732
if (PPRCalleeSavesSize) {
742733
LLVM_DEBUG(dbgs() << "PPRCalleeSavedStackSize = "
743734
<< PPRCalleeSavesSize.getScalable() << "\n");
744735

745-
PPRCalleeSavesBegin = AfterSVESavesI;
746-
assert(isPartOfPPRCalleeSaves(PPRCalleeSavesBegin) &&
736+
assert(isPartOfPPRCalleeSaves(AfterSVESavesI) &&
747737
"Unexpected instruction");
748738
while (isPartOfPPRCalleeSaves(AfterSVESavesI) &&
749739
AfterSVESavesI != MBB.getFirstTerminator())
750740
++AfterSVESavesI;
751-
PPRCalleeSavesEnd = AfterSVESavesI;
752741
}
753-
754742
if (ZPRCalleeSavesSize) {
755743
LLVM_DEBUG(dbgs() << "ZPRCalleeSavedStackSize = "
756744
<< ZPRCalleeSavesSize.getScalable() << "\n");
757-
ZPRCalleeSavesBegin = AfterSVESavesI;
758-
assert(isPartOfZPRCalleeSaves(ZPRCalleeSavesBegin) &&
745+
assert(isPartOfZPRCalleeSaves(AfterSVESavesI) &&
759746
"Unexpected instruction");
760747
while (isPartOfZPRCalleeSaves(AfterSVESavesI) &&
761748
AfterSVESavesI != MBB.getFirstTerminator())
762749
++AfterSVESavesI;
763-
ZPRCalleeSavesEnd = AfterSVESavesI;
764750
}
751+
}
752+
753+
if (EmitAsyncCFI)
754+
emitCalleeSavedSVELocations(AfterSVESavesI);
765755

756+
if (AFI->hasSplitSVEObjects()) {
757+
reportFatalInternalError("not implemented yet");
758+
} else {
766759
// Allocate space for the callee saves (if any).
767760
StackOffset LocalsSize =
768761
PPRLocalsSize + ZPRLocalsSize + StackOffset::getFixed(NumBytes);
769-
MachineBasicBlock::iterator CalleeSavesBegin =
770-
AFI->getPPRCalleeSavedStackSize() ? PPRCalleeSavesBegin
771-
: ZPRCalleeSavesBegin;
772-
allocateStackSpace(CalleeSavesBegin, 0, SVECalleeSavesSize,
773-
EmitAsyncCFI && !HasFP, CFAOffset,
774-
MFI.hasVarSizedObjects() || LocalsSize);
775-
776-
CalleeSavesEnd = AFI->getZPRCalleeSavedStackSize() ? ZPRCalleeSavesEnd
777-
: PPRCalleeSavesEnd;
778-
}
779-
CFAOffset += SVECalleeSavesSize;
780-
781-
if (EmitAsyncCFI)
782-
emitCalleeSavedSVELocations(CalleeSavesEnd);
783-
784-
// Allocate space for the rest of the frame including SVE locals. Align the
785-
// stack as necessary.
786-
assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
787-
"Cannot use redzone with stack realignment");
788-
if (!AFL.canUseRedZone(MF)) {
789-
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
790-
// the correct value here, as NumBytes also includes padding bytes,
791-
// which shouldn't be counted here.
792-
StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
793-
allocateStackSpace(CalleeSavesEnd, RealignmentPadding,
794-
SVELocalsSize + StackOffset::getFixed(NumBytes),
795-
EmitAsyncCFI && !HasFP, CFAOffset,
796-
MFI.hasVarSizedObjects());
762+
if (!FPAfterSVECalleeSaves)
763+
allocateStackSpace(AfterGPRSavesI, 0, SVECalleeSavesSize,
764+
EmitAsyncCFI && !HasFP, CFAOffset,
765+
MFI.hasVarSizedObjects() || LocalsSize);
766+
CFAOffset += SVECalleeSavesSize;
767+
768+
// Allocate space for the rest of the frame including SVE locals. Align the
769+
// stack as necessary.
770+
assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
771+
"Cannot use redzone with stack realignment");
772+
if (!AFL.canUseRedZone(MF)) {
773+
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
774+
// the correct value here, as NumBytes also includes padding bytes,
775+
// which shouldn't be counted here.
776+
StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
777+
allocateStackSpace(AfterSVESavesI, RealignmentPadding,
778+
SVELocalsSize + StackOffset::getFixed(NumBytes),
779+
EmitAsyncCFI && !HasFP, CFAOffset,
780+
MFI.hasVarSizedObjects());
781+
}
797782
}
798783

799784
// If we need a base pointer, set it up here. It's whatever the value of the
@@ -1391,7 +1376,9 @@ void AArch64EpilogueEmitter::emitEpilogue() {
13911376
if (HasFP && AFI->hasSwiftAsyncContext())
13921377
emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
13931378

1394-
StackOffset SVEStackSize = AFL.getSVEStackSize(MF);
1379+
StackOffset ZPRStackSize = AFL.getZPRStackSize(MF);
1380+
StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
1381+
StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
13951382

13961383
// If there is a single SP update, insert it before the ret and we're done.
13971384
if (CombineSPBump) {
@@ -1412,111 +1399,120 @@ void AArch64EpilogueEmitter::emitEpilogue() {
14121399
NumBytes -= PrologueSaveSize;
14131400
assert(NumBytes >= 0 && "Negative stack allocation size!?");
14141401

1415-
// Process the SVE callee-saves to determine what space needs to be
1416-
// deallocated.
1417-
StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
1418-
MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
1419-
RestoreEnd = FirstGPRRestoreI;
1420-
int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize();
1421-
int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize();
1422-
int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize;
1423-
1424-
if (SVECalleeSavedSize) {
1425-
if (FPAfterSVECalleeSaves)
1426-
RestoreEnd = MBB.getFirstTerminator();
1427-
1428-
RestoreBegin = std::prev(RestoreEnd);
1429-
while (RestoreBegin != MBB.begin() &&
1430-
isPartOfSVECalleeSaves(std::prev(RestoreBegin)))
1431-
--RestoreBegin;
1432-
1433-
assert(isPartOfSVECalleeSaves(RestoreBegin) &&
1434-
isPartOfSVECalleeSaves(std::prev(RestoreEnd)) &&
1435-
"Unexpected instruction");
1436-
1437-
StackOffset CalleeSavedSizeAsOffset =
1438-
StackOffset::getScalable(SVECalleeSavedSize);
1439-
DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
1440-
DeallocateAfter = CalleeSavedSizeAsOffset;
1441-
}
1402+
if (!AFI->hasSplitSVEObjects()) {
1403+
// Process the SVE callee-saves to determine what space needs to be
1404+
// deallocated.
1405+
StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
1406+
MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
1407+
RestoreEnd = FirstGPRRestoreI;
1408+
int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize();
1409+
int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize();
1410+
int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize;
1411+
1412+
if (SVECalleeSavedSize) {
1413+
if (FPAfterSVECalleeSaves)
1414+
RestoreEnd = MBB.getFirstTerminator();
1415+
1416+
RestoreBegin = std::prev(RestoreEnd);
1417+
while (RestoreBegin != MBB.begin() &&
1418+
isPartOfSVECalleeSaves(std::prev(RestoreBegin)))
1419+
--RestoreBegin;
1420+
1421+
assert(isPartOfSVECalleeSaves(RestoreBegin) &&
1422+
isPartOfSVECalleeSaves(std::prev(RestoreEnd)) &&
1423+
"Unexpected instruction");
14421424

1443-
// Deallocate the SVE area.
1444-
if (FPAfterSVECalleeSaves) {
1445-
// If the callee-save area is before FP, restoring the FP implicitly
1446-
// deallocates non-callee-save SVE allocations. Otherwise, deallocate
1447-
// them explicitly.
1448-
if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
1449-
emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1450-
DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
1451-
NeedsWinCFI, &HasWinCFI);
1425+
StackOffset CalleeSavedSizeAsOffset =
1426+
StackOffset::getScalable(SVECalleeSavedSize);
1427+
DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
1428+
DeallocateAfter = CalleeSavedSizeAsOffset;
14521429
}
14531430

1454-
// Deallocate callee-save non-SVE registers.
1455-
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1456-
StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII,
1457-
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1458-
1459-
// Deallocate fixed objects.
1460-
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1461-
StackOffset::getFixed(FixedObject), TII,
1462-
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1463-
1464-
// Deallocate callee-save SVE registers.
1465-
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1466-
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
1467-
NeedsWinCFI, &HasWinCFI);
1468-
} else if (SVEStackSize) {
1469-
int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
1470-
// If we have stack realignment or variable-sized objects we must use the
1471-
// FP to restore SVE callee saves (as there is an unknown amount of
1472-
// data/padding between the SP and SVE CS area).
1473-
Register BaseForSVEDealloc =
1474-
(AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
1475-
: AArch64::SP;
1476-
if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
1477-
Register CalleeSaveBase = AArch64::FP;
1478-
if (int64_t CalleeSaveBaseOffset =
1479-
AFI->getCalleeSaveBaseToFrameRecordOffset()) {
1480-
// If we have have an non-zero offset to the non-SVE CS base we need to
1481-
// compute the base address by subtracting the offest in a temporary
1482-
// register first (to avoid briefly deallocating the SVE CS).
1483-
CalleeSaveBase =
1484-
MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
1485-
emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
1486-
StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
1487-
MachineInstr::FrameDestroy);
1488-
}
1489-
// The code below will deallocate the stack space space by moving the
1490-
// SP to the start of the SVE callee-save area.
1491-
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
1492-
StackOffset::getScalable(-SVECalleeSavedSize), TII,
1493-
MachineInstr::FrameDestroy);
1494-
} else if (BaseForSVEDealloc == AArch64::SP) {
1495-
if (SVECalleeSavedSize) {
1496-
// Deallocate the non-SVE locals first before we can deallocate (and
1497-
// restore callee saves) from the SVE area.
1498-
emitFrameOffset(
1499-
MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1500-
StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy,
1501-
false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1502-
SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize));
1503-
NumBytes = 0;
1431+
// Deallocate the SVE area.
1432+
if (FPAfterSVECalleeSaves) {
1433+
// If the callee-save area is before FP, restoring the FP implicitly
1434+
// deallocates non-callee-save SVE allocations. Otherwise, deallocate
1435+
// them explicitly.
1436+
if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
1437+
emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1438+
DeallocateBefore, TII, MachineInstr::FrameDestroy,
1439+
false, NeedsWinCFI, &HasWinCFI);
15041440
}
15051441

1442+
// Deallocate callee-save non-SVE registers.
15061443
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1507-
DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
1508-
NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1509-
SVEStackSize +
1510-
StackOffset::getFixed(NumBytes + PrologueSaveSize));
1444+
StackOffset::getFixed(AFI->getCalleeSavedStackSize()),
1445+
TII, MachineInstr::FrameDestroy, false, NeedsWinCFI,
1446+
&HasWinCFI);
1447+
1448+
// Deallocate fixed objects.
1449+
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1450+
StackOffset::getFixed(FixedObject), TII,
1451+
MachineInstr::FrameDestroy, false, NeedsWinCFI,
1452+
&HasWinCFI);
15111453

1454+
// Deallocate callee-save SVE registers.
15121455
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
15131456
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
1514-
NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1515-
DeallocateAfter +
1516-
StackOffset::getFixed(NumBytes + PrologueSaveSize));
1457+
NeedsWinCFI, &HasWinCFI);
1458+
} else if (SVEStackSize) {
1459+
int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
1460+
// If we have stack realignment or variable-sized objects we must use the
1461+
// FP to restore SVE callee saves (as there is an unknown amount of
1462+
// data/padding between the SP and SVE CS area).
1463+
Register BaseForSVEDealloc =
1464+
(AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
1465+
: AArch64::SP;
1466+
if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
1467+
Register CalleeSaveBase = AArch64::FP;
1468+
if (int64_t CalleeSaveBaseOffset =
1469+
AFI->getCalleeSaveBaseToFrameRecordOffset()) {
1470+
// If we have a non-zero offset to the non-SVE CS base we need
1471+
// to compute the base address by subtracting the offset in a
1472+
// temporary register first (to avoid briefly deallocating the SVE
1473+
// CS).
1474+
CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
1475+
&AArch64::GPR64RegClass);
1476+
emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
1477+
StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
1478+
MachineInstr::FrameDestroy);
1479+
}
1480+
// The code below will deallocate the stack space by moving the
1481+
// SP to the start of the SVE callee-save area.
1482+
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
1483+
StackOffset::getScalable(-SVECalleeSavedSize), TII,
1484+
MachineInstr::FrameDestroy);
1485+
} else if (BaseForSVEDealloc == AArch64::SP) {
1486+
if (SVECalleeSavedSize) {
1487+
// Deallocate the non-SVE locals first before we can deallocate (and
1488+
// restore callee saves) from the SVE area.
1489+
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1490+
StackOffset::getFixed(NumBytes), TII,
1491+
MachineInstr::FrameDestroy, false, NeedsWinCFI,
1492+
&HasWinCFI, EmitCFI && !HasFP,
1493+
SVEStackSize + StackOffset::getFixed(
1494+
NumBytes + PrologueSaveSize));
1495+
NumBytes = 0;
1496+
}
1497+
1498+
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1499+
DeallocateBefore, TII, MachineInstr::FrameDestroy,
1500+
false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1501+
SVEStackSize +
1502+
StackOffset::getFixed(NumBytes + PrologueSaveSize));
1503+
1504+
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1505+
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
1506+
NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1507+
DeallocateAfter +
1508+
StackOffset::getFixed(NumBytes + PrologueSaveSize));
1509+
}
1510+
1511+
if (EmitCFI)
1512+
emitCalleeSavedSVERestores(RestoreEnd);
15171513
}
1518-
if (EmitCFI)
1519-
emitCalleeSavedSVERestores(RestoreEnd);
1514+
} else if (AFI->hasSplitSVEObjects() && SVEStackSize) {
1515+
reportFatalInternalError("not implemented yet");
15201516
}
15211517

15221518
if (!HasFP) {

0 commit comments

Comments
 (0)