@@ -708,85 +708,75 @@ void AArch64PrologueEmitter::emitPrologue() {
708708 if (AFL.windowsRequiresStackProbe (MF, NumBytes + RealignmentPadding))
709709 emitWindowsStackProbe (AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
710710
711- MachineBasicBlock::iterator CalleeSavesEnd = AfterGPRSavesI;
712-
713711 StackOffset PPRCalleeSavesSize =
714712 StackOffset::getScalable (AFI->getPPRCalleeSavedStackSize ());
715713 StackOffset ZPRCalleeSavesSize =
716714 StackOffset::getScalable (AFI->getZPRCalleeSavedStackSize ());
717715 StackOffset SVECalleeSavesSize = PPRCalleeSavesSize + ZPRCalleeSavesSize;
718716 StackOffset PPRLocalsSize = AFL.getPPRStackSize (MF) - PPRCalleeSavesSize;
719717 StackOffset ZPRLocalsSize = AFL.getZPRStackSize (MF) - ZPRCalleeSavesSize;
718+ std::optional<MachineBasicBlock::iterator> ZPRCalleeSavesBegin,
719+ ZPRCalleeSavesEnd, PPRCalleeSavesBegin, PPRCalleeSavesEnd;
720720
721721 StackOffset CFAOffset =
722722 StackOffset::getFixed ((int64_t )MFI.getStackSize () - NumBytes);
723723 MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
724-
725724 if (!FPAfterSVECalleeSaves) {
726- MachineBasicBlock::iterator ZPRCalleeSavesBegin = AfterGPRSavesI,
727- ZPRCalleeSavesEnd = AfterGPRSavesI;
728- MachineBasicBlock::iterator PPRCalleeSavesBegin = AfterGPRSavesI,
729- PPRCalleeSavesEnd = AfterGPRSavesI;
730-
731- // Process the SVE callee-saves to determine what space needs to be
732- // allocated.
733-
725+ // Process the SVE callee-saves to find the starts/ends of the ZPR and PPR
726+ // areas.
727+ PPRCalleeSavesBegin = AfterGPRSavesI;
734728 if (PPRCalleeSavesSize) {
735729 LLVM_DEBUG (dbgs () << " PPRCalleeSavedStackSize = "
736730 << PPRCalleeSavesSize.getScalable () << " \n " );
737731
738- PPRCalleeSavesBegin = AfterSVESavesI;
739- assert (isPartOfPPRCalleeSaves (PPRCalleeSavesBegin) &&
732+ assert (isPartOfPPRCalleeSaves (*PPRCalleeSavesBegin) &&
740733 " Unexpected instruction" );
741734 while (isPartOfPPRCalleeSaves (AfterSVESavesI) &&
742735 AfterSVESavesI != MBB.getFirstTerminator ())
743736 ++AfterSVESavesI;
744- PPRCalleeSavesEnd = AfterSVESavesI;
745737 }
746-
738+ PPRCalleeSavesEnd = ZPRCalleeSavesBegin = AfterSVESavesI;
747739 if (ZPRCalleeSavesSize) {
748740 LLVM_DEBUG (dbgs () << " ZPRCalleeSavedStackSize = "
749741 << ZPRCalleeSavesSize.getScalable () << " \n " );
750- ZPRCalleeSavesBegin = AfterSVESavesI;
751- assert (isPartOfZPRCalleeSaves (ZPRCalleeSavesBegin) &&
742+ assert (isPartOfZPRCalleeSaves (*ZPRCalleeSavesBegin) &&
752743 " Unexpected instruction" );
753744 while (isPartOfZPRCalleeSaves (AfterSVESavesI) &&
754745 AfterSVESavesI != MBB.getFirstTerminator ())
755746 ++AfterSVESavesI;
756- ZPRCalleeSavesEnd = AfterSVESavesI;
757747 }
748+ ZPRCalleeSavesEnd = AfterSVESavesI;
749+ }
750+
751+ if (EmitAsyncCFI)
752+ emitCalleeSavedSVELocations (AfterSVESavesI);
758753
754+ if (AFI->hasSplitSVEObjects ()) {
755+ reportFatalInternalError (" not implemented yet" );
756+ } else {
759757 // Allocate space for the callee saves (if any).
760758 StackOffset LocalsSize =
761759 PPRLocalsSize + ZPRLocalsSize + StackOffset::getFixed (NumBytes);
762- MachineBasicBlock::iterator CalleeSavesBegin =
763- AFI->getPPRCalleeSavedStackSize () ? PPRCalleeSavesBegin
764- : ZPRCalleeSavesBegin;
765- allocateStackSpace (CalleeSavesBegin, 0 , SVECalleeSavesSize,
766- EmitAsyncCFI && !HasFP, CFAOffset,
767- MFI.hasVarSizedObjects () || LocalsSize);
768-
769- CalleeSavesEnd = AFI->getZPRCalleeSavedStackSize () ? ZPRCalleeSavesEnd
770- : PPRCalleeSavesEnd;
771- }
772- CFAOffset += SVECalleeSavesSize;
773-
774- if (EmitAsyncCFI)
775- emitCalleeSavedSVELocations (CalleeSavesEnd);
776-
777- // Allocate space for the rest of the frame including SVE locals. Align the
778- // stack as necessary.
779- assert (!(AFL.canUseRedZone (MF) && NeedsRealignment) &&
780- " Cannot use redzone with stack realignment" );
781- if (!AFL.canUseRedZone (MF)) {
782- // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
783- // the correct value here, as NumBytes also includes padding bytes,
784- // which shouldn't be counted here.
785- StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
786- allocateStackSpace (CalleeSavesEnd, RealignmentPadding,
787- SVELocalsSize + StackOffset::getFixed (NumBytes),
788- EmitAsyncCFI && !HasFP, CFAOffset,
789- MFI.hasVarSizedObjects ());
760+ if (!FPAfterSVECalleeSaves)
761+ allocateStackSpace (AfterGPRSavesI, 0 , SVECalleeSavesSize,
762+ EmitAsyncCFI && !HasFP, CFAOffset,
763+ MFI.hasVarSizedObjects () || LocalsSize);
764+ CFAOffset += SVECalleeSavesSize;
765+
766+ // Allocate space for the rest of the frame including SVE locals. Align the
767+ // stack as necessary.
768+ assert (!(AFL.canUseRedZone (MF) && NeedsRealignment) &&
769+ " Cannot use redzone with stack realignment" );
770+ if (!AFL.canUseRedZone (MF)) {
771+ // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
772+ // the correct value here, as NumBytes also includes padding bytes,
773+ // which shouldn't be counted here.
774+ StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
775+ allocateStackSpace (AfterSVESavesI, RealignmentPadding,
776+ SVELocalsSize + StackOffset::getFixed (NumBytes),
777+ EmitAsyncCFI && !HasFP, CFAOffset,
778+ MFI.hasVarSizedObjects ());
779+ }
790780 }
791781
792782 // If we need a base pointer, set it up here. It's whatever the value of the
@@ -1384,7 +1374,9 @@ void AArch64EpilogueEmitter::emitEpilogue() {
13841374 if (HasFP && AFI->hasSwiftAsyncContext ())
13851375 emitSwiftAsyncContextFramePointer (EpilogueEndI, DL);
13861376
1387- StackOffset SVEStackSize = AFL.getSVEStackSize (MF);
1377+ StackOffset ZPRStackSize = AFL.getZPRStackSize (MF);
1378+ StackOffset PPRStackSize = AFL.getPPRStackSize (MF);
1379+ StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
13881380
13891381 // If there is a single SP update, insert it before the ret and we're done.
13901382 if (CombineSPBump) {
@@ -1405,111 +1397,120 @@ void AArch64EpilogueEmitter::emitEpilogue() {
14051397 NumBytes -= PrologueSaveSize;
14061398 assert (NumBytes >= 0 && " Negative stack allocation size!?" );
14071399
1408- // Process the SVE callee-saves to determine what space needs to be
1409- // deallocated.
1410- StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
1411- MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
1412- RestoreEnd = FirstGPRRestoreI;
1413- int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize ();
1414- int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize ();
1415- int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize;
1416-
1417- if (SVECalleeSavedSize) {
1418- if (FPAfterSVECalleeSaves)
1419- RestoreEnd = MBB.getFirstTerminator ();
1420-
1421- RestoreBegin = std::prev (RestoreEnd);
1422- while (RestoreBegin != MBB.begin () &&
1423- isPartOfSVECalleeSaves (std::prev (RestoreBegin)))
1424- --RestoreBegin;
1425-
1426- assert (isPartOfSVECalleeSaves (RestoreBegin) &&
1427- isPartOfSVECalleeSaves (std::prev (RestoreEnd)) &&
1428- " Unexpected instruction" );
1429-
1430- StackOffset CalleeSavedSizeAsOffset =
1431- StackOffset::getScalable (SVECalleeSavedSize);
1432- DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
1433- DeallocateAfter = CalleeSavedSizeAsOffset;
1434- }
1400+ if (!AFI->hasSplitSVEObjects ()) {
1401+ // Process the SVE callee-saves to determine what space needs to be
1402+ // deallocated.
1403+ StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
1404+ MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
1405+ RestoreEnd = FirstGPRRestoreI;
1406+ int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize ();
1407+ int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize ();
1408+ int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize;
1409+
1410+ if (SVECalleeSavedSize) {
1411+ if (FPAfterSVECalleeSaves)
1412+ RestoreEnd = MBB.getFirstTerminator ();
1413+
1414+ RestoreBegin = std::prev (RestoreEnd);
1415+ while (RestoreBegin != MBB.begin () &&
1416+ isPartOfSVECalleeSaves (std::prev (RestoreBegin)))
1417+ --RestoreBegin;
1418+
1419+ assert (isPartOfSVECalleeSaves (RestoreBegin) &&
1420+ isPartOfSVECalleeSaves (std::prev (RestoreEnd)) &&
1421+ " Unexpected instruction" );
14351422
1436- // Deallocate the SVE area.
1437- if (FPAfterSVECalleeSaves) {
1438- // If the callee-save area is before FP, restoring the FP implicitly
1439- // deallocates non-callee-save SVE allocations. Otherwise, deallocate
1440- // them explicitly.
1441- if (!AFI->isStackRealigned () && !MFI.hasVarSizedObjects ()) {
1442- emitFrameOffset (MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1443- DeallocateBefore, TII, MachineInstr::FrameDestroy, false ,
1444- NeedsWinCFI, &HasWinCFI);
1423+ StackOffset CalleeSavedSizeAsOffset =
1424+ StackOffset::getScalable (SVECalleeSavedSize);
1425+ DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
1426+ DeallocateAfter = CalleeSavedSizeAsOffset;
14451427 }
14461428
1447- // Deallocate callee-save non-SVE registers.
1448- emitFrameOffset (MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1449- StackOffset::getFixed (AFI->getCalleeSavedStackSize ()), TII,
1450- MachineInstr::FrameDestroy, false , NeedsWinCFI, &HasWinCFI);
1451-
1452- // Deallocate fixed objects.
1453- emitFrameOffset (MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1454- StackOffset::getFixed (FixedObject), TII,
1455- MachineInstr::FrameDestroy, false , NeedsWinCFI, &HasWinCFI);
1456-
1457- // Deallocate callee-save SVE registers.
1458- emitFrameOffset (MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1459- DeallocateAfter, TII, MachineInstr::FrameDestroy, false ,
1460- NeedsWinCFI, &HasWinCFI);
1461- } else if (SVEStackSize) {
1462- int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize ();
1463- // If we have stack realignment or variable-sized objects we must use the
1464- // FP to restore SVE callee saves (as there is an unknown amount of
1465- // data/padding between the SP and SVE CS area).
1466- Register BaseForSVEDealloc =
1467- (AFI->isStackRealigned () || MFI.hasVarSizedObjects ()) ? AArch64::FP
1468- : AArch64::SP;
1469- if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
1470- Register CalleeSaveBase = AArch64::FP;
1471- if (int64_t CalleeSaveBaseOffset =
1472- AFI->getCalleeSaveBaseToFrameRecordOffset ()) {
1473- // If we have have an non-zero offset to the non-SVE CS base we need to
1474- // compute the base address by subtracting the offest in a temporary
1475- // register first (to avoid briefly deallocating the SVE CS).
1476- CalleeSaveBase =
1477- MF.getRegInfo ().createVirtualRegister (&AArch64::GPR64RegClass);
1478- emitFrameOffset (MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
1479- StackOffset::getFixed (-CalleeSaveBaseOffset), TII,
1480- MachineInstr::FrameDestroy);
1481- }
1482- // The code below will deallocate the stack space space by moving the
1483- // SP to the start of the SVE callee-save area.
1484- emitFrameOffset (MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
1485- StackOffset::getScalable (-SVECalleeSavedSize), TII,
1486- MachineInstr::FrameDestroy);
1487- } else if (BaseForSVEDealloc == AArch64::SP) {
1488- if (SVECalleeSavedSize) {
1489- // Deallocate the non-SVE locals first before we can deallocate (and
1490- // restore callee saves) from the SVE area.
1491- emitFrameOffset (
1492- MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1493- StackOffset::getFixed (NumBytes), TII, MachineInstr::FrameDestroy,
1494- false , NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1495- SVEStackSize + StackOffset::getFixed (NumBytes + PrologueSaveSize));
1496- NumBytes = 0 ;
1429+ // Deallocate the SVE area.
1430+ if (FPAfterSVECalleeSaves) {
1431+ // If the callee-save area is before FP, restoring the FP implicitly
1432+ // deallocates non-callee-save SVE allocations. Otherwise, deallocate
1433+ // them explicitly.
1434+ if (!AFI->isStackRealigned () && !MFI.hasVarSizedObjects ()) {
1435+ emitFrameOffset (MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1436+ DeallocateBefore, TII, MachineInstr::FrameDestroy,
1437+ false , NeedsWinCFI, &HasWinCFI);
14971438 }
14981439
1440+ // Deallocate callee-save non-SVE registers.
14991441 emitFrameOffset (MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1500- DeallocateBefore, TII, MachineInstr::FrameDestroy, false ,
1501- NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1502- SVEStackSize +
1503- StackOffset::getFixed (NumBytes + PrologueSaveSize));
1442+ StackOffset::getFixed (AFI->getCalleeSavedStackSize ()),
1443+ TII, MachineInstr::FrameDestroy, false , NeedsWinCFI,
1444+ &HasWinCFI);
1445+
1446+ // Deallocate fixed objects.
1447+ emitFrameOffset (MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1448+ StackOffset::getFixed (FixedObject), TII,
1449+ MachineInstr::FrameDestroy, false , NeedsWinCFI,
1450+ &HasWinCFI);
15041451
1452+ // Deallocate callee-save SVE registers.
15051453 emitFrameOffset (MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
15061454 DeallocateAfter, TII, MachineInstr::FrameDestroy, false ,
1507- NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1508- DeallocateAfter +
1509- StackOffset::getFixed (NumBytes + PrologueSaveSize));
1455+ NeedsWinCFI, &HasWinCFI);
1456+ } else if (SVEStackSize) {
1457+ int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize ();
1458+ // If we have stack realignment or variable-sized objects we must use the
1459+ // FP to restore SVE callee saves (as there is an unknown amount of
1460+ // data/padding between the SP and SVE CS area).
1461+ Register BaseForSVEDealloc =
1462+ (AFI->isStackRealigned () || MFI.hasVarSizedObjects ()) ? AArch64::FP
1463+ : AArch64::SP;
1464+ if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
1465+ Register CalleeSaveBase = AArch64::FP;
1466+ if (int64_t CalleeSaveBaseOffset =
1467+ AFI->getCalleeSaveBaseToFrameRecordOffset ()) {
 1468+ // If we have a non-zero offset to the non-SVE CS base we need
 1469+ // to compute the base address by subtracting the offset in a
 1470+ // temporary register first (to avoid briefly deallocating the SVE
 1471+ // CS).
1472+ CalleeSaveBase = MBB.getParent ()->getRegInfo ().createVirtualRegister (
1473+ &AArch64::GPR64RegClass);
1474+ emitFrameOffset (MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
1475+ StackOffset::getFixed (-CalleeSaveBaseOffset), TII,
1476+ MachineInstr::FrameDestroy);
1477+ }
 1478+ // The code below will deallocate the stack space by moving the
 1479+ // SP to the start of the SVE callee-save area.
1480+ emitFrameOffset (MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
1481+ StackOffset::getScalable (-SVECalleeSavedSize), TII,
1482+ MachineInstr::FrameDestroy);
1483+ } else if (BaseForSVEDealloc == AArch64::SP) {
1484+ if (SVECalleeSavedSize) {
1485+ // Deallocate the non-SVE locals first before we can deallocate (and
1486+ // restore callee saves) from the SVE area.
1487+ emitFrameOffset (MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1488+ StackOffset::getFixed (NumBytes), TII,
1489+ MachineInstr::FrameDestroy, false , NeedsWinCFI,
1490+ &HasWinCFI, EmitCFI && !HasFP,
1491+ SVEStackSize + StackOffset::getFixed (
1492+ NumBytes + PrologueSaveSize));
1493+ NumBytes = 0 ;
1494+ }
1495+
1496+ emitFrameOffset (MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1497+ DeallocateBefore, TII, MachineInstr::FrameDestroy,
1498+ false , NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1499+ SVEStackSize +
1500+ StackOffset::getFixed (NumBytes + PrologueSaveSize));
1501+
1502+ emitFrameOffset (MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1503+ DeallocateAfter, TII, MachineInstr::FrameDestroy, false ,
1504+ NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1505+ DeallocateAfter +
1506+ StackOffset::getFixed (NumBytes + PrologueSaveSize));
1507+ }
1508+
1509+ if (EmitCFI)
1510+ emitCalleeSavedSVERestores (RestoreEnd);
15101511 }
1511- if (EmitCFI)
1512- emitCalleeSavedSVERestores (RestoreEnd );
1512+ } else if (AFI-> hasSplitSVEObjects () && SVEStackSize) {
1513+ reportFatalInternalError ( " not implemented yet " );
15131514 }
15141515
15151516 if (!HasFP) {
0 commit comments