@@ -805,7 +805,7 @@ void AArch64PrologueEmitter::emitPrologue() {
   CFAOffset += SVEAllocs.BeforePPRs;
   assert(PPRRange.End == ZPRRange.Begin &&
          "Expected ZPR callee saves after PPR locals");
-  allocateStackSpace(PPRRange.End, RealignmentPadding, SVEAllocs.AfterPPRs,
+  allocateStackSpace(PPRRange.End, 0, SVEAllocs.AfterPPRs,
                      EmitAsyncCFI && !HasFP, CFAOffset,
                      MFI.hasVarSizedObjects() || SVEAllocs.AfterZPRs);
   CFAOffset += SVEAllocs.AfterPPRs;
@@ -1318,6 +1318,26 @@ AArch64EpilogueEmitter::AArch64EpilogueEmitter(MachineFunction &MF,
   SEHEpilogueStartI = MBB.end();
 }
 
+void AArch64EpilogueEmitter::moveSPBelowFP(MachineBasicBlock::iterator MBBI,
+                                           StackOffset Offset) {
+  // Other combinations could be supported, but are not currently needed.
+  assert(Offset.getScalable() < 0 && Offset.getFixed() <= 0 &&
+         "expected negative offset (with optional fixed portion)");
+  Register Base = AArch64::FP;
+  if (int64_t FixedOffset = Offset.getFixed()) {
+    // If we have a negative fixed offset, we need to subtract it in a
+    // temporary register first (to avoid briefly deallocating the scalable
+    // portion of the offset).
+    Base = MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
+    emitFrameOffset(MBB, MBBI, DL, Base, AArch64::FP,
+                    StackOffset::getFixed(FixedOffset), TII,
+                    MachineInstr::FrameDestroy);
+  }
+  emitFrameOffset(MBB, MBBI, DL, AArch64::SP, Base,
+                  StackOffset::getScalable(Offset.getScalable()), TII,
+                  MachineInstr::FrameDestroy);
+}
+
 void AArch64EpilogueEmitter::emitEpilogue() {
   MachineBasicBlock::iterator EpilogueEndI = MBB.getLastNonDebugInstr();
   if (MBB.end() != EpilogueEndI) {
@@ -1418,6 +1438,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
       AfterCSRPopSize += ProloguePopSize;
     }
   }
+
   // Move past the restores of the callee-saved registers.
   // If we plan on combining the sp bump of the local stack size and the callee
   // save stack size, we might need to adjust the CSR save and restore offsets.
@@ -1483,7 +1504,6 @@ void AArch64EpilogueEmitter::emitEpilogue() {
 
   StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
   SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});
-  MachineBasicBlock::iterator RestoreBegin = ZPRRange.Begin;
 
   // Deallocate the SVE area.
   if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
@@ -1510,28 +1530,25 @@ void AArch64EpilogueEmitter::emitEpilogue() {
         (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
                                                                : AArch64::SP;
     if (SVECalleeSavesSize && BaseForSVEDealloc == AArch64::FP) {
-      // TODO: Support stack realigment and variable-sized objects.
-      assert(
-          SVELayout != SVEStackLayout::Split &&
-          "unexpected stack realignment or variable sized objects with split "
-          "SVE stack objects");
-
-      Register CalleeSaveBase = AArch64::FP;
-      if (int64_t CalleeSaveBaseOffset =
-              AFI->getCalleeSaveBaseToFrameRecordOffset()) {
-        // If we have have an non-zero offset to the non-SVE CS base we need to
-        // compute the base address by subtracting the offest in a temporary
-        // register first (to avoid briefly deallocating the SVE CS).
-        CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
-            &AArch64::GPR64RegClass);
-        emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
-                        StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
-                        MachineInstr::FrameDestroy);
+      if (ZPR.CalleeSavesSize || SVELayout != SVEStackLayout::Split) {
+        // The offset from the frame-pointer to the start of the ZPR saves.
+        StackOffset FPOffsetZPR =
+            -SVECalleeSavesSize - PPR.LocalsSize -
+            StackOffset::getFixed(AFI->getCalleeSaveBaseToFrameRecordOffset());
+        // Deallocate the stack space by moving the SP to the start of the
+        // ZPR/PPR callee-save area.
+        moveSPBelowFP(ZPRRange.Begin, FPOffsetZPR);
+      }
+      // With split SVE, the predicates are stored in a separate area above the
+      // ZPR saves, so we must adjust the stack to the start of the PPRs.
+      if (PPR.CalleeSavesSize && SVELayout == SVEStackLayout::Split) {
+        // The offset from the frame-pointer to the start of the PPR saves.
+        StackOffset FPOffsetPPR = -PPR.CalleeSavesSize;
+        // Move to the start of the PPR area.
+        assert(!FPOffsetPPR.getFixed() && "expected only scalable offset");
+        emitFrameOffset(MBB, ZPRRange.End, DL, AArch64::SP, AArch64::FP,
+                        FPOffsetPPR, TII, MachineInstr::FrameDestroy);
       }
-      // The code below will deallocate the stack space space by moving the SP
-      // to the start of the SVE callee-save area.
-      emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
-                      -SVECalleeSavesSize, TII, MachineInstr::FrameDestroy);
     } else if (BaseForSVEDealloc == AArch64::SP) {
       auto NonSVELocals = StackOffset::getFixed(NumBytes);
       auto CFAOffset = NonSVELocals + StackOffset::getFixed(PrologueSaveSize) +