@@ -370,6 +370,22 @@ SVEFrameSizes AArch64PrologueEpilogueCommon::getSVEStackFrameSizes() const {
370370 {ZPRCalleeSavesSize, PPRLocalsSize + ZPRLocalsSize}};
371371}
372372
/// Splits the SVE part of the frame into three stack adjustments, returned in
/// the order {BeforePPRs, AfterPPRs, AfterZPRs}.
///
/// Unless SVELayout is Split, BeforePPRs holds the ZPR and PPR callee-save
/// sizes together, AfterPPRs is empty, and AfterZPRs holds the ZPR locals.
/// With the Split layout, BeforePPRs holds only the PPR callee saves; the PPR
/// locals go either into AfterPPRs (together with the ZPR callee saves) or,
/// when there are no ZPR callee saves, into AfterZPRs.
///
/// \param SVE Callee-save and locals sizes for the PPR and ZPR areas.
/// \returns The three allocation sizes described above.
373+ SVEStackAllocations AArch64PrologueEpilogueCommon::getSVEStackAllocations (
374+ SVEFrameSizes const &SVE) {
375+ StackOffset AfterZPRs = SVE.ZPR .LocalsSize ; // Starts as just the ZPR locals.
376+ StackOffset BeforePPRs = SVE.ZPR .CalleeSavesSize + SVE.PPR .CalleeSavesSize ; // Default: all SVE callee saves in one allocation.
377+ StackOffset AfterPPRs = {}; // Stays empty unless the Split layout fills it below.
378+ if (SVELayout == SVEStackLayout::Split) {
379+ BeforePPRs = SVE.PPR .CalleeSavesSize ; // Split: only the PPR callee saves go in the first allocation.
380+ // With ZPR CSRs, the PPR locals and the ZPR callee-save area form the
// middle (AfterPPRs) allocation; with no ZPR CSRs, the PPR locals are
// folded into AfterZPRs instead and AfterPPRs stays empty.
381+ if (SVE.ZPR .CalleeSavesSize )
382+ AfterPPRs += SVE.PPR .LocalsSize + SVE.ZPR .CalleeSavesSize ;
383+ else
384+ AfterZPRs += SVE.PPR .LocalsSize ; // No ZPR CSRs: allocate the PPR and ZPR locals together.
385+ }
386+ return {BeforePPRs, AfterPPRs, AfterZPRs};
387+ }
388+
373389struct SVEPartitions {
374390 struct {
375391 MachineBasicBlock::iterator Begin, End;
@@ -687,16 +703,19 @@ void AArch64PrologueEmitter::emitPrologue() {
687703 // All of the remaining stack allocations are for locals.
688704 determineLocalsStackSize (NumBytes, PrologueSaveSize);
689705
706+ auto [PPR, ZPR] = getSVEStackFrameSizes ();
707+ SVEStackAllocations SVEAllocs = getSVEStackAllocations ({PPR, ZPR});
708+
690709 MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;
691710 if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
711+ assert (!SVEAllocs.AfterPPRs &&
712+ " unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord" );
692713 // If we're doing SVE saves first, we need to immediately allocate space
693714 // for fixed objects, then space for the SVE callee saves.
694715 //
695716 // Windows unwind requires that the scalable size is a multiple of 16;
696717 // that's handled when the callee-saved size is computed.
697- auto SaveSize =
698- StackOffset::getScalable (AFI->getSVECalleeSavedStackSize ()) +
699- StackOffset::getFixed (FixedObject);
718+ auto SaveSize = SVEAllocs.BeforePPRs + StackOffset::getFixed (FixedObject);
700719 allocateStackSpace (PrologueBeginI, 0 , SaveSize, false , StackOffset{},
701720 /* FollowupAllocs=*/ true );
702721 NumBytes -= FixedObject;
@@ -764,12 +783,11 @@ void AArch64PrologueEmitter::emitPrologue() {
764783 if (AFL.windowsRequiresStackProbe (MF, NumBytes + RealignmentPadding))
765784 emitWindowsStackProbe (AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
766785
767- auto [PPR, ZPR] = getSVEStackFrameSizes ();
768- StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize ;
769786 StackOffset NonSVELocalsSize = StackOffset::getFixed (NumBytes);
787+ SVEAllocs.AfterZPRs += NonSVELocalsSize;
788+
770789 StackOffset CFAOffset =
771790 StackOffset::getFixed (MFI.getStackSize ()) - NonSVELocalsSize;
772-
773791 MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
774792 // Allocate space for the callee saves and PPR locals (if any).
775793 if (SVELayout != SVEStackLayout::CalleeSavesAboveFrameRecord) {
@@ -780,31 +798,23 @@ void AArch64PrologueEmitter::emitPrologue() {
780798 if (EmitAsyncCFI)
781799 emitCalleeSavedSVELocations (AfterSVESavesI);
782800
783- StackOffset AllocateBeforePPRs = SVECalleeSavesSize;
784- StackOffset AllocateAfterPPRs = PPR.LocalsSize ;
785- if (SVELayout == SVEStackLayout::Split) {
786- AllocateBeforePPRs = PPR.CalleeSavesSize ;
787- AllocateAfterPPRs = PPR.LocalsSize + ZPR.CalleeSavesSize ;
788- }
789- allocateStackSpace (PPRRange.Begin , 0 , AllocateBeforePPRs,
801+ allocateStackSpace (PPRRange.Begin , 0 , SVEAllocs.BeforePPRs ,
790802 EmitAsyncCFI && !HasFP, CFAOffset,
791- MFI.hasVarSizedObjects () || AllocateAfterPPRs ||
792- ZPR. LocalsSize || NonSVELocalsSize );
793- CFAOffset += AllocateBeforePPRs ;
803+ MFI.hasVarSizedObjects () || SVEAllocs. AfterPPRs ||
804+ SVEAllocs. AfterZPRs );
805+ CFAOffset += SVEAllocs. BeforePPRs ;
794806 assert (PPRRange.End == ZPRRange.Begin &&
795807 " Expected ZPR callee saves after PPR locals" );
796- allocateStackSpace (PPRRange.End , RealignmentPadding, AllocateAfterPPRs ,
808+ allocateStackSpace (PPRRange.End , RealignmentPadding, SVEAllocs. AfterPPRs ,
797809 EmitAsyncCFI && !HasFP, CFAOffset,
798- MFI.hasVarSizedObjects () || ZPR.LocalsSize ||
799- NonSVELocalsSize);
800- CFAOffset += AllocateAfterPPRs;
810+ MFI.hasVarSizedObjects () || SVEAllocs.AfterZPRs );
811+ CFAOffset += SVEAllocs.AfterPPRs ;
801812 } else {
802813 assert (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord);
803- // Note: With CalleeSavesAboveFrameRecord, the SVE CS have already been
804- // allocated (and separate PPR locals are not supported, all SVE locals,
805- // both PPR and ZPR, are within the ZPR locals area).
806- assert (!PPR.LocalsSize && " Unexpected PPR locals!" );
807- CFAOffset += SVECalleeSavesSize;
814+ // Note: With CalleeSavesAboveFrameRecord, the SVE CS (BeforePPRs) have
815+ // already been allocated. PPR locals (included in AfterPPRs) are not
816+ // supported (note: this is asserted above).
817+ CFAOffset += SVEAllocs.BeforePPRs ;
808818 }
809819
810820 // Allocate space for the rest of the frame including ZPR locals. Align the
@@ -815,9 +825,9 @@ void AArch64PrologueEmitter::emitPrologue() {
815825 // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have the
816826 // correct value here, as NumBytes also includes padding bytes, which
817827 // shouldn't be counted here.
818- allocateStackSpace (
819- AfterSVESavesI, RealignmentPadding, ZPR. LocalsSize + NonSVELocalsSize ,
820- EmitAsyncCFI && !HasFP, CFAOffset, MFI.hasVarSizedObjects ());
828+ allocateStackSpace (AfterSVESavesI, RealignmentPadding, SVEAllocs. AfterZPRs ,
829+ EmitAsyncCFI && !HasFP, CFAOffset ,
830+ MFI.hasVarSizedObjects ());
821831 }
822832
823833 // If we need a base pointer, set it up here. It's whatever the value of the
@@ -1472,27 +1482,26 @@ void AArch64EpilogueEmitter::emitEpilogue() {
14721482 assert (NumBytes >= 0 && " Negative stack allocation size!?" );
14731483
14741484 StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize ;
1475- StackOffset SVEStackSize =
1476- SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize ;
1485+ SVEStackAllocations SVEAllocs = getSVEStackAllocations ({PPR, ZPR});
14771486 MachineBasicBlock::iterator RestoreBegin = ZPRRange.Begin ;
1478- MachineBasicBlock::iterator RestoreEnd = PPRRange.End ;
14791487
14801488 // Deallocate the SVE area.
14811489 if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
1482- StackOffset SVELocalsSize = ZPR.LocalsSize + PPR.LocalsSize ;
1490+ assert (!SVEAllocs.AfterPPRs &&
1491+ " unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord" );
14831492 // If the callee-save area is before FP, restoring the FP implicitly
1484- // deallocates non-callee-save SVE allocations. Otherwise, deallocate them
1493+ // deallocates non-callee-save SVE allocations. Otherwise, deallocate them
14851494 // explicitly.
14861495 if (!AFI->isStackRealigned () && !MFI.hasVarSizedObjects ()) {
14871496 emitFrameOffset (MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1488- SVELocalsSize , TII, MachineInstr::FrameDestroy, false ,
1489- NeedsWinCFI, &HasWinCFI);
1497+ SVEAllocs. AfterZPRs , TII, MachineInstr::FrameDestroy,
1498+ false , NeedsWinCFI, &HasWinCFI);
14901499 }
14911500
14921501 // Deallocate callee-save SVE registers.
1493- emitFrameOffset (MBB, RestoreEnd , DL, AArch64::SP, AArch64::SP,
1494- SVECalleeSavesSize , TII, MachineInstr::FrameDestroy, false ,
1495- NeedsWinCFI, &HasWinCFI);
1502+ emitFrameOffset (MBB, PPRRange. End , DL, AArch64::SP, AArch64::SP,
1503+ SVEAllocs. BeforePPRs , TII, MachineInstr::FrameDestroy,
1504+ false , NeedsWinCFI, &HasWinCFI);
14961505 } else if (AFI->hasSVEStackSize ()) {
14971506 // If we have stack realignment or variable-sized objects we must use the FP
14981507 // to restore SVE callee saves (as there is an unknown amount of
@@ -1524,46 +1533,33 @@ void AArch64EpilogueEmitter::emitEpilogue() {
15241533 emitFrameOffset (MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
15251534 -SVECalleeSavesSize, TII, MachineInstr::FrameDestroy);
15261535 } else if (BaseForSVEDealloc == AArch64::SP) {
1527- auto CFAOffset =
1528- SVEStackSize + StackOffset::getFixed (NumBytes + PrologueSaveSize);
1529-
1530- if (SVECalleeSavesSize) {
1531- // Deallocate the non-SVE locals first before we can deallocate (and
1532- // restore callee saves) from the SVE area.
1533- auto NonSVELocals = StackOffset::getFixed (NumBytes);
1534- emitFrameOffset (MBB, ZPRRange.Begin , DL, AArch64::SP, AArch64::SP,
1535- NonSVELocals, TII, MachineInstr::FrameDestroy, false ,
1536- NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
1537- CFAOffset -= NonSVELocals;
1538- NumBytes = 0 ;
1539- }
1540-
1541- if (ZPR.LocalsSize ) {
1542- emitFrameOffset (MBB, ZPRRange.Begin , DL, AArch64::SP, AArch64::SP,
1543- ZPR.LocalsSize , TII, MachineInstr::FrameDestroy, false ,
1544- NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
1545- CFAOffset -= ZPR.LocalsSize ;
1536+ auto NonSVELocals = StackOffset::getFixed (NumBytes);
1537+ auto CFAOffset = NonSVELocals + StackOffset::getFixed (PrologueSaveSize) +
1538+ SVEAllocs.totalSize ();
1539+
1540+ if (SVECalleeSavesSize || SVELayout == SVEStackLayout::Split) {
1541+ // Deallocate non-SVE locals now. This is needed to reach the SVE callee
1542+ // saves, but may also allow combining stack hazard bumps for split SVE.
1543+ SVEAllocs.AfterZPRs += NonSVELocals;
1544+ NumBytes -= NonSVELocals.getFixed ();
15461545 }
1547-
1548- StackOffset SVECalleeSavesToDealloc = SVECalleeSavesSize;
1549- if (SVELayout == SVEStackLayout::Split &&
1550- (PPR.LocalsSize || ZPR.CalleeSavesSize )) {
1551- assert (PPRRange.Begin == ZPRRange.End &&
1552- " Expected PPR restores after ZPR" );
1553- emitFrameOffset (MBB, PPRRange.Begin , DL, AArch64::SP, AArch64::SP,
1554- PPR.LocalsSize + ZPR.CalleeSavesSize , TII,
1555- MachineInstr::FrameDestroy, false , NeedsWinCFI,
1556- &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
1557- CFAOffset -= PPR.LocalsSize + ZPR.CalleeSavesSize ;
1558- SVECalleeSavesToDealloc -= ZPR.CalleeSavesSize ;
1559- }
1560-
1561- // If split SVE is on, this dealloc PPRs, otherwise, deallocs ZPRs + PPRs:
1562- if (SVECalleeSavesToDealloc)
1563- emitFrameOffset (MBB, PPRRange.End , DL, AArch64::SP, AArch64::SP,
1564- SVECalleeSavesToDealloc, TII,
1565- MachineInstr::FrameDestroy, false , NeedsWinCFI,
1566- &HasWinCFI, EmitCFI && !HasFP, CFAOffset);
1546+ // To deallocate the SVE stack adjust by the allocations in reverse.
1547+ emitFrameOffset (MBB, ZPRRange.Begin , DL, AArch64::SP, AArch64::SP,
1548+ SVEAllocs.AfterZPRs , TII, MachineInstr::FrameDestroy,
1549+ false , NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1550+ CFAOffset);
1551+ CFAOffset -= SVEAllocs.AfterZPRs ;
1552+ assert (PPRRange.Begin == ZPRRange.End &&
1553+ " Expected PPR restores after ZPR" );
1554+ emitFrameOffset (MBB, PPRRange.Begin , DL, AArch64::SP, AArch64::SP,
1555+ SVEAllocs.AfterPPRs , TII, MachineInstr::FrameDestroy,
1556+ false , NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1557+ CFAOffset);
1558+ CFAOffset -= SVEAllocs.AfterPPRs ;
1559+ emitFrameOffset (MBB, PPRRange.End , DL, AArch64::SP, AArch64::SP,
1560+ SVEAllocs.BeforePPRs , TII, MachineInstr::FrameDestroy,
1561+ false , NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1562+ CFAOffset);
15671563 }
15681564
15691565 if (EmitCFI)
0 commit comments