@@ -1200,7 +1200,25 @@ static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
12001200
12011201 switch (Opc) {
12021202 default :
1203- llvm_unreachable (" No SEH Opcode for this instruction" );
1203+ report_fatal_error (" No SEH Opcode for this instruction" );
1204+ case AArch64::STR_ZXI:
1205+ case AArch64::LDR_ZXI: {
1206+ unsigned Reg0 = RegInfo->getSEHRegNum (MBBI->getOperand (0 ).getReg ());
1207+ MIB = BuildMI (MF, DL, TII.get (AArch64::SEH_SaveZReg))
1208+ .addImm (Reg0)
1209+ .addImm (Imm)
1210+ .setMIFlag (Flag);
1211+ break ;
1212+ }
1213+ case AArch64::STR_PXI:
1214+ case AArch64::LDR_PXI: {
1215+ unsigned Reg0 = RegInfo->getSEHRegNum (MBBI->getOperand (0 ).getReg ());
1216+ MIB = BuildMI (MF, DL, TII.get (AArch64::SEH_SavePReg))
1217+ .addImm (Reg0)
1218+ .addImm (Imm)
1219+ .setMIFlag (Flag);
1220+ break ;
1221+ }
12041222 case AArch64::LDPDpost:
12051223 Imm = -Imm;
12061224 [[fallthrough]];
@@ -1592,6 +1610,9 @@ static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
15921610 case AArch64::CMPNE_PPzZI_B:
15931611 return I->getFlag (MachineInstr::FrameSetup) ||
15941612 I->getFlag (MachineInstr::FrameDestroy);
1613+ case AArch64::SEH_SavePReg:
1614+ case AArch64::SEH_SaveZReg:
1615+ return true ;
15951616 }
15961617}
15971618
@@ -1874,12 +1895,48 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
18741895 bool IsWin64 = Subtarget.isCallingConvWin64 (F.getCallingConv (), F.isVarArg ());
18751896 unsigned FixedObject = getFixedObjectSize (MF, AFI, IsWin64, IsFunclet);
18761897
1898+ // Windows unwind can't represent the required stack adjustments if we have
1899+ // both SVE callee-saves and dynamic stack allocations, and the frame
1900+ // pointer is before the SVE spills. The allocation of the frame pointer
1901+ // must be the last instruction in the prologue so the unwinder can restore
1902+ // the stack pointer correctly. (And there isn't any unwind opcode for
1903+ // `addvl sp, x29, -17`.)
1904+ //
1905+ // Because of this, we do spills in the opposite order on Windows: first SVE,
1906+ // then GPRs. The main side-effect of this is that it makes accessing
1907+ // parameters passed on the stack more expensive.
1908+ //
1909+ // We could consider rearranging the spills for simpler cases.
1910+ bool FPAfterSVECalleeSaves =
1911+ Subtarget.isTargetWindows () && AFI->getSVECalleeSavedStackSize ();
1912+
18771913 auto PrologueSaveSize = AFI->getCalleeSavedStackSize () + FixedObject;
18781914 // All of the remaining stack allocations are for locals.
18791915 AFI->setLocalStackSize (NumBytes - PrologueSaveSize);
18801916 bool CombineSPBump = shouldCombineCSRLocalStackBump (MF, NumBytes);
18811917 bool HomPrologEpilog = homogeneousPrologEpilog (MF);
1882- if (CombineSPBump) {
1918+ if (FPAfterSVECalleeSaves) {
1919+ // If we're doing SVE saves first, we need to immediately allocate space
1920+ // for fixed objects, then space for the SVE callee saves.
1921+ //
1922+ // Windows unwind requires that the scalable size is a multiple of 16;
1923+ // that's handled when the callee-saved size is computed.
1924+ auto SaveSize =
1925+ StackOffset::getScalable (AFI->getSVECalleeSavedStackSize ()) +
1926+ StackOffset::getFixed (FixedObject);
1927+ allocateStackSpace (MBB, MBBI, 0 , SaveSize, NeedsWinCFI, &HasWinCFI,
1928+ /* EmitCFI=*/ false , StackOffset{},
1929+ /* FollowupAllocs=*/ true );
1930+ NumBytes -= FixedObject;
1931+
1932+ // Now allocate space for the GPR callee saves.
1933+ while (MBBI != End && IsSVECalleeSave (MBBI))
1934+ ++MBBI;
1935+ MBBI = convertCalleeSaveRestoreToSPPrePostIncDec (
1936+ MBB, MBBI, DL, TII, -AFI->getCalleeSavedStackSize (), NeedsWinCFI,
1937+ &HasWinCFI, EmitAsyncCFI);
1938+ NumBytes -= AFI->getCalleeSavedStackSize ();
1939+ } else if (CombineSPBump) {
18831940 assert (!SVEStackSize && " Cannot combine SP bump with SVE" );
18841941 emitFrameOffset (MBB, MBBI, DL, AArch64::SP, AArch64::SP,
18851942 StackOffset::getFixed (-NumBytes), TII,
@@ -1982,6 +2039,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
19822039 : 0 ;
19832040
19842041 if (windowsRequiresStackProbe (MF, NumBytes + RealignmentPadding)) {
2042+ if (AFI->getSVECalleeSavedStackSize ())
2043+ report_fatal_error (" SVE callee saves not yet supported" );
19852044 uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4 ;
19862045 if (NeedsWinCFI) {
19872046 HasWinCFI = true ;
@@ -2116,9 +2175,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
21162175 << " \n " );
21172176 // Find callee save instructions in frame.
21182177 CalleeSavesBegin = MBBI;
2119- assert (IsSVECalleeSave (CalleeSavesBegin) && " Unexpected instruction" );
2120- while (IsSVECalleeSave (MBBI) && MBBI != MBB.getFirstTerminator ())
2121- ++MBBI;
2178+ if (!FPAfterSVECalleeSaves) {
2179+ assert (IsSVECalleeSave (CalleeSavesBegin) && " Unexpected instruction" );
2180+ while (IsSVECalleeSave (MBBI) && MBBI != MBB.getFirstTerminator ())
2181+ ++MBBI;
2182+ }
21222183 CalleeSavesEnd = MBBI;
21232184
21242185 SVECalleeSavesSize = StackOffset::getScalable (CalleeSavedSize);
@@ -2129,9 +2190,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
21292190 StackOffset CFAOffset =
21302191 StackOffset::getFixed ((int64_t )MFI.getStackSize () - NumBytes);
21312192 StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed (NumBytes);
2132- allocateStackSpace (MBB, CalleeSavesBegin, 0 , SVECalleeSavesSize, false ,
2133- nullptr , EmitAsyncCFI && !HasFP, CFAOffset,
2134- MFI.hasVarSizedObjects () || LocalsSize);
2193+ if (!FPAfterSVECalleeSaves) {
2194+ allocateStackSpace (MBB, CalleeSavesBegin, 0 , SVECalleeSavesSize, false ,
2195+ nullptr , EmitAsyncCFI && !HasFP, CFAOffset,
2196+ MFI.hasVarSizedObjects () || LocalsSize);
2197+ }
21352198 CFAOffset += SVECalleeSavesSize;
21362199
21372200 if (EmitAsyncCFI)
@@ -2303,10 +2366,16 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
23032366 assert (AfterCSRPopSize == 0 );
23042367 return ;
23052368 }
2369+
2370+ bool FPAfterSVECalleeSaves =
2371+ Subtarget.isTargetWindows () && AFI->getSVECalleeSavedStackSize ();
2372+
23062373 bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue (MBB, NumBytes);
23072374 // Assume we can't combine the last pop with the sp restore.
23082375 bool CombineAfterCSRBump = false ;
2309- if (!CombineSPBump && PrologueSaveSize != 0 ) {
2376+ if (FPAfterSVECalleeSaves) {
2377+ AfterCSRPopSize = FixedObject;
2378+ } else if (!CombineSPBump && PrologueSaveSize != 0 ) {
23102379 MachineBasicBlock::iterator Pop = std::prev (MBB.getFirstTerminator ());
23112380 while (Pop->getOpcode () == TargetOpcode::CFI_INSTRUCTION ||
23122381 AArch64InstrInfo::isSEHInstruction (*Pop))
@@ -2339,7 +2408,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
23392408 while (LastPopI != Begin) {
23402409 --LastPopI;
23412410 if (!LastPopI->getFlag (MachineInstr::FrameDestroy) ||
2342- IsSVECalleeSave (LastPopI)) {
2411+ (!FPAfterSVECalleeSaves && IsSVECalleeSave (LastPopI) )) {
23432412 ++LastPopI;
23442413 break ;
23452414 } else if (CombineSPBump)
@@ -2415,6 +2484,9 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
24152484 StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
24162485 MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI;
24172486 if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize ()) {
2487+ if (FPAfterSVECalleeSaves)
2488+ RestoreEnd = MBB.getFirstTerminator ();
2489+
24182490 RestoreBegin = std::prev (RestoreEnd);
24192491 while (RestoreBegin != MBB.begin () &&
24202492 IsSVECalleeSave (std::prev (RestoreBegin)))
@@ -2430,7 +2502,31 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
24302502 }
24312503
24322504 // Deallocate the SVE area.
2433- if (SVEStackSize) {
2505+ if (FPAfterSVECalleeSaves) {
2506+ // With stack realignment or variable-sized objects, SP is restored from
2507+ // FP, which implicitly deallocates the non-callee-save SVE allocations.
2508+ // Otherwise, deallocate them explicitly.
2509+ if (!AFI->isStackRealigned () && !MFI.hasVarSizedObjects ()) {
2510+ emitFrameOffset (MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
2511+ DeallocateBefore, TII, MachineInstr::FrameDestroy, false ,
2512+ NeedsWinCFI, &HasWinCFI);
2513+ }
2514+
2515+ // Deallocate callee-save non-SVE registers.
2516+ emitFrameOffset (MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
2517+ StackOffset::getFixed (AFI->getCalleeSavedStackSize ()), TII,
2518+ MachineInstr::FrameDestroy, false , NeedsWinCFI, &HasWinCFI);
2519+
2520+ // Deallocate fixed objects.
2521+ emitFrameOffset (MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
2522+ StackOffset::getFixed (FixedObject), TII,
2523+ MachineInstr::FrameDestroy, false , NeedsWinCFI, &HasWinCFI);
2524+
2525+ // Deallocate callee-save SVE registers.
2526+ emitFrameOffset (MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
2527+ DeallocateAfter, TII, MachineInstr::FrameDestroy, false ,
2528+ NeedsWinCFI, &HasWinCFI);
2529+ } else if (SVEStackSize) {
24342530 // If we have stack realignment or variable sized objects on the stack,
24352531 // restore the stack pointer from the frame pointer prior to SVE CSR
24362532 // restoration.
@@ -2450,20 +2546,20 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
24502546 emitFrameOffset (
24512547 MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
24522548 StackOffset::getFixed (NumBytes), TII, MachineInstr::FrameDestroy,
2453- false , false , nullptr , EmitCFI && !hasFP (MF),
2549+ false , NeedsWinCFI, &HasWinCFI , EmitCFI && !hasFP (MF),
24542550 SVEStackSize + StackOffset::getFixed (NumBytes + PrologueSaveSize));
24552551 NumBytes = 0 ;
24562552 }
24572553
24582554 emitFrameOffset (MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
24592555 DeallocateBefore, TII, MachineInstr::FrameDestroy, false ,
2460- false , nullptr , EmitCFI && !hasFP (MF),
2556+ NeedsWinCFI, &HasWinCFI , EmitCFI && !hasFP (MF),
24612557 SVEStackSize +
24622558 StackOffset::getFixed (NumBytes + PrologueSaveSize));
24632559
24642560 emitFrameOffset (MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
24652561 DeallocateAfter, TII, MachineInstr::FrameDestroy, false ,
2466- false , nullptr , EmitCFI && !hasFP (MF),
2562+ NeedsWinCFI, &HasWinCFI , EmitCFI && !hasFP (MF),
24672563 DeallocateAfter +
24682564 StackOffset::getFixed (NumBytes + PrologueSaveSize));
24692565 }
@@ -2757,10 +2853,27 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
27572853 }
27582854
27592855 StackOffset ScalableOffset = {};
2760- if (UseFP && !(isFixed || isCSR))
2761- ScalableOffset = -SVEStackSize;
2762- if (!UseFP && (isFixed || isCSR))
2763- ScalableOffset = SVEStackSize;
2856+ bool FPAfterSVECalleeSaves =
2857+ isTargetWindows (MF) && AFI->getSVECalleeSavedStackSize ();
2858+ if (FPAfterSVECalleeSaves) {
2859+ // In this stack layout, the FP is in between the callee saves and other
2860+ // SVE allocations.
2861+ StackOffset SVECalleeSavedStack =
2862+ StackOffset::getScalable (AFI->getSVECalleeSavedStackSize ());
2863+ if (UseFP) {
2864+ if (!(isFixed || isCSR))
2865+ ScalableOffset = SVECalleeSavedStack - SVEStackSize;
2866+ else
2867+ ScalableOffset = SVECalleeSavedStack;
2868+ } else if (!UseFP && (isFixed || isCSR)) {
2869+ ScalableOffset = SVEStackSize;
2870+ }
2871+ } else {
2872+ if (UseFP && !(isFixed || isCSR))
2873+ ScalableOffset = -SVEStackSize;
2874+ if (!UseFP && (isFixed || isCSR))
2875+ ScalableOffset = SVEStackSize;
2876+ }
27642877
27652878 if (UseFP) {
27662879 FrameReg = RegInfo->getFrameRegister (MF);
@@ -2934,7 +3047,9 @@ static void computeCalleeSaveRegisterPairs(
29343047 RegInc = -1 ;
29353048 FirstReg = Count - 1 ;
29363049 }
2937- int ScalableByteOffset = AFI->getSVECalleeSavedStackSize ();
3050+ bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize ();
3051+ int ScalableByteOffset =
3052+ FPAfterSVECalleeSaves ? 0 : AFI->getSVECalleeSavedStackSize ();
29383053 bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace ();
29393054 Register LastReg = 0 ;
29403055
0 commit comments