@@ -331,7 +331,9 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
331331static bool produceCompactUnwindFrame (MachineFunction &MF);
332332static bool needsWinCFI (const MachineFunction &MF);
333333static StackOffset getSVEStackSize (const MachineFunction &MF);
334- static Register findScratchNonCalleeSaveRegister (MachineBasicBlock *MBB);
334+ static Register findScratchNonCalleeSaveRegister (MachineBasicBlock *MBB,
335+ bool HasCall = false );
336+ static bool requiresSaveVG (const MachineFunction &MF);
335337
336338/// Returns true if homogeneous prolog or epilog code can be emitted
337339/// for the size optimization. If possible, a frame helper call is injected.
@@ -1006,6 +1008,16 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
10061008 }
10071009}
10081010
/// Returns true when a frame of \p StackSizeInBytes bytes needs a stack probe:
/// the subtarget must target Windows, the function must have stack probing
/// enabled, and the size must be at least the function's stack probe size.
1011+ static bool windowsRequiresStackProbe (const MachineFunction &MF,
1012+ uint64_t StackSizeInBytes) {
1013+ const AArch64Subtarget &Subtarget = MF.getSubtarget <AArch64Subtarget>();
1014+ const AArch64FunctionInfo &MFI = *MF.getInfo <AArch64FunctionInfo>();
1015+ // TODO: When implementing stack protectors, take that into account
1016+ // for the probe threshold.
1017+ return Subtarget.isTargetWindows () && MFI.hasStackProbing () &&
1018+ StackSizeInBytes >= uint64_t (MFI.getStackProbeSize ());
1019+ }
1020+
10091021static void getLiveRegsForEntryMBB (LivePhysRegs &LiveRegs,
10101022 const MachineBasicBlock &MBB) {
10111023 const MachineFunction *MF = MBB.getParent ();
@@ -1027,7 +1039,8 @@ static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs,
10271039// but we would then have to make sure that we were in fact saving at least one
10281040// callee-save register in the prologue, which is additional complexity that
10291041// doesn't seem worth the benefit.
1030- static Register findScratchNonCalleeSaveRegister (MachineBasicBlock *MBB) {
1042+ static Register findScratchNonCalleeSaveRegister (MachineBasicBlock *MBB,
1043+ bool HasCall) {
10311044 MachineFunction *MF = MBB->getParent ();
10321045
10331046 // If MBB is an entry block, use X9 as the scratch register
@@ -1041,6 +1054,11 @@ static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
10411054 const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo ();
10421055 LivePhysRegs LiveRegs (TRI);
10431056 getLiveRegsForEntryMBB (LiveRegs, *MBB);
1057+ if (HasCall) {
1058+ LiveRegs.addReg (AArch64::X16);
1059+ LiveRegs.addReg (AArch64::X17);
1060+ LiveRegs.addReg (AArch64::X18);
1061+ }
10441062
10451063 // Prefer X9 since it was historically used for the prologue scratch reg.
10461064 const MachineRegisterInfo &MRI = MF->getRegInfo ();
@@ -1081,23 +1099,18 @@ bool AArch64FrameLowering::canUseAsPrologue(
10811099 MBB.isLiveIn (AArch64::NZCV))
10821100 return false ;
10831101
1084- // Don't need a scratch register if we're not going to re-align the stack or
1085- // emit stack probes.
1086- if (!RegInfo->hasStackRealignment (*MF) && !TLI->hasInlineStackProbe (*MF))
1087- return true ;
1088- // Otherwise, we can use any block as long as it has a scratch register
1089- // available.
1090- return findScratchNonCalleeSaveRegister (TmpMBB) != AArch64::NoRegister;
1091- }
1102+ if (RegInfo->hasStackRealignment (*MF) || TLI->hasInlineStackProbe (*MF))
1103+ if (findScratchNonCalleeSaveRegister (TmpMBB) == AArch64::NoRegister)
1104+ return false ;
10921105
1093- static bool windowsRequiresStackProbe (MachineFunction &MF,
1094- uint64_t StackSizeInBytes) {
1095- const AArch64Subtarget &Subtarget = MF. getSubtarget <AArch64Subtarget>();
1096- const AArch64FunctionInfo &MFI = *MF. getInfo <AArch64FunctionInfo>();
1097- // TODO: When implementing stack protectors, take that into account
1098- // for the probe threshold.
1099- return Subtarget. isTargetWindows () && MFI. hasStackProbing () &&
1100- StackSizeInBytes >= uint64_t (MFI. getStackProbeSize ()) ;
1106+ // May need a scratch register (for the return value) if a special call
1107+ // has to be made.
1108+ if ( requiresSaveVG (*MF) ||
1109+ windowsRequiresStackProbe ( *MF, std::numeric_limits< uint64_t >:: max ()))
1110+ if ( findScratchNonCalleeSaveRegister (TmpMBB, true ) == AArch64::NoRegister)
1111+ return false ;
1112+
1113+ return true ;
11011114}
11021115
11031116static bool needsWinCFI (const MachineFunction &MF) {
@@ -1378,8 +1391,8 @@ bool requiresGetVGCall(MachineFunction &MF) {
13781391 !MF.getSubtarget <AArch64Subtarget>().hasSVE ();
13791392}
13801393
1381- static bool requiresSaveVG (MachineFunction &MF) {
1382- AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
1394+ static bool requiresSaveVG (const MachineFunction &MF) {
1395+ const AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
13831396 // For Darwin platforms we don't save VG for non-SVE functions, even if SME
13841397 // is enabled with streaming mode changes.
13851398 if (!AFI->hasStreamingModeChanges ())
@@ -2049,6 +2062,29 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
20492062 if (AFI->getSVECalleeSavedStackSize ())
20502063 report_fatal_error (
20512064 " SVE callee saves not yet supported with stack probing" );
2065+
2066+ // Find an available register to spill the value of X15 to, if X15 is being
2067+ // used already for nest.
2068+ unsigned X15Scratch = AArch64::NoRegister;
2069+ const AArch64Subtarget &STI = MF.getSubtarget <AArch64Subtarget>();
2070+ if (llvm::any_of (MBB.liveins (),
2071+ [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
2072+ return STI.getRegisterInfo ()->isSuperOrSubRegisterEq (
2073+ AArch64::X15, LiveIn.PhysReg );
2074+ })) {
2075+ X15Scratch = findScratchNonCalleeSaveRegister (&MBB, true );
2076+ assert (X15Scratch != AArch64::NoRegister &&
2077+ (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17));
2078+ #ifndef NDEBUG
2079+ LiveRegs.removeReg (AArch64::X15); // ignore X15 since we restore it
2080+ #endif
2081+ BuildMI (MBB, MBBI, DL, TII->get (AArch64::ORRXrr), X15Scratch)
2082+ .addReg (AArch64::XZR)
2083+ .addReg (AArch64::X15, RegState::Undef)
2084+ .addReg (AArch64::X15, RegState::Implicit)
2085+ .setMIFlag (MachineInstr::FrameSetup);
2086+ }
2087+
20522088 uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4 ;
20532089 if (NeedsWinCFI) {
20542090 HasWinCFI = true ;
@@ -2171,6 +2207,13 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
21712207 // we've set a frame pointer and already finished the SEH prologue.
21722208 assert (!NeedsWinCFI);
21732209 }
2210+ if (X15Scratch != AArch64::NoRegister) {
2211+ BuildMI (MBB, MBBI, DL, TII->get (AArch64::ORRXrr), AArch64::X15)
2212+ .addReg (AArch64::XZR)
2213+ .addReg (X15Scratch, RegState::Undef)
2214+ .addReg (X15Scratch, RegState::Implicit)
2215+ .setMIFlag (MachineInstr::FrameSetup);
2216+ }
21742217 }
21752218
21762219 StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
@@ -3355,7 +3398,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
33553398 unsigned X0Scratch = AArch64::NoRegister;
33563399 if (Reg1 == AArch64::VG) {
33573400 // Find an available register to store value of VG to.
3358- Reg1 = findScratchNonCalleeSaveRegister (&MBB);
3401+ Reg1 = findScratchNonCalleeSaveRegister (&MBB, true );
33593402 assert (Reg1 != AArch64::NoRegister);
33603403 SMEAttrs Attrs = AFI->getSMEFnAttrs ();
33613404
0 commit comments