@@ -199,6 +199,11 @@ SpillArea getSpillArea(Register Reg,
199199 // push {r0-r10, r12} GPRCS1
200200 // vpush {r8-d15} DPRCS1
201201 // push {r11, lr} GPRCS2
202+ //
203+ // SplitR11AAPCSSignRA:
204+ // push {r0-r10, r12} GPRSC1
205+ // push {r11, lr} GPRCS2
206+ // vpush {r8-d15} DPRCS1
202207
203208 // If FPCXTNS is spilled (for CMSE secure entryfunctions), it is always at
204209 // the top of the stack frame.
@@ -246,7 +251,8 @@ SpillArea getSpillArea(Register Reg,
246251 return SpillArea::GPRCS1;
247252
248253 case ARM::LR:
249- if (Variation == ARMSubtarget::SplitR11WindowsSEH)
254+ if (Variation == ARMSubtarget::SplitR11WindowsSEH ||
255+ Variation == ARMSubtarget::SplitR11AAPCSSignRA)
250256 return SpillArea::GPRCS2;
251257 else
252258 return SpillArea::GPRCS1;
@@ -859,6 +865,9 @@ static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
859865 // This is a conservative estimation: Assume the frame pointer being r7 and
860866 // pc("r15") up to r8 getting spilled before (= 8 registers).
861867 int MaxRegBytes = 8 * 4 ;
868+ if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
869+ // Here, r11 can be stored below all of r4-r15.
870+ MaxRegBytes = 11 * 4 ;
862871 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
863872 // Here, r11 can be stored below all of r4-r15 plus d8-d15.
864873 MaxRegBytes = 11 * 4 + 8 * 8 ;
@@ -931,17 +940,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
931940 }
932941
933942 // Determine spill area sizes, and some important frame indices.
943+ SpillArea FramePtrSpillArea;
944+ bool BeforeFPPush = true ;
934945 for (const CalleeSavedInfo &I : CSI) {
935946 Register Reg = I.getReg ();
936947 int FI = I.getFrameIdx ();
937948
938- if (Reg == FramePtr)
949+ SpillArea Area = getSpillArea (Reg, PushPopSplit,
950+ AFI->getNumAlignedDPRCS2Regs (), RegInfo);
951+
952+ if (Reg == FramePtr) {
939953 FramePtrSpillFI = FI;
954+ FramePtrSpillArea = Area;
955+ }
940956 if (Reg == ARM::D8)
941957 D8SpillFI = FI;
942958
943- switch (getSpillArea (Reg, PushPopSplit, AFI->getNumAlignedDPRCS2Regs (),
944- RegInfo)) {
959+ switch (Area) {
945960 case SpillArea::FPCXT:
946961 FPCXTSaveSize += 4 ;
947962 break ;
@@ -968,21 +983,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
968983 // Move past FPCXT area.
969984 if (FPCXTSaveSize > 0 ) {
970985 LastPush = MBBI++;
971- DefCFAOffsetCandidates.addInst (LastPush, FPCXTSaveSize, true );
986+ DefCFAOffsetCandidates.addInst (LastPush, FPCXTSaveSize, BeforeFPPush );
972987 }
973988
974989 // Allocate the vararg register save area.
975990 if (ArgRegsSaveSize) {
976991 emitSPUpdate (isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
977992 MachineInstr::FrameSetup);
978993 LastPush = std::prev (MBBI);
979- DefCFAOffsetCandidates.addInst (LastPush, ArgRegsSaveSize, true );
994+ DefCFAOffsetCandidates.addInst (LastPush, ArgRegsSaveSize, BeforeFPPush );
980995 }
981996
982997 // Move past area 1.
983998 if (GPRCS1Size > 0 ) {
984999 GPRCS1Push = LastPush = MBBI++;
985- DefCFAOffsetCandidates.addInst (LastPush, GPRCS1Size, true );
1000+ DefCFAOffsetCandidates.addInst (LastPush, GPRCS1Size, BeforeFPPush);
1001+ if (FramePtrSpillArea == SpillArea::GPRCS1)
1002+ BeforeFPPush = false ;
9861003 }
9871004
9881005 // Determine starting offsets of spill areas. These offsets are all positive
@@ -1006,21 +1023,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10061023 } else {
10071024 DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
10081025 }
1009- int FramePtrOffsetInPush = 0 ;
10101026 if (HasFP) {
10111027 // Offset from the CFA to the saved frame pointer, will be negative.
10121028 int FPOffset = MFI.getObjectOffset (FramePtrSpillFI);
10131029 LLVM_DEBUG (dbgs () << " FramePtrSpillFI: " << FramePtrSpillFI
10141030 << " , FPOffset: " << FPOffset << " \n " );
10151031 assert (getMaxFPOffset (STI, *AFI, MF) <= FPOffset &&
10161032 " Max FP estimation is wrong" );
1017- // Offset from the top of the GPRCS1 area to the saved frame pointer, will
1018- // be negative.
1019- FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
1020- LLVM_DEBUG (dbgs () << " FramePtrOffsetInPush=" << FramePtrOffsetInPush
1021- << " , FramePtrSpillOffset="
1022- << (MFI.getObjectOffset (FramePtrSpillFI) + NumBytes)
1023- << " \n " );
10241033 AFI->setFramePtrSpillOffset (MFI.getObjectOffset (FramePtrSpillFI) +
10251034 NumBytes);
10261035 }
@@ -1032,7 +1041,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10321041 // after DPRCS1.
10331042 if (GPRCS2Size > 0 && PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
10341043 GPRCS2Push = LastPush = MBBI++;
1035- DefCFAOffsetCandidates.addInst (LastPush, GPRCS2Size);
1044+ DefCFAOffsetCandidates.addInst (LastPush, GPRCS2Size, BeforeFPPush);
1045+ if (FramePtrSpillArea == SpillArea::GPRCS2)
1046+ BeforeFPPush = false ;
10361047 }
10371048
10381049 // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
@@ -1045,7 +1056,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10451056 else {
10461057 emitSPUpdate (isARM, MBB, MBBI, dl, TII, -DPRGapSize,
10471058 MachineInstr::FrameSetup);
1048- DefCFAOffsetCandidates.addInst (std::prev (MBBI), DPRGapSize);
1059+ DefCFAOffsetCandidates.addInst (std::prev (MBBI), DPRGapSize, BeforeFPPush );
10491060 }
10501061 }
10511062
@@ -1054,7 +1065,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10541065 // Since vpush register list cannot have gaps, there may be multiple vpush
10551066 // instructions in the prologue.
10561067 while (MBBI != MBB.end () && MBBI->getOpcode () == ARM::VSTMDDB_UPD) {
1057- DefCFAOffsetCandidates.addInst (MBBI, sizeOfSPAdjustment (*MBBI));
1068+ DefCFAOffsetCandidates.addInst (MBBI, sizeOfSPAdjustment (*MBBI),
1069+ BeforeFPPush);
10581070 LastPush = MBBI++;
10591071 }
10601072 }
@@ -1073,7 +1085,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10731085 // Move GPRCS2, if using using SplitR11WindowsSEH.
10741086 if (GPRCS2Size > 0 && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
10751087 GPRCS2Push = LastPush = MBBI++;
1076- DefCFAOffsetCandidates.addInst (LastPush, GPRCS2Size);
1088+ DefCFAOffsetCandidates.addInst (LastPush, GPRCS2Size, BeforeFPPush);
1089+ if (FramePtrSpillArea == SpillArea::GPRCS2)
1090+ BeforeFPPush = false ;
10771091 }
10781092
10791093 bool NeedsWinCFIStackAlloc = NeedsWinCFI;
@@ -1174,28 +1188,51 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
11741188 // into spill area 1, including the FP in R11. In either case, it
11751189 // is in area one and the adjustment needs to take place just after
11761190 // that push.
1177- // FIXME: The above is not necessary true when PACBTI is enabled.
1178- // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
1179- // so FP ends up on area two.
11801191 MachineBasicBlock::iterator AfterPush;
11811192 if (HasFP) {
1182- AfterPush = std::next (GPRCS1Push);
1183- unsigned PushSize = sizeOfSPAdjustment (*GPRCS1Push);
1184- int FPOffset = PushSize + FramePtrOffsetInPush;
1185- if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
1186- AfterPush = std::next (GPRCS2Push);
1187- emitRegPlusImmediate (!AFI->isThumbFunction (), MBB, AfterPush, dl, TII,
1188- FramePtr, ARM::SP, 0 , MachineInstr::FrameSetup);
1189- } else {
1190- emitRegPlusImmediate (!AFI->isThumbFunction (), MBB, AfterPush, dl, TII,
1191- FramePtr, ARM::SP, FPOffset,
1192- MachineInstr::FrameSetup);
1193+ MachineBasicBlock::iterator FPPushInst;
1194+ // Offset from SP immediately after the push which saved the FP to the FP
1195+ // save slot.
1196+ int64_t FPOffsetAfterPush;
1197+ switch (FramePtrSpillArea) {
1198+ case SpillArea::GPRCS1:
1199+ FPPushInst = GPRCS1Push;
1200+ FPOffsetAfterPush = MFI.getObjectOffset (FramePtrSpillFI) +
1201+ ArgRegsSaveSize + FPCXTSaveSize +
1202+ sizeOfSPAdjustment (*FPPushInst);
1203+ LLVM_DEBUG (dbgs () << " Frame pointer in GPRCS1, offset "
1204+ << FPOffsetAfterPush << " after that push\n " );
1205+ break ;
1206+ case SpillArea::GPRCS2:
1207+ FPPushInst = GPRCS2Push;
1208+ FPOffsetAfterPush = MFI.getObjectOffset (FramePtrSpillFI) +
1209+ ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1210+ sizeOfSPAdjustment (*FPPushInst);
1211+ if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1212+ FPOffsetAfterPush += DPRCSSize + DPRGapSize;
1213+ LLVM_DEBUG (dbgs () << " Frame pointer in GPRCS2, offset "
1214+ << FPOffsetAfterPush << " after that push\n " );
1215+ break ;
1216+ default :
1217+ llvm_unreachable (" frame pointer in unknown spill area" );
1218+ break ;
11931219 }
1220+ AfterPush = std::next (FPPushInst);
1221+ if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1222+ assert (FPOffsetAfterPush == 0 );
1223+
1224+ // Emit the MOV or ADD to set up the frame pointer register.
1225+ emitRegPlusImmediate (!AFI->isThumbFunction (), MBB, AfterPush, dl, TII,
1226+ FramePtr, ARM::SP, FPOffsetAfterPush,
1227+ MachineInstr::FrameSetup);
1228+
11941229 if (!NeedsWinCFI) {
1195- if (FramePtrOffsetInPush + PushSize != 0 ) {
1230+ // Emit DWARF info to find the CFA using the frame pointer from this
1231+ // point onward.
1232+ if (FPOffsetAfterPush != 0 ) {
11961233 unsigned CFIIndex = MF.addFrameInst (MCCFIInstruction::cfiDefCfa (
11971234 nullptr , MRI->getDwarfRegNum (FramePtr, true ),
1198- FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush ));
1235+ -MFI. getObjectOffset (FramePtrSpillFI) ));
11991236 BuildMI (MBB, AfterPush, dl, TII.get (TargetOpcode::CFI_INSTRUCTION))
12001237 .addCFIIndex (CFIIndex)
12011238 .setMIFlags (MachineInstr::FrameSetup);
@@ -1708,7 +1745,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
17081745 if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
17091746 !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore () == 0 &&
17101747 STI.hasV5TOps () && MBB.succ_empty () && !hasPAC &&
1711- PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
1748+ (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH &&
1749+ PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) {
17121750 Reg = ARM::PC;
17131751 // Fold the return instruction into the LDM.
17141752 DeleteRet = true ;
@@ -2940,18 +2978,29 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots(
29402978 const auto &AFI = *MF.getInfo <ARMFunctionInfo>();
29412979 if (AFI.shouldSignReturnAddress ()) {
29422980 // The order of register must match the order we push them, because the
2943- // PEI assigns frame indices in that order. When compiling for return
2944- // address sign and authenication, we use split push, therefore the orders
2945- // we want are:
2946- // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
2947- CSI.insert (find_if (CSI,
2948- [=](const auto &CS) {
2949- Register Reg = CS.getReg ();
2950- return Reg == ARM::R10 || Reg == ARM::R11 ||
2951- Reg == ARM::R8 || Reg == ARM::R9 ||
2952- ARM::DPRRegClass.contains (Reg);
2953- }),
2954- CalleeSavedInfo (ARM::R12));
2981+ // PEI assigns frame indices in that order. That order depends on the
2982+ // PushPopSplitVariation, there are only two cases which we use with return
2983+ // address signing:
2984+ switch (STI.getPushPopSplitVariation (MF)) {
2985+ case ARMSubtarget::SplitR7:
2986+ // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
2987+ CSI.insert (find_if (CSI,
2988+ [=](const auto &CS) {
2989+ Register Reg = CS.getReg ();
2990+ return Reg == ARM::R10 || Reg == ARM::R11 ||
2991+ Reg == ARM::R8 || Reg == ARM::R9 ||
2992+ ARM::DPRRegClass.contains (Reg);
2993+ }),
2994+ CalleeSavedInfo (ARM::R12));
2995+ break ;
2996+ case ARMSubtarget::SplitR11AAPCSSignRA:
2997+ // With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR
2998+ // on the stack.
2999+ CSI.insert (CSI.begin (), CalleeSavedInfo (ARM::R12));
3000+ break ;
3001+ default :
3002+ llvm_unreachable (" Unexpected CSR split with return address signing" );
3003+ }
29553004 }
29563005
29573006 return false ;
0 commit comments