@@ -199,6 +199,11 @@ SpillArea getSpillArea(Register Reg,
199199 // push {r0-r10, r12} GPRCS1
200200 // vpush {d8-d15} DPRCS1
201201 // push {r11, lr} GPRCS2
202+ //
203+ // SplitR11AAPCSSignRA:
204+ // push {r0-r10, r12} GPRCS1
205+ // push {r11, lr} GPRCS2
206+ // vpush {d8-d15} DPRCS1
202207
203208 // If FPCXTNS is spilled (for CMSE secure entry functions), it is always at
204209 // the top of the stack frame.
@@ -246,7 +251,8 @@ SpillArea getSpillArea(Register Reg,
246251 return SpillArea::GPRCS1;
247252
248253 case ARM::LR:
249- if (Variation == ARMSubtarget::SplitR11WindowsSEH)
254+ if (Variation == ARMSubtarget::SplitR11WindowsSEH ||
255+ Variation == ARMSubtarget::SplitR11AAPCSSignRA)
250256 return SpillArea::GPRCS2;
251257 else
252258 return SpillArea::GPRCS1;
@@ -863,6 +869,9 @@ static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
863869 // This is a conservative estimation: Assume the frame pointer being r7 and
864870 // pc("r15") up to r8 getting spilled before (= 8 registers).
865871 int MaxRegBytes = 8 * 4 ;
872+ if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
873+ // Here, r11 can be stored below all of r4-r15.
874+ MaxRegBytes = 11 * 4 ;
866875 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
867876 // Here, r11 can be stored below all of r4-r15 plus d8-d15.
868877 MaxRegBytes = 11 * 4 + 8 * 8 ;
@@ -935,17 +944,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
935944 }
936945
937946 // Determine spill area sizes, and some important frame indices.
947+ SpillArea FramePtrSpillArea = SpillArea::GPRCS1;
948+ bool BeforeFPPush = true ;
938949 for (const CalleeSavedInfo &I : CSI) {
939950 Register Reg = I.getReg ();
940951 int FI = I.getFrameIdx ();
941952
942- if (Reg == FramePtr)
953+ SpillArea Area = getSpillArea (Reg, PushPopSplit,
954+ AFI->getNumAlignedDPRCS2Regs (), RegInfo);
955+
956+ if (Reg == FramePtr) {
943957 FramePtrSpillFI = FI;
958+ FramePtrSpillArea = Area;
959+ }
944960 if (Reg == ARM::D8)
945961 D8SpillFI = FI;
946962
947- switch (getSpillArea (Reg, PushPopSplit, AFI->getNumAlignedDPRCS2Regs (),
948- RegInfo)) {
963+ switch (Area) {
949964 case SpillArea::FPCXT:
950965 FPCXTSaveSize += 4 ;
951966 break ;
@@ -972,21 +987,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
972987 // Move past FPCXT area.
973988 if (FPCXTSaveSize > 0 ) {
974989 LastPush = MBBI++;
975- DefCFAOffsetCandidates.addInst (LastPush, FPCXTSaveSize, true );
990+ DefCFAOffsetCandidates.addInst (LastPush, FPCXTSaveSize, BeforeFPPush );
976991 }
977992
978993 // Allocate the vararg register save area.
979994 if (ArgRegsSaveSize) {
980995 emitSPUpdate (isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
981996 MachineInstr::FrameSetup);
982997 LastPush = std::prev (MBBI);
983- DefCFAOffsetCandidates.addInst (LastPush, ArgRegsSaveSize, true );
998+ DefCFAOffsetCandidates.addInst (LastPush, ArgRegsSaveSize, BeforeFPPush );
984999 }
9851000
9861001 // Move past area 1.
9871002 if (GPRCS1Size > 0 ) {
9881003 GPRCS1Push = LastPush = MBBI++;
989- DefCFAOffsetCandidates.addInst (LastPush, GPRCS1Size, true );
1004+ DefCFAOffsetCandidates.addInst (LastPush, GPRCS1Size, BeforeFPPush);
1005+ if (FramePtrSpillArea == SpillArea::GPRCS1)
1006+ BeforeFPPush = false ;
9901007 }
9911008
9921009 // Determine starting offsets of spill areas. These offsets are all positive
@@ -1010,21 +1027,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10101027 } else {
10111028 DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
10121029 }
1013- int FramePtrOffsetInPush = 0 ;
10141030 if (HasFP) {
10151031 // Offset from the CFA to the saved frame pointer, will be negative.
10161032 [[maybe_unused]] int FPOffset = MFI.getObjectOffset (FramePtrSpillFI);
10171033 LLVM_DEBUG (dbgs () << " FramePtrSpillFI: " << FramePtrSpillFI
10181034 << " , FPOffset: " << FPOffset << " \n " );
10191035 assert (getMaxFPOffset (STI, *AFI, MF) <= FPOffset &&
10201036 " Max FP estimation is wrong" );
1021- // Offset from the top of the GPRCS1 area to the saved frame pointer, will
1022- // be negative.
1023- FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
1024- LLVM_DEBUG (dbgs () << " FramePtrOffsetInPush=" << FramePtrOffsetInPush
1025- << " , FramePtrSpillOffset="
1026- << (MFI.getObjectOffset (FramePtrSpillFI) + NumBytes)
1027- << " \n " );
10281037 AFI->setFramePtrSpillOffset (MFI.getObjectOffset (FramePtrSpillFI) +
10291038 NumBytes);
10301039 }
@@ -1036,7 +1045,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10361045 // after DPRCS1.
10371046 if (GPRCS2Size > 0 && PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
10381047 GPRCS2Push = LastPush = MBBI++;
1039- DefCFAOffsetCandidates.addInst (LastPush, GPRCS2Size);
1048+ DefCFAOffsetCandidates.addInst (LastPush, GPRCS2Size, BeforeFPPush);
1049+ if (FramePtrSpillArea == SpillArea::GPRCS2)
1050+ BeforeFPPush = false ;
10401051 }
10411052
10421053 // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
@@ -1049,7 +1060,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10491060 else {
10501061 emitSPUpdate (isARM, MBB, MBBI, dl, TII, -DPRGapSize,
10511062 MachineInstr::FrameSetup);
1052- DefCFAOffsetCandidates.addInst (std::prev (MBBI), DPRGapSize);
1063+ DefCFAOffsetCandidates.addInst (std::prev (MBBI), DPRGapSize, BeforeFPPush );
10531064 }
10541065 }
10551066
@@ -1058,7 +1069,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10581069 // Since vpush register list cannot have gaps, there may be multiple vpush
10591070 // instructions in the prologue.
10601071 while (MBBI != MBB.end () && MBBI->getOpcode () == ARM::VSTMDDB_UPD) {
1061- DefCFAOffsetCandidates.addInst (MBBI, sizeOfSPAdjustment (*MBBI));
1072+ DefCFAOffsetCandidates.addInst (MBBI, sizeOfSPAdjustment (*MBBI),
1073+ BeforeFPPush);
10621074 LastPush = MBBI++;
10631075 }
10641076 }
@@ -1077,7 +1089,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10771089 // Move GPRCS2, if using SplitR11WindowsSEH.
10781090 if (GPRCS2Size > 0 && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
10791091 GPRCS2Push = LastPush = MBBI++;
1080- DefCFAOffsetCandidates.addInst (LastPush, GPRCS2Size);
1092+ DefCFAOffsetCandidates.addInst (LastPush, GPRCS2Size, BeforeFPPush);
1093+ if (FramePtrSpillArea == SpillArea::GPRCS2)
1094+ BeforeFPPush = false ;
10811095 }
10821096
10831097 bool NeedsWinCFIStackAlloc = NeedsWinCFI;
@@ -1178,28 +1192,51 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
11781192 // into spill area 1, including the FP in R11. In either case, it
11791193 // is in area one and the adjustment needs to take place just after
11801194 // that push.
1181- // FIXME: The above is not necessary true when PACBTI is enabled.
1182- // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
1183- // so FP ends up on area two.
11841195 MachineBasicBlock::iterator AfterPush;
11851196 if (HasFP) {
1186- AfterPush = std::next (GPRCS1Push);
1187- unsigned PushSize = sizeOfSPAdjustment (*GPRCS1Push);
1188- int FPOffset = PushSize + FramePtrOffsetInPush;
1189- if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
1190- AfterPush = std::next (GPRCS2Push);
1191- emitRegPlusImmediate (!AFI->isThumbFunction (), MBB, AfterPush, dl, TII,
1192- FramePtr, ARM::SP, 0 , MachineInstr::FrameSetup);
1193- } else {
1194- emitRegPlusImmediate (!AFI->isThumbFunction (), MBB, AfterPush, dl, TII,
1195- FramePtr, ARM::SP, FPOffset,
1196- MachineInstr::FrameSetup);
1197+ MachineBasicBlock::iterator FPPushInst;
1198+ // Offset from SP immediately after the push which saved the FP to the FP
1199+ // save slot.
1200+ int64_t FPOffsetAfterPush;
1201+ switch (FramePtrSpillArea) {
1202+ case SpillArea::GPRCS1:
1203+ FPPushInst = GPRCS1Push;
1204+ FPOffsetAfterPush = MFI.getObjectOffset (FramePtrSpillFI) +
1205+ ArgRegsSaveSize + FPCXTSaveSize +
1206+ sizeOfSPAdjustment (*FPPushInst);
1207+ LLVM_DEBUG (dbgs () << " Frame pointer in GPRCS1, offset "
1208+ << FPOffsetAfterPush << " after that push\n " );
1209+ break ;
1210+ case SpillArea::GPRCS2:
1211+ FPPushInst = GPRCS2Push;
1212+ FPOffsetAfterPush = MFI.getObjectOffset (FramePtrSpillFI) +
1213+ ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1214+ sizeOfSPAdjustment (*FPPushInst);
1215+ if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1216+ FPOffsetAfterPush += DPRCSSize + DPRGapSize;
1217+ LLVM_DEBUG (dbgs () << " Frame pointer in GPRCS2, offset "
1218+ << FPOffsetAfterPush << " after that push\n " );
1219+ break ;
1220+ default :
1221+ llvm_unreachable (" frame pointer in unknown spill area" );
1222+ break ;
11971223 }
1224+ AfterPush = std::next (FPPushInst);
1225+ if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1226+ assert (FPOffsetAfterPush == 0 );
1227+
1228+ // Emit the MOV or ADD to set up the frame pointer register.
1229+ emitRegPlusImmediate (!AFI->isThumbFunction (), MBB, AfterPush, dl, TII,
1230+ FramePtr, ARM::SP, FPOffsetAfterPush,
1231+ MachineInstr::FrameSetup);
1232+
11981233 if (!NeedsWinCFI) {
1199- if (FramePtrOffsetInPush + PushSize != 0 ) {
1234+ // Emit DWARF info to find the CFA using the frame pointer from this
1235+ // point onward.
1236+ if (FPOffsetAfterPush != 0 ) {
12001237 unsigned CFIIndex = MF.addFrameInst (MCCFIInstruction::cfiDefCfa (
12011238 nullptr , MRI->getDwarfRegNum (FramePtr, true ),
1202- FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush ));
1239+ -MFI. getObjectOffset (FramePtrSpillFI) ));
12031240 BuildMI (MBB, AfterPush, dl, TII.get (TargetOpcode::CFI_INSTRUCTION))
12041241 .addCFIIndex (CFIIndex)
12051242 .setMIFlags (MachineInstr::FrameSetup);
@@ -1712,7 +1749,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
17121749 if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
17131750 !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore () == 0 &&
17141751 STI.hasV5TOps () && MBB.succ_empty () && !hasPAC &&
1715- PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
1752+ (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH &&
1753+ PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) {
17161754 Reg = ARM::PC;
17171755 // Fold the return instruction into the LDM.
17181756 DeleteRet = true ;
@@ -2945,18 +2983,29 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots(
29452983 const auto &AFI = *MF.getInfo <ARMFunctionInfo>();
29462984 if (AFI.shouldSignReturnAddress ()) {
29472985 // The order of register must match the order we push them, because the
2948- // PEI assigns frame indices in that order. When compiling for return
2949- // address sign and authenication, we use split push, therefore the orders
2950- // we want are:
2951- // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
2952- CSI.insert (find_if (CSI,
2953- [=](const auto &CS) {
2954- Register Reg = CS.getReg ();
2955- return Reg == ARM::R10 || Reg == ARM::R11 ||
2956- Reg == ARM::R8 || Reg == ARM::R9 ||
2957- ARM::DPRRegClass.contains (Reg);
2958- }),
2959- CalleeSavedInfo (ARM::R12));
2986+ // PEI assigns frame indices in that order. That order depends on the
2987+ // PushPopSplitVariation; there are only two cases which we use with return
2988+ // address signing:
2989+ switch (STI.getPushPopSplitVariation (MF)) {
2990+ case ARMSubtarget::SplitR7:
2991+ // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
2992+ CSI.insert (find_if (CSI,
2993+ [=](const auto &CS) {
2994+ Register Reg = CS.getReg ();
2995+ return Reg == ARM::R10 || Reg == ARM::R11 ||
2996+ Reg == ARM::R8 || Reg == ARM::R9 ||
2997+ ARM::DPRRegClass.contains (Reg);
2998+ }),
2999+ CalleeSavedInfo (ARM::R12));
3000+ break ;
3001+ case ARMSubtarget::SplitR11AAPCSSignRA:
3002+ // With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR
3003+ // on the stack.
3004+ CSI.insert (CSI.begin (), CalleeSavedInfo (ARM::R12));
3005+ break ;
3006+ default :
3007+ llvm_unreachable (" Unexpected CSR split with return address signing" );
3008+ }
29603009 }
29613010
29623011 return false ;
0 commit comments