Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 86 additions & 75 deletions llvm/lib/Target/ARM/ARMFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ enum class SpillArea {
GPRCS2,
DPRCS1,
DPRCS2,
GPRCS3,
FPCXT,
};

Expand All @@ -197,7 +198,7 @@ SpillArea getSpillArea(Register Reg,
// SplitR11WindowsSEH:
// push {r0-r10, r12} GPRCS1
// vpush {d8-d15} DPRCS1
// push {r11, lr} GPRCS2
// push {r11, lr} GPRCS3
//
// SplitR11AAPCSSignRA:
// push {r0-r10, r12} GPRCS1
Expand Down Expand Up @@ -238,10 +239,13 @@ SpillArea getSpillArea(Register Reg,
return SpillArea::GPRCS1;

case ARM::R11:
if (Variation == ARMSubtarget::NoSplit)
return SpillArea::GPRCS1;
else
if (Variation == ARMSubtarget::SplitR7 ||
Variation == ARMSubtarget::SplitR11AAPCSSignRA)
return SpillArea::GPRCS2;
if (Variation == ARMSubtarget::SplitR11WindowsSEH)
return SpillArea::GPRCS3;

return SpillArea::GPRCS1;

case ARM::R12:
if (Variation == ARMSubtarget::SplitR7)
Expand All @@ -250,11 +254,12 @@ SpillArea getSpillArea(Register Reg,
return SpillArea::GPRCS1;

case ARM::LR:
if (Variation == ARMSubtarget::SplitR11WindowsSEH ||
Variation == ARMSubtarget::SplitR11AAPCSSignRA)
if (Variation == ARMSubtarget::SplitR11AAPCSSignRA)
return SpillArea::GPRCS2;
else
return SpillArea::GPRCS1;
if (Variation == ARMSubtarget::SplitR11WindowsSEH)
return SpillArea::GPRCS3;

return SpillArea::GPRCS1;

case ARM::D0:
case ARM::D1:
Expand Down Expand Up @@ -912,7 +917,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,

// Determine the sizes of each callee-save spill areas and record which frame
// belongs to which callee-save spill areas.
unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCS1Size = 0, GPRCS3Size = 0,
DPRCS2Size = 0;
int FramePtrSpillFI = 0;
int D8SpillFI = 0;

Expand Down Expand Up @@ -970,14 +976,19 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
GPRCS2Size += 4;
break;
case SpillArea::DPRCS1:
DPRCSSize += 8;
DPRCS1Size += 8;
break;
case SpillArea::GPRCS3:
GPRCS3Size += 4;
break;
case SpillArea::DPRCS2:
DPRCS2Size += 4;
break;
}
}

MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push,
DPRCS1Push, GPRCS3Push;

// Move past the PAC computation.
if (AFI->shouldSignReturnAddress())
Expand Down Expand Up @@ -1012,20 +1023,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
if (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
DPRGapSize += GPRCS2Size;
}
DPRGapSize %= DPRAlign.value();

unsigned DPRCSOffset;
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
GPRCS2Offset = DPRCSOffset - GPRCS2Size;
} else {
DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
}
Align DPRAlign = DPRCS1Size ? std::min(Align(8), Alignment) : Align(4);
unsigned DPRGapSize =
(ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size + GPRCS2Size) %
DPRAlign.value();

unsigned DPRCS1Offset = GPRCS2Offset - DPRGapSize - DPRCS1Size;

if (HasFP) {
// Offset from the CFA to the saved frame pointer, will be negative.
[[maybe_unused]] int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
Expand All @@ -1038,11 +1043,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
}
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
AFI->setDPRCalleeSavedArea1Offset(DPRCS1Offset);

// Move GPRCS2, unless using SplitR11WindowsSEH, in which case it will be
// after DPRCS1.
if (GPRCS2Size > 0 && PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
// Move past area 2.
if (GPRCS2Size > 0) {
assert(PushPopSplit != ARMSubtarget::SplitR11WindowsSEH);
GPRCS2Push = LastPush = MBBI++;
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush);
if (FramePtrSpillArea == SpillArea::GPRCS2)
Expand All @@ -1063,33 +1068,34 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
}
}

// Move past DPRCS1.
if (DPRCSSize > 0) {
// Move past DPRCS1Size.
if (DPRCS1Size > 0) {
// Since vpush register list cannot have gaps, there may be multiple vpush
// instructions in the prologue.
while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI),
BeforeFPPush);
LastPush = MBBI++;
DPRCS1Push = LastPush = MBBI++;
}
}

// Move past the aligned DPRCS2 area.
if (AFI->getNumAlignedDPRCS2Regs() > 0) {
if (DPRCS2Size > 0) {
MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
// The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
// leaves the stack pointer pointing to the DPRCS2 area.
//
// Adjust NumBytes to represent the stack slots below the DPRCS2 area.
NumBytes += MFI.getObjectOffset(D8SpillFI);
} else
NumBytes = DPRCSOffset;

// Move GPRCS2, if using using SplitR11WindowsSEH.
if (GPRCS2Size > 0 && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
GPRCS2Push = LastPush = MBBI++;
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush);
if (FramePtrSpillArea == SpillArea::GPRCS2)
NumBytes = DPRCS1Offset;

// Move GPRCS3, if using SplitR11WindowsSEH.
if (GPRCS3Size > 0) {
assert(PushPopSplit == ARMSubtarget::SplitR11WindowsSEH);
Copy link
Contributor Author

@pestctrl pestctrl Nov 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I left these asserts here to serve as reminders of what GPRCS2Size > 0 and, likewise, GPRCS3Size > 0 imply, but if someone doesn't like them being here, I can remove them. Checking for GPRCS2Size/GPRCS3Size provides all the information that the frame code needs to know.

GPRCS3Push = LastPush = MBBI++;
DefCFAOffsetCandidates.addInst(LastPush, GPRCS3Size, BeforeFPPush);
if (FramePtrSpillArea == SpillArea::GPRCS3)
BeforeFPPush = false;
}

Expand Down Expand Up @@ -1211,11 +1217,18 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
sizeOfSPAdjustment(*FPPushInst);
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
FPOffsetAfterPush += DPRCSSize + DPRGapSize;
LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS2, offset "
<< FPOffsetAfterPush << " after that push\n");
break;
case SpillArea::GPRCS3:
FPPushInst = GPRCS3Push;
FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be better to include GPRCS2Size here for consistency, or at least assert that it is zero, so that this will be correct if we ever start using GPRCS2 and GPRCS3 together.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with including GPRCS2Size here.

GPRCS2Size + DPRCS1Size + DPRGapSize +
sizeOfSPAdjustment(*FPPushInst);
LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS3, offset "
<< FPOffsetAfterPush << " after that push\n");
break;
default:
llvm_unreachable("frame pointer in unknown spill area");
break;
Expand Down Expand Up @@ -1279,7 +1292,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
CFIPos = std::next(GPRCS2Push);
break;
case SpillArea::DPRCS1:
CFIPos = std::next(LastPush);
CFIPos = std::next(DPRCS1Push);
break;
case SpillArea::GPRCS3:
CFIPos = std::next(GPRCS3Push);
break;
case SpillArea::FPCXT:
case SpillArea::DPRCS2:
Expand Down Expand Up @@ -1317,7 +1333,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
AFI->setDPRCalleeSavedGapSize(DPRGapSize);
AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
AFI->setDPRCalleeSavedArea1Size(DPRCS1Size);
AFI->setGPRCalleeSavedArea3Size(GPRCS3Size);

// If we need dynamic stack realignment, do it here. Be paranoid and make
// sure if we also have VLAs, we have a base pointer for frame access.
Expand Down Expand Up @@ -1438,12 +1455,11 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
}

// Move SP to start of FP callee save spill area.
NumBytes -= (ReservedArgStack +
AFI->getFPCXTSaveAreaSize() +
AFI->getGPRCalleeSavedArea1Size() +
AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedGapSize() +
AFI->getDPRCalleeSavedAreaSize());
NumBytes -=
(ReservedArgStack + AFI->getFPCXTSaveAreaSize() +
AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedGapSize() + AFI->getDPRCalleeSavedArea1Size() +
AFI->getGPRCalleeSavedArea3Size());

// Reset SP based on frame pointer only if the stack frame extends beyond
// frame pointer stack slot or target is ELF and the function has FP.
Expand Down Expand Up @@ -1491,11 +1507,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineInstr::FrameDestroy);

// Increment past our save areas.
if (AFI->getGPRCalleeSavedArea2Size() &&
PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
if (AFI->getGPRCalleeSavedArea3Size()) {
assert(PushPopSplit == ARMSubtarget::SplitR11WindowsSEH);
MBBI++;
}

if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
if (MBBI != MBB.end() && AFI->getDPRCalleeSavedArea1Size()) {
MBBI++;
// Since vpop register list cannot have gaps, there may be multiple vpop
// instructions in the epilogue.
Expand All @@ -1509,9 +1526,10 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineInstr::FrameDestroy);
}

if (AFI->getGPRCalleeSavedArea2Size() &&
PushPopSplit != ARMSubtarget::SplitR11WindowsSEH)
if (AFI->getGPRCalleeSavedArea2Size()) {
assert(PushPopSplit != ARMSubtarget::SplitR11WindowsSEH);
MBBI++;
}
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;

if (ReservedArgStack || IncomingArgStackToRestore) {
Expand Down Expand Up @@ -2128,19 +2146,14 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
return CheckRegArea(Reg, SpillArea::DPRCS1);
};
auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
return CheckRegArea(Reg, SpillArea::GPRCS3);
};

// Windows SEH requires the floating-point registers to be pushed between the
// two blocks of GPRs in some situations. In all other cases, they are pushed
// below the GPRs.
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS1);
emitPushInst(MBB, MI, CSI, FltOpc, 0, true, IsDPRCS1);
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS2);
} else {
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS1);
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS2);
emitPushInst(MBB, MI, CSI, FltOpc, 0, true, IsDPRCS1);
}
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS1);
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS2);
emitPushInst(MBB, MI, CSI, FltOpc, 0, true, IsDPRCS1);
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS3);

// The code above does not insert spill code for the aligned DPRCS2 registers.
// The stack realignment code will be inserted between the push instructions
Expand Down Expand Up @@ -2190,16 +2203,14 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(
auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
return CheckRegArea(Reg, SpillArea::DPRCS1);
};
auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
return CheckRegArea(Reg, SpillArea::GPRCS3);
};

if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS2);
emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, IsDPRCS1);
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS1);
} else {
emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, IsDPRCS1);
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS2);
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS1);
}
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS3);
emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, IsDPRCS1);
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS2);
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS1);

return true;
}
Expand Down
15 changes: 9 additions & 6 deletions llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// Some may be spilled after the stack has been realigned.
unsigned GPRCS1Offset = 0;
unsigned GPRCS2Offset = 0;
unsigned DPRCSOffset = 0;
unsigned DPRCS1Offset = 0;

/// GPRCS1Size, GPRCS2Size, DPRCS1Size, GPRCS3Size - Sizes of callee saved
/// register spill areas.
Expand All @@ -90,7 +90,8 @@ class ARMFunctionInfo : public MachineFunctionInfo {
unsigned GPRCS1Size = 0;
unsigned GPRCS2Size = 0;
unsigned DPRCSAlignGapSize = 0;
unsigned DPRCSSize = 0;
unsigned DPRCS1Size = 0;
unsigned GPRCS3Size = 0;

/// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in
/// the aligned portion of the stack frame. This is always a contiguous
Expand Down Expand Up @@ -194,25 +195,27 @@ class ARMFunctionInfo : public MachineFunctionInfo {

unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; }
unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; }
unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; }
unsigned getDPRCalleeSavedArea1Offset() const { return DPRCS1Offset; }

void setGPRCalleeSavedArea1Offset(unsigned o) { GPRCS1Offset = o; }
void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; }
void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; }
void setDPRCalleeSavedArea1Offset(unsigned o) { DPRCS1Offset = o; }

unsigned getFPCXTSaveAreaSize() const { return FPCXTSaveSize; }
unsigned getFrameRecordSavedAreaSize() const { return FRSaveSize; }
unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
unsigned getDPRCalleeSavedGapSize() const { return DPRCSAlignGapSize; }
unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
unsigned getDPRCalleeSavedArea1Size() const { return DPRCS1Size; }
unsigned getGPRCalleeSavedArea3Size() const { return GPRCS3Size; }

void setFPCXTSaveAreaSize(unsigned s) { FPCXTSaveSize = s; }
void setFrameRecordSavedAreaSize(unsigned s) { FRSaveSize = s; }
void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
void setDPRCalleeSavedGapSize(unsigned s) { DPRCSAlignGapSize = s; }
void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
void setDPRCalleeSavedArea1Size(unsigned s) { DPRCS1Size = s; }
void setGPRCalleeSavedArea3Size(unsigned s) { GPRCS3Size = s; }

unsigned getArgumentStackSize() const { return ArgumentStackSize; }
void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; }
Expand Down
13 changes: 6 additions & 7 deletions llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
AFI->setFrameRecordSavedAreaSize(FRSize);
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
AFI->setDPRCalleeSavedArea1Offset(DPRCSOffset);
NumBytes = DPRCSOffset;

int FramePtrOffsetInBlock = 0;
Expand Down Expand Up @@ -440,7 +440,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,

AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
AFI->setDPRCalleeSavedArea1Size(DPRCSSize);

if (RegInfo->hasStackRealignment(MF)) {
const unsigned NrBitsToZero = Log2(MFI.getMaxAlign());
Expand Down Expand Up @@ -526,11 +526,10 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
}

// Move SP to start of FP callee save spill area.
NumBytes -= (AFI->getFrameRecordSavedAreaSize() +
AFI->getGPRCalleeSavedArea1Size() +
AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedAreaSize() +
ArgRegsSaveSize);
NumBytes -=
(AFI->getFrameRecordSavedAreaSize() +
AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedArea1Size() + ArgRegsSaveSize);

// We are likely to need a scratch register and we know all callee-save
// registers are free at this point in the epilogue, so pick one.
Expand Down
Loading