|
126 | 126 | // and the SME unit try to access the same area of memory, including if the |
127 | 127 | // access is to an area of the stack. To try to alleviate this we attempt to |
128 | 128 | // introduce extra padding into the stack frame between FP and GPR accesses, |
129 | | -// controlled by the StackHazardSize option. Without changing the layout of the |
130 | | -// stack frame in the diagram above, a stack object of size StackHazardSize is |
131 | | -// added between GPR and FPR CSRs. Another is added to the stack objects |
132 | | -// section, and stack objects are sorted so that FPR > Hazard padding slot > |
133 | | -// GPRs (where possible). Unfortunately some things are not handled well (VLA |
134 | | -// area, arguments on the stack, object with both GPR and FPR accesses), but if |
135 | | -// those are controlled by the user then the entire stack frame becomes GPR at |
136 | | -// the start/end with FPR in the middle, surrounded by Hazard padding. |
| 129 | +// controlled by the aarch64-stack-hazard-size option. Without changing the |
| 130 | +// layout of the stack frame in the diagram above, a stack object of size |
| 131 | +// aarch64-stack-hazard-size is added between GPR and FPR CSRs. Another is added |
| 132 | +// to the stack objects section, and stack objects are sorted so that FPR > |
| 133 | +// Hazard padding slot > GPRs (where possible). Unfortunately some things are |
| 134 | +// not handled well (VLA area, arguments on the stack, objects with both GPR and |
| 135 | +// FPR accesses), but if those are controlled by the user then the entire stack |
| 136 | +// frame becomes GPR at the start/end with FPR in the middle, surrounded by |
| 137 | +// Hazard padding. |
137 | 138 | // |
138 | 139 | // An example of the prologue: |
139 | 140 | // |
@@ -273,9 +274,6 @@ cl::opt<bool> EnableHomogeneousPrologEpilog( |
273 | 274 | cl::desc("Emit homogeneous prologue and epilogue for the size " |
274 | 275 | "optimization (default = off)")); |
275 | 276 |
|
276 | | -// Stack hazard padding size. 0 = disabled. |
277 | | -static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size", |
278 | | - cl::init(0), cl::Hidden); |
279 | 277 | // Stack hazard size for analysis remarks. The subtarget's size has precedence.
280 | 278 | static cl::opt<unsigned> |
281 | 279 | StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0), |
@@ -1617,6 +1615,10 @@ static bool isTargetWindows(const MachineFunction &MF) { |
1617 | 1615 | return MF.getSubtarget<AArch64Subtarget>().isTargetWindows(); |
1618 | 1616 | } |
1619 | 1617 |
|
| 1618 | +static unsigned getStackHazardSize(const MachineFunction &MF) { // Stack hazard padding size, taken from MF's AArch64 subtarget.
| 1619 | + return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
| 1620 | +}
| 1621 | + |
1620 | 1622 | // Convenience function to determine whether I is an SVE callee save. |
1621 | 1623 | static bool IsSVECalleeSave(MachineBasicBlock::iterator I) { |
1622 | 1624 | switch (I->getOpcode()) { |
@@ -2988,6 +2990,7 @@ static void computeCalleeSaveRegisterPairs( |
2988 | 2990 | bool IsWindows = isTargetWindows(MF); |
2989 | 2991 | bool NeedsWinCFI = needsWinCFI(MF); |
2990 | 2992 | AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); |
| 2993 | + unsigned StackHazardSize = getStackHazardSize(MF); |
2991 | 2994 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
2992 | 2995 | CallingConv::ID CC = MF.getFunction().getCallingConv(); |
2993 | 2996 | unsigned Count = CSI.size(); |
@@ -3615,6 +3618,7 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI, |
3615 | 3618 | // which can be used to determine if any hazard padding is needed. |
3616 | 3619 | void AArch64FrameLowering::determineStackHazardSlot( |
3617 | 3620 | MachineFunction &MF, BitVector &SavedRegs) const { |
| 3621 | + unsigned StackHazardSize = getStackHazardSize(MF); |
3618 | 3622 | if (StackHazardSize == 0 || StackHazardSize % 16 != 0 || |
3619 | 3623 | MF.getInfo<AArch64FunctionInfo>()->hasStackHazardSlotIndex()) |
3620 | 3624 | return; |
@@ -3805,7 +3809,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, |
3805 | 3809 | // StackHazardSize if so. |
3806 | 3810 | determineStackHazardSlot(MF, SavedRegs); |
3807 | 3811 | if (AFI->hasStackHazardSlotIndex()) |
3808 | | - CSStackSize += StackHazardSize; |
| 3812 | + CSStackSize += getStackHazardSize(MF); |
3809 | 3813 |
|
3810 | 3814 | // Save number of saved regs, so we can easily update CSStackSize later. |
3811 | 3815 | unsigned NumSavedRegs = SavedRegs.count(); |
@@ -3920,6 +3924,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots( |
3920 | 3924 | std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex, |
3921 | 3925 | unsigned &MaxCSFrameIndex) const { |
3922 | 3926 | bool NeedsWinCFI = needsWinCFI(MF); |
| 3927 | + unsigned StackHazardSize = getStackHazardSize(MF); |
3923 | 3928 | // To match the canonical windows frame layout, reverse the list of |
3924 | 3929 | // callee saved registers to get them laid out by PrologEpilogInserter |
3925 | 3930 | // in the right order. (PrologEpilogInserter allocates stack objects top |
@@ -5154,6 +5159,7 @@ void AArch64FrameLowering::emitRemarks( |
5154 | 5159 | if (Attrs.hasNonStreamingInterfaceAndBody()) |
5155 | 5160 | return; |
5156 | 5161 |
|
| 5162 | + unsigned StackHazardSize = getStackHazardSize(MF); |
5157 | 5163 | const uint64_t HazardSize = |
5158 | 5164 | (StackHazardSize) ? StackHazardSize : StackHazardRemarkSize; |
5159 | 5165 |
|
|
0 commit comments