diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index bbf2f26779545..7bd84a3952a79 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -126,14 +126,15 @@ // and the SME unit try to access the same area of memory, including if the // access is to an area of the stack. To try to alleviate this we attempt to // introduce extra padding into the stack frame between FP and GPR accesses, -// controlled by the StackHazardSize option. Without changing the layout of the -// stack frame in the diagram above, a stack object of size StackHazardSize is -// added between GPR and FPR CSRs. Another is added to the stack objects -// section, and stack objects are sorted so that FPR > Hazard padding slot > -// GPRs (where possible). Unfortunately some things are not handled well (VLA -// area, arguments on the stack, object with both GPR and FPR accesses), but if -// those are controlled by the user then the entire stack frame becomes GPR at -// the start/end with FPR in the middle, surrounded by Hazard padding. +// controlled by the aarch64-stack-hazard-size option. Without changing the +// layout of the stack frame in the diagram above, a stack object of size +// aarch64-stack-hazard-size is added between GPR and FPR CSRs. Another is added +// to the stack objects section, and stack objects are sorted so that FPR > +// Hazard padding slot > GPRs (where possible). Unfortunately some things are +// not handled well (VLA area, arguments on the stack, objects with both GPR and +// FPR accesses), but if those are controlled by the user then the entire stack +// frame becomes GPR at the start/end with FPR in the middle, surrounded by +// Hazard padding. 
// // An example of the prologue: // @@ -273,9 +274,6 @@ cl::opt<bool> EnableHomogeneousPrologEpilog( cl::desc("Emit homogeneous prologue and epilogue for the size " "optimization (default = off)")); -// Stack hazard padding size. 0 = disabled. -static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size", - cl::init(0), cl::Hidden); // Stack hazard size for analysis remarks. StackHazardSize takes precedence. static cl::opt<unsigned> StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0), @@ -1614,6 +1612,10 @@ static bool isTargetWindows(const MachineFunction &MF) { return MF.getSubtarget<AArch64Subtarget>().isTargetWindows(); } +static unsigned getStackHazardSize(const MachineFunction &MF) { + return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize(); +} + // Convenience function to determine whether I is an SVE callee save. static bool IsSVECalleeSave(MachineBasicBlock::iterator I) { switch (I->getOpcode()) { @@ -2985,6 +2987,7 @@ static void computeCalleeSaveRegisterPairs( bool IsWindows = isTargetWindows(MF); bool NeedsWinCFI = needsWinCFI(MF); AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); + unsigned StackHazardSize = getStackHazardSize(MF); MachineFrameInfo &MFI = MF.getFrameInfo(); CallingConv::ID CC = MF.getFunction().getCallingConv(); unsigned Count = CSI.size(); @@ -3612,6 +3615,7 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI, // which can be used to determine if any hazard padding is needed. void AArch64FrameLowering::determineStackHazardSlot( MachineFunction &MF, BitVector &SavedRegs) const { + unsigned StackHazardSize = getStackHazardSize(MF); if (StackHazardSize == 0 || StackHazardSize % 16 != 0 || MF.getInfo<AArch64FunctionInfo>()->hasStackHazardSlotIndex()) return; @@ -3802,7 +3806,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // StackHazardSize if so. 
determineStackHazardSlot(MF, SavedRegs); if (AFI->hasStackHazardSlotIndex()) - CSStackSize += StackHazardSize; + CSStackSize += getStackHazardSize(MF); // Save number of saved regs, so we can easily update CSStackSize later. unsigned NumSavedRegs = SavedRegs.count(); @@ -3917,6 +3921,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots( std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex, unsigned &MaxCSFrameIndex) const { bool NeedsWinCFI = needsWinCFI(MF); + unsigned StackHazardSize = getStackHazardSize(MF); // To match the canonical windows frame layout, reverse the list of // callee saved registers to get them laid out by PrologEpilogInserter // in the right order. (PrologEpilogInserter allocates stack objects top @@ -5151,6 +5156,7 @@ void AArch64FrameLowering::emitRemarks( if (Attrs.hasNonStreamingInterfaceAndBody()) return; + unsigned StackHazardSize = getStackHazardSize(MF); const uint64_t HazardSize = (StackHazardSize) ? StackHazardSize : StackHazardRemarkSize; diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 32db1e8c2477a..7fb2a961e0313 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -76,6 +76,16 @@ static cl::opt<unsigned> AArch64MinimumJumpTableEntries( "aarch64-min-jump-table-entries", cl::init(13), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on AArch64")); +static cl::opt<unsigned> AArch64StreamingHazardSize( + "aarch64-streaming-hazard-size", + cl::desc("Hazard size for streaming mode memory accesses. 
0 = disabled."), + cl::init(0), cl::Hidden); + +static cl::alias AArch64StreamingStackHazardSize( + "aarch64-stack-hazard-size", + cl::desc("alias for -aarch64-streaming-hazard-size"), + cl::aliasopt(AArch64StreamingHazardSize)); + unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const { if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0) return OverrideVectorInsertExtractBaseCost; @@ -333,6 +343,7 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU, CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()), IsLittle(LittleEndian), IsStreaming(IsStreaming), IsStreamingCompatible(IsStreamingCompatible), + StreamingHazardSize(AArch64StreamingHazardSize), MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride), MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)), diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 9856415361e50..50adb7cbf69a8 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -84,6 +84,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool IsStreaming; bool IsStreamingCompatible; + unsigned StreamingHazardSize; unsigned MinSVEVectorSizeInBits; unsigned MaxSVEVectorSizeInBits; unsigned VScaleForTuning = 2; @@ -172,6 +173,10 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { /// Returns true if the function has a streaming-compatible body. bool isStreamingCompatible() const { return IsStreamingCompatible; } + /// Returns the size of memory region that if accessed by both the CPU and + /// the SME unit could result in a hazard. 0 = disabled. + unsigned getStreamingHazardSize() const { return StreamingHazardSize; } + /// Returns true if the target has NEON and the function at runtime is known /// to have NEON enabled (e.g. 
the function is known not to be in streaming-SVE /// mode, which disables NEON instructions).