Skip to content

Commit 549330a

Browse files
committed
[AArch64] Fix SVE callee-save layout for nounwind functions on Windows
Without this change, functions marked 'nounwind' are not compiled correctly, because the frame-lowering code assumes that CFI is available whenever the function has SVE callee-saves.
1 parent 314dc33 commit 549330a

File tree

2 files changed

+54
-26
lines changed

2 files changed

+54
-26
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,28 @@ static bool isLikelyToHaveSVEStack(const MachineFunction &MF) {
355355
return false;
356356
}
357357

358+
static bool isTargetWindows(const MachineFunction &MF) {
359+
return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
360+
}
361+
362+
// Windows unwind can't represent the required stack adjustments if we have
363+
// both SVE callee-saves and dynamic stack allocations, and the frame
364+
// pointer is before the SVE spills. The allocation of the frame pointer
365+
// must be the last instruction in the prologue so the unwinder can restore
366+
// the stack pointer correctly. (And there isn't any unwind opcode for
367+
// `addvl sp, x29, -17`.)
368+
//
369+
// Because of this, we do spills in the opposite order on Windows: first SVE,
370+
// then GPRs. The main side-effect of this is that it makes accessing
371+
// parameters passed on the stack more expensive.
372+
//
373+
// We could consider rearranging the spills for simpler cases.
374+
static bool hasSVECalleeSavesAboveFrameRecord(const MachineFunction &MF) {
375+
auto *AFI = MF.getInfo<AArch64FunctionInfo>();
376+
return isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize() &&
377+
needsWinCFI(MF);
378+
}
379+
358380
/// Returns true if a homogeneous prolog or epilog code can be emitted
359381
/// for the size optimization. If possible, a frame helper call is injected.
360382
/// When Exit block is given, this check is for epilog.
@@ -1694,10 +1716,6 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
16941716
}
16951717
}
16961718

1697-
static bool isTargetWindows(const MachineFunction &MF) {
1698-
return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
1699-
}
1700-
17011719
static unsigned getStackHazardSize(const MachineFunction &MF) {
17021720
return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
17031721
}
@@ -2052,21 +2070,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
20522070
bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
20532071
unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
20542072

2055-
// Windows unwind can't represent the required stack adjustments if we have
2056-
// both SVE callee-saves and dynamic stack allocations, and the frame
2057-
// pointer is before the SVE spills. The allocation of the frame pointer
2058-
// must be the last instruction in the prologue so the unwinder can restore
2059-
// the stack pointer correctly. (And there isn't any unwind opcode for
2060-
// `addvl sp, x29, -17`.)
2061-
//
2062-
// Because of this, we do spills in the opposite order on Windows: first SVE,
2063-
// then GPRs. The main side-effect of this is that it makes accessing
2064-
// parameters passed on the stack more expensive.
2065-
//
2066-
// We could consider rearranging the spills for simpler cases.
2067-
bool FPAfterSVECalleeSaves =
2068-
Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();
2069-
2073+
bool FPAfterSVECalleeSaves = hasSVECalleeSavesAboveFrameRecord(MF);
20702074
if (FPAfterSVECalleeSaves && AFI->hasStackHazardSlotIndex())
20712075
reportFatalUsageError("SME hazard padding is not supported on Windows");
20722076

@@ -2566,8 +2570,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
25662570
return;
25672571
}
25682572

2569-
bool FPAfterSVECalleeSaves =
2570-
Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();
2573+
bool FPAfterSVECalleeSaves = hasSVECalleeSavesAboveFrameRecord(MF);
25712574

25722575
bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
25732576
// Assume we can't combine the last pop with the sp restore.
@@ -2895,8 +2898,7 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
28952898
return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());
28962899

28972900
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
2898-
bool FPAfterSVECalleeSaves =
2899-
isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
2901+
bool FPAfterSVECalleeSaves = hasSVECalleeSavesAboveFrameRecord(MF);
29002902
if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
29012903
if (FPAfterSVECalleeSaves &&
29022904
-ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize())
@@ -3053,8 +3055,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
30533055
"In the presence of dynamic stack pointer realignment, "
30543056
"non-argument/CSR objects cannot be accessed through the frame pointer");
30553057

3056-
bool FPAfterSVECalleeSaves =
3057-
isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
3058+
bool FPAfterSVECalleeSaves = hasSVECalleeSavesAboveFrameRecord(MF);
30583059

30593060
if (isSVE) {
30603061
StackOffset FPOffset =
@@ -3279,7 +3280,7 @@ static void computeCalleeSaveRegisterPairs(
32793280
RegInc = -1;
32803281
FirstReg = Count - 1;
32813282
}
3282-
bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize();
3283+
bool FPAfterSVECalleeSaves = hasSVECalleeSavesAboveFrameRecord(MF);
32833284
int ScalableByteOffset =
32843285
FPAfterSVECalleeSaves ? 0 : AFI->getSVECalleeSavedStackSize();
32853286
bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();

llvm/test/CodeGen/AArch64/framelayout-sve-win.mir

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
define aarch64_sve_vector_pcs void @save_restore_sve() uwtable { entry: unreachable }
1818
define aarch64_sve_vector_pcs void @save_restore_sve_realign() uwtable { entry: unreachable }
1919
define aarch64_sve_vector_pcs void @frame_layout() uwtable { entry: unreachable }
20+
define aarch64_sve_vector_pcs void @test_nounwind_layout() nounwind { entry: unreachable }
2021
...
2122
---
2223
name: test_allocate_sve
@@ -892,3 +893,29 @@ body: |
892893
893894
RET_ReallyLR
894895
...
896+
---
897+
name: test_nounwind_layout
898+
stack:
899+
body: |
900+
bb.0.entry:
901+
; CHECK-LABEL: name: test_nounwind_layout
902+
; CHECK: fixedStack:
903+
; CHECK: liveins: $x20, $lr, $z8, $p8
904+
; CHECK-NEXT: {{ $}}
905+
; CHECK-NEXT: early-clobber $sp = frame-setup STPXpre killed $lr, killed $x20, $sp, -2 :: (store (s64) into %stack.3), (store (s64) into %stack.2)
906+
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg
907+
; CHECK-NEXT: frame-setup STR_ZXI killed $z8, $sp, 1 :: (store (s128) into %stack.1)
908+
; CHECK-NEXT: frame-setup STR_PXI killed $p8, $sp, 15 :: (store (s16) into %stack.0)
909+
; CHECK-NEXT: $x20 = IMPLICIT_DEF
910+
; CHECK-NEXT: $p8 = IMPLICIT_DEF
911+
; CHECK-NEXT: $z8 = IMPLICIT_DEF
912+
; CHECK-NEXT: $p8 = frame-destroy LDR_PXI $sp, 15 :: (load (s16) from %stack.0)
913+
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.1)
914+
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg
915+
; CHECK-NEXT: early-clobber $sp, $lr, $x20 = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.3), (load (s64) from %stack.2)
916+
; CHECK-NEXT: RET_ReallyLR
917+
$x20 = IMPLICIT_DEF
918+
$p8 = IMPLICIT_DEF
919+
$z8 = IMPLICIT_DEF
920+
RET_ReallyLR
921+
...

0 commit comments

Comments
 (0)