Skip to content

Commit e52f48f

Browse files
committed
AArch64: align pair-wise spills on WoS to 16-byte (llvm#166902)
Adjust the frame setup code for Windows ARM64 to attempt to align pair-wise spills to 16-byte boundaries. This enables us to properly emit the spills for custom clang calling conventions such as preserve_most, which spills x9-x15, registers that are normally volatile. Even when using the ARM64EC opcodes for the unwinding, we cannot represent the spill if it is unaligned. (cherry picked from commit 2d8563f)
1 parent 45ef823 commit e52f48f

File tree

2 files changed

+77
-27
lines changed

2 files changed

+77
-27
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3337,8 +3337,10 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {
33373337
!requiresSaveVG(MF) && !AFI->isSVECC();
33383338
}
33393339

3340-
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
3341-
bool NeedsWinCFI, bool IsFirst,
3340+
static bool invalidateWindowsRegisterPairing(bool SpillExtendedVolatile,
3341+
unsigned SpillCount, unsigned Reg1,
3342+
unsigned Reg2, bool NeedsWinCFI,
3343+
bool IsFirst,
33423344
const TargetRegisterInfo *TRI) {
33433345
// If we are generating register pairs for a Windows function that requires
33443346
// EH support, then pair consecutive registers only. There are no unwind
@@ -3351,8 +3353,18 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
33513353
return true;
33523354
if (!NeedsWinCFI)
33533355
return false;
3356+
3357+
// ARM64EC introduced `save_any_regp`, which expects 16-byte alignment.
3358+
// This is handled by only allowing paired spills for registers spilled at
3359+
// even positions (which should be 16-byte aligned, as other GPRs/FPRs are
3360+
// 8-bytes). We carve out an exception for {FP,LR}, which does not require
3361+
// 16-byte alignment in the uop representation.
33543362
if (TRI->getEncodingValue(Reg2) == TRI->getEncodingValue(Reg1) + 1)
3355-
return false;
3363+
return SpillExtendedVolatile
3364+
? !((Reg1 == AArch64::FP && Reg2 == AArch64::LR) ||
3365+
(SpillCount % 2) == 0)
3366+
: false;
3367+
33563368
// If pairing a GPR with LR, the pair can be described by the save_lrpair
33573369
// opcode. If this is the first register pair, it would end up with a
33583370
// predecrement, but there's no save_lrpair_x opcode, so we can only do this
@@ -3368,12 +3380,15 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
33683380
/// WindowsCFI requires that only consecutive registers can be paired.
33693381
/// LR and FP need to be allocated together when the frame needs to save
33703382
/// the frame-record. This means any other register pairing with LR is invalid.
3371-
static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
3372-
bool UsesWinAAPCS, bool NeedsWinCFI,
3373-
bool NeedsFrameRecord, bool IsFirst,
3383+
static bool invalidateRegisterPairing(bool SpillExtendedVolatile,
3384+
unsigned SpillCount, unsigned Reg1,
3385+
unsigned Reg2, bool UsesWinAAPCS,
3386+
bool NeedsWinCFI, bool NeedsFrameRecord,
3387+
bool IsFirst,
33743388
const TargetRegisterInfo *TRI) {
33753389
if (UsesWinAAPCS)
3376-
return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst,
3390+
return invalidateWindowsRegisterPairing(SpillExtendedVolatile, SpillCount,
3391+
Reg1, Reg2, NeedsWinCFI, IsFirst,
33773392
TRI);
33783393

33793394
// If we need to store the frame record, don't pair any register
@@ -3469,6 +3484,23 @@ static void computeCalleeSaveRegisterPairs(
34693484
bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize();
34703485
int ScalableByteOffset =
34713486
FPAfterSVECalleeSaves ? 0 : AFI->getSVECalleeSavedStackSize();
3487+
3488+
// Windows AAPCS has x9-x15 as volatile registers, x16-x17 as intra-procedural
3489+
// scratch, x18 as platform reserved. However, clang has extended calling
3490+
// conventions such as preserve_most and preserve_all which treat these as
3491+
// CSR. As such, the ARM64 unwind uOPs bias registers by 19. We use ARM64EC
3492+
// uOPs which have separate restrictions. We need to check for that.
3493+
//
3494+
// NOTE: we currently do not account for the D registers as LLVM does not
3495+
// support non-ABI compliant D register spills.
3496+
bool SpillExtendedVolatile =
3497+
IsWindows && std::any_of(std::begin(CSI), std::end(CSI),
3498+
[](const CalleeSavedInfo &CSI) {
3499+
const auto &Reg = CSI.getReg();
3500+
return Reg >= AArch64::X0 &&
3501+
Reg <= AArch64::X18;
3502+
});
3503+
34723504
bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();
34733505
Register LastReg = 0;
34743506

@@ -3511,17 +3543,19 @@ static void computeCalleeSaveRegisterPairs(
35113543
if (unsigned(i + RegInc) < Count && !AFI->hasStackHazardSlotIndex()) {
35123544
MCRegister NextReg = CSI[i + RegInc].getReg();
35133545
bool IsFirst = i == FirstReg;
3546+
unsigned SpillCount = NeedsWinCFI ? FirstReg - i : i;
35143547
switch (RPI.Type) {
35153548
case RegPairInfo::GPR:
35163549
if (AArch64::GPR64RegClass.contains(NextReg) &&
3517-
!invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows,
3518-
NeedsWinCFI, NeedsFrameRecord, IsFirst,
3519-
TRI))
3550+
!invalidateRegisterPairing(
3551+
SpillExtendedVolatile, SpillCount, RPI.Reg1, NextReg, IsWindows,
3552+
NeedsWinCFI, NeedsFrameRecord, IsFirst, TRI))
35203553
RPI.Reg2 = NextReg;
35213554
break;
35223555
case RegPairInfo::FPR64:
35233556
if (AArch64::FPR64RegClass.contains(NextReg) &&
3524-
!invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
3557+
!invalidateWindowsRegisterPairing(SpillExtendedVolatile, SpillCount,
3558+
RPI.Reg1, NextReg, NeedsWinCFI,
35253559
IsFirst, TRI))
35263560
RPI.Reg2 = NextReg;
35273561
break;
Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: llc -mtriple aarch64-unknown-windows-msvc -filetype asm -o - %s | FileCheck %s
2+
; RUN: llc -mtriple aarch64-unknown-windows-msvc -filetype obj -o - %s | llvm-readobj -u - | FileCheck %s -check-prefix CHECK-UNWIND
23

34
declare dso_local void @g(ptr noundef)
45
define dso_local preserve_mostcc void @f(ptr noundef %p) #0 {
@@ -12,23 +13,38 @@ entry:
1213

1314
attributes #0 = { nounwind uwtable(sync) }
1415

15-
; CHECK: stp x9, x10, [sp, #[[OFFSET_0:[0-9]+]]]
16-
; CHECK-NEXT: .seh_save_any_reg_p x9, [[OFFSET_0]]
17-
; CHECK: stp x11, x12, [sp, #[[OFFSET_1:[0-9]+]]]
18-
; CHECK-NEXT: .seh_save_any_reg_p x11, [[OFFSET_1]]
19-
; CHECK: stp x13, x14, [sp, #[[OFFSET_2:[0-9]+]]]
20-
; CHECK-NEXT: .seh_save_any_reg_p x13, [[OFFSET_2]]
21-
; CHECK: str x15, [sp, #[[OFFSET_3:[0-9]+]]]
22-
; CHECK-NEXT: .seh_save_any_reg x15, [[OFFSET_3]]
16+
; CHECK: str x30, [sp, #16]
17+
; CHECK-NEXT: .seh_save_reg x30, 16
18+
; CHECK: str x9, [sp, #24]
19+
; CHECK-NEXT: .seh_save_any_reg x9, 24
20+
; CHECK: stp x10, x11, [sp, #32]
21+
; CHECK-NEXT: .seh_save_any_reg_p x10, 32
22+
; CHECK: stp x12, x13, [sp, #48]
23+
; CHECK-NEXT: .seh_save_any_reg_p x12, 48
24+
; CHECK: stp x14, x15, [sp, #64]
25+
; CHECK-NEXT: .seh_save_any_reg_p x14, 64
2326
; CHECK: .seh_endprologue
2427

2528
; CHECK: .seh_startepilogue
26-
; CHECK: ldr x15, [sp, #[[OFFSET_3]]]
27-
; CHECK-NEXT: .seh_save_any_reg x15, [[OFFSET_3]]
28-
; CHECK: ldp x13, x14, [sp, #[[OFFSET_2]]]
29-
; CHECK-NEXT: .seh_save_any_reg_p x13, [[OFFSET_2]]
30-
; CHECK: ldp x11, x12, [sp, #[[OFFSET_1]]]
31-
; CHECK-NEXT: .seh_save_any_reg_p x11, [[OFFSET_1]]
32-
; CHECK: ldp x9, x10, [sp, #[[OFFSET_0]]]
33-
; CHECK-NEXT: .seh_save_any_reg_p x9, [[OFFSET_0]]
29+
; CHECK: ldp x14, x15, [sp, #64]
30+
; CHECK-NEXT: .seh_save_any_reg_p x14, 64
31+
; CHECK: ldp x12, x13, [sp, #48]
32+
; CHECK-NEXT: .seh_save_any_reg_p x12, 48
33+
; CHECK: ldp x10, x11, [sp, #32]
34+
; CHECK-NEXT: .seh_save_any_reg_p x10, 32
35+
; CHECK: ldr x9, [sp, #24]
36+
; CHECK-NEXT: .seh_save_any_reg x9, 24
37+
; CHECK: ldr x30, [sp, #16]
38+
; CHECK-NEXT: .seh_save_reg x30, 16
39+
3440
; CHECK: .seh_endepilogue
41+
42+
; CHECK-UNWIND: Prologue [
43+
; CHECK-UNWIND: 0xe74e04 ; stp x14, x15, [sp, #64]
44+
; CHECK-UNWIND: 0xe74c03 ; stp x12, x13, [sp, #48]
45+
; CHECK-UNWIND: 0xe74a02 ; stp x10, x11, [sp, #32]
46+
; CHECK-UNWIND: 0xe70903 ; str x9, [sp, #24]
47+
; CHECK-UNWIND: 0xd2c2 ; str x30, [sp, #16]
48+
; CHECK-UNWIND: 0x05 ; sub sp, #80
49+
; CHECK-UNWIND: 0xe4 ; end
50+
; CHECK-UNWIND: ]

0 commit comments

Comments
 (0)