57 changes: 31 additions & 26 deletions llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -383,6 +383,16 @@ static bool isLikelyToHaveSVEStack(const AArch64FrameLowering &AFL,
return false;
}

static bool isTargetWindows(const MachineFunction &MF) {
return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
}

bool AArch64FrameLowering::hasSVECalleeSavesAboveFrameRecord(
const MachineFunction &MF) const {
auto *AFI = MF.getInfo<AArch64FunctionInfo>();
return isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
}
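A note on the rename pattern used throughout this diff: stack-layout decisions now key on the target (IsWindows, via the isTargetWindows helper above), while NeedsWinCFI is reserved for deciding whether to emit unwind codes, which is a per-function property — a nounwind function gets no unwind info but must still follow the Windows frame layout. For contrast, needsWinCFI looks roughly like this (paraphrased from AArch64FrameLowering, not part of this diff):

bool AArch64FrameLowering::needsWinCFI(const MachineFunction &MF) const {
  // Per-function: a nounwind function needs no unwind codes, but it still
  // has to use the Windows frame layout selected by isTargetWindows().
  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
         MF.getFunction().needsUnwindTableEntry();
}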

/// Returns true if a homogeneous prolog or epilog code can be emitted
/// for the size optimization. If possible, a frame helper call is injected.
/// When Exit block is given, this check is for epilog.
@@ -396,7 +406,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
return false;

// TODO: Windows is not supported yet.
if (needsWinCFI(MF))
if (isTargetWindows(MF))
return false;

// TODO: SVE is not supported yet.
@@ -1153,10 +1163,6 @@ bool AArch64FrameLowering::requiresSaveVG(const MachineFunction &MF) const {
return true;
}

static bool isTargetWindows(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
}

void AArch64FrameLowering::emitPacRetPlusLeafHardening(
MachineFunction &MF) const {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
@@ -1255,8 +1261,7 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());

const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
bool FPAfterSVECalleeSaves =
isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
bool FPAfterSVECalleeSaves = hasSVECalleeSavesAboveFrameRecord(MF);
if (MFI.hasScalableStackID(FI)) {
if (FPAfterSVECalleeSaves &&
-ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize()) {
@@ -1426,8 +1431,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
"In the presence of dynamic stack pointer realignment, "
"non-argument/CSR objects cannot be accessed through the frame pointer");

bool FPAfterSVECalleeSaves =
isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
bool FPAfterSVECalleeSaves = hasSVECalleeSavesAboveFrameRecord(MF);

if (isSVE) {
StackOffset FPOffset = StackOffset::get(
@@ -1643,7 +1647,6 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
return;

bool IsWindows = isTargetWindows(MF);
bool NeedsWinCFI = AFL.needsWinCFI(MF);
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned StackHazardSize = getStackHazardSize(MF);
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -1661,7 +1664,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
int StackFillDir = -1;
int RegInc = 1;
unsigned FirstReg = 0;
if (NeedsWinCFI) {
if (IsWindows) {
// For WinCFI, fill the stack from the bottom up.
ByteOffset = 0;
StackFillDir = 1;
@@ -1671,7 +1674,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
FirstReg = Count - 1;
}

bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize();
bool FPAfterSVECalleeSaves = AFL.hasSVECalleeSavesAboveFrameRecord(MF);

int ZPRByteOffset = 0;
int PPRByteOffset = 0;
@@ -1728,6 +1731,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
ByteOffset += StackFillDir * StackHazardSize;
LastReg = RPI.Reg1;

bool NeedsWinCFI = AFL.needsWinCFI(MF);
int Scale = TRI->getSpillSize(*RPI.RC);
// Add the next reg to the pair if it is in the same register class.
if (unsigned(i + RegInc) < Count && !HasCSHazardPadding) {
@@ -1743,8 +1747,9 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
break;
case RegPairInfo::FPR64:
if (AArch64::FPR64RegClass.contains(NextReg) &&
!invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
IsFirst, TRI))
!invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows,
NeedsWinCFI, NeedsFrameRecord, IsFirst,
TRI))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::FPR128:
@@ -1798,7 +1803,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
"Callee-save registers not saved as adjacent register pair!");

RPI.FrameIdx = CSI[i].getFrameIdx();
if (NeedsWinCFI &&
if (IsWindows &&
RPI.isPaired()) // RPI.FrameIdx must be the lower index of the pair
RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();

@@ -1825,7 +1830,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,

// Round up size of non-pair to pair size if we need to pad the
// callee-save area to ensure 16-byte alignment.
if (NeedGapToAlignStack && !NeedsWinCFI && !RPI.isScalable() &&
if (NeedGapToAlignStack && !IsWindows && !RPI.isScalable() &&
RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired() &&
ByteOffset % 16 != 0) {
ByteOffset += 8 * StackFillDir;
@@ -1841,7 +1846,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
assert(OffsetPost % Scale == 0);
// If filling top down (default), we want the offset after incrementing it.
// If filling bottom up (WinCFI) we need the original offset.
int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;
int Offset = IsWindows ? OffsetPre : OffsetPost;

// The FP, LR pair goes 8 bytes into our expanded 24-byte slot so that the
// Swift context can directly precede FP.
@@ -1880,7 +1885,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
if (RPI.isPaired())
i += RegInc;
}
if (NeedsWinCFI) {
if (IsWindows) {
// If we need an alignment gap in the stack, align the topmost stack
// object. A stack frame with a gap looks like this, bottom up:
// x19, d8, d9, gap.
@@ -2018,14 +2023,15 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
dbgs() << ")\n";
});

assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
assert((!isTargetWindows(MF) ||
!(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
"Windows unwinding requires a consecutive (FP,LR) pair");
// Windows unwind codes require consecutive registers if registers are
// paired. Make the switch here, so that the code below will save (x,x+1)
// and not (x+1,x).
unsigned FrameIdxReg1 = RPI.FrameIdx;
unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
if (NeedsWinCFI && RPI.isPaired()) {
if (isTargetWindows(MF) && RPI.isPaired()) {
std::swap(Reg1, Reg2);
std::swap(FrameIdxReg1, FrameIdxReg2);
}
@@ -2194,7 +2200,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
// and not (x+1,x).
unsigned FrameIdxReg1 = RPI.FrameIdx;
unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
if (NeedsWinCFI && RPI.isPaired()) {
if (isTargetWindows(MF) && RPI.isPaired()) {
std::swap(Reg1, Reg2);
std::swap(FrameIdxReg1, FrameIdxReg2);
}
@@ -2706,14 +2712,14 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
MachineFunction &MF, const TargetRegisterInfo *RegInfo,
std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
unsigned &MaxCSFrameIndex) const {
bool NeedsWinCFI = needsWinCFI(MF);
bool IsWindows = isTargetWindows(MF);
unsigned StackHazardSize = getStackHazardSize(MF);
// To match the canonical windows frame layout, reverse the list of
// callee saved registers to get them laid out by PrologEpilogInserter
// in the right order. (PrologEpilogInserter allocates stack objects top
// down. Windows canonical prologs store higher numbered registers at
// the top, thus have the CSI array start from the highest registers.)
if (NeedsWinCFI)
if (IsWindows)
std::reverse(CSI.begin(), CSI.end());

if (CSI.empty())
@@ -2724,8 +2730,7 @@
MachineFrameInfo &MFI = MF.getFrameInfo();
auto *AFI = MF.getInfo<AArch64FunctionInfo>();

bool UsesWinAAPCS = isTargetWindows(MF);
if (UsesWinAAPCS && hasFP(MF) && AFI->hasSwiftAsyncContext()) {
if (IsWindows && hasFP(MF) && AFI->hasSwiftAsyncContext()) {
int FrameIdx = MFI.CreateStackObject(8, Align(16), true);
AFI->setSwiftAsyncContextFrameIdx(FrameIdx);
if ((unsigned)FrameIdx < MinCSFrameIndex)
@@ -2778,7 +2783,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
MaxCSFrameIndex = FrameIdx;

// Grab 8 bytes below FP for the extended asynchronous frame info.
if (hasFP(MF) && AFI->hasSwiftAsyncContext() && !UsesWinAAPCS &&
if (hasFP(MF) && AFI->hasSwiftAsyncContext() && !IsWindows &&
Reg == AArch64::FP) {
FrameIdx = MFI.CreateStackObject(8, Alignment, true);
AFI->setSwiftAsyncContextFrameIdx(FrameIdx);
14 changes: 14 additions & 0 deletions llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -169,6 +169,20 @@ class AArch64FrameLowering : public TargetFrameLowering {
friend class AArch64PrologueEmitter;
friend class AArch64EpilogueEmitter;

// Windows unwind can't represent the required stack adjustments if we have
// both SVE callee-saves and dynamic stack allocations, and the frame
// pointer is before the SVE spills. The allocation of the frame pointer
// must be the last instruction in the prologue so the unwinder can restore
// the stack pointer correctly. (And there isn't any unwind opcode for
// `addvl sp, x29, -17`.)
//
// Because of this, we do spills in the opposite order on Windows: first SVE,
// then GPRs. The main side-effect of this is that it makes accessing
// parameters passed on the stack more expensive.
//
// We could consider rearranging the spills for simpler cases.
bool hasSVECalleeSavesAboveFrameRecord(const MachineFunction &MF) const;
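A rough picture of the two layouts this predicate distinguishes (illustrative only; exact slot ordering and padding depend on the callee-saved register set):

// Higher addresses at the top; SP decreases downward.
//
//   Default AAPCS layout             Windows SVE layout (this predicate)
//   +------------------------+      +------------------------+
//   | GPR callee-saves,      |      | SVE callee-saves       |
//   |   frame record (FP,LR) |      +------------------------+
//   +------------------------+      | GPR callee-saves,      |
//   | SVE callee-saves       |      |   frame record (FP,LR) |
//   +------------------------+      +------------------------+
//   | locals / dynamic alloc |      | locals / dynamic alloc |
//   +------------------------+      +------------------------+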

protected:
bool hasFPImpl(const MachineFunction &MF) const override;

14 changes: 1 addition & 13 deletions llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -96,19 +96,7 @@ AArch64PrologueEpilogueCommon::AArch64PrologueEpilogueCommon(
HasFP = AFL.hasFP(MF);
NeedsWinCFI = AFL.needsWinCFI(MF);

// Windows unwind can't represent the required stack adjustments if we have
// both SVE callee-saves and dynamic stack allocations, and the frame pointer
// is before the SVE spills. The allocation of the frame pointer must be the
// last instruction in the prologue so the unwinder can restore the stack
// pointer correctly. (And there isn't any unwind opcode for `addvl sp, x29,
// -17`.)
//
// Because of this, we do spills in the opposite order on Windows: first SVE,
// then GPRs. The main side-effect of this is that it makes accessing
// parameters passed on the stack more expensive.
//
// We could consider rearranging the spills for simpler cases.
if (Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize()) {
if (AFL.hasSVECalleeSavesAboveFrameRecord(MF)) {
if (AFI->hasStackHazardSlotIndex())
reportFatalUsageError("SME hazard padding is not supported on Windows");
SVELayout = SVEStackLayout::CalleeSavesAboveFrameRecord;
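Downstream of this constructor (outside the hunk), the prologue and epilogue emitters branch on the cached layout kind; a minimal sketch of the consuming pattern, with the branch body hypothetical:

// Sketch: the layout cached above drives later emission order.
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
  // Spill SVE callee-saves first, then form the frame record (FP, LR),
  // so the final SP adjustment stays expressible in Windows unwind codes.
}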
76 changes: 38 additions & 38 deletions llvm/test/CodeGen/AArch64/arm64ec-reservedregs.ll
@@ -9,20 +9,20 @@
define i32 @no_int_regs(i32 %x) nounwind {
; CHECK-LABEL: no_int_regs:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x30, x29, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT: str x27, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: str w0, [sp, #28] // 4-byte Folded Spill
; CHECK-NEXT: stp x19, x20, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT: stp x21, x22, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x25, x26, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: str x27, [sp, #48] // 8-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #56] // 16-byte Folded Spill
; CHECK-NEXT: str w0, [sp, #76] // 4-byte Folded Spill
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldr w0, [sp, #28] // 4-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldr x27, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp x30, x29, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #56] // 16-byte Folded Reload
; CHECK-NEXT: ldr w0, [sp, #76] // 4-byte Folded Reload
; CHECK-NEXT: ldp x25, x26, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldr x27, [sp, #48] // 8-byte Folded Reload
; CHECK-NEXT: ldp x21, x22, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp x19, x20, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT: ret
entry:
tail call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x15},~{x16},~{x17},~{x19},~{x20},~{x21},~{x22},~{x25},~{x26},~{x27},~{fp},~{lr}"()
@@ -32,20 +32,20 @@ entry:
define i32 @one_int_reg(i32 %x) nounwind {
; CHECK-LABEL: one_int_reg:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x30, x29, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT: str x27, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: stp x19, x20, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT: stp x21, x22, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x25, x26, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: str x27, [sp, #48] // 8-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #56] // 16-byte Folded Spill
; CHECK-NEXT: mov w30, w0
; CHECK-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldr x27, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: mov w0, w30
; CHECK-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp x30, x29, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #56] // 16-byte Folded Reload
; CHECK-NEXT: ldp x25, x26, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldr x27, [sp, #48] // 8-byte Folded Reload
; CHECK-NEXT: ldp x21, x22, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp x19, x20, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT: ret
entry:
tail call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x15},~{x16},~{x17},~{x19},~{x20},~{x21},~{x22},~{x25},~{x26},~{x27},~{fp}"()
@@ -56,18 +56,18 @@ define float @no_float_regs(float %x) nounwind {
; CHECK-LABEL: no_float_regs:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #80
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp d8, d9, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d10, d11, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d12, d13, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d14, d15, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d14, d15, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d12, d13, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d10, d11, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d8, d9, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
entry:
@@ -78,18 +78,18 @@ entry:
define float @one_float_reg(float %x) nounwind {
; CHECK-LABEL: one_float_reg:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill
; CHECK-NEXT: stp d8, d9, [sp, #-64]! // 16-byte Folded Spill
; CHECK-NEXT: stp d14, d15, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: fmov s15, s0
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d10, d11, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d12, d13, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d12, d13, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: fmov s0, s15
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload
; CHECK-NEXT: ldp d14, d15, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d10, d11, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d8, d9, [sp], #64 // 16-byte Folded Reload
; CHECK-NEXT: ret
entry:
tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14}"()
27 changes: 27 additions & 0 deletions llvm/test/CodeGen/AArch64/framelayout-sve-win.mir
@@ -17,6 +17,7 @@
define aarch64_sve_vector_pcs void @save_restore_sve() uwtable { entry: unreachable }
define aarch64_sve_vector_pcs void @save_restore_sve_realign() uwtable { entry: unreachable }
define aarch64_sve_vector_pcs void @frame_layout() uwtable { entry: unreachable }
define aarch64_sve_vector_pcs void @test_nounwind_layout() nounwind { entry: unreachable }
...
---
name: test_allocate_sve
@@ -882,3 +883,29 @@ body: |

RET_ReallyLR
...
---
name: test_nounwind_layout
stack:
body: |
bb.0.entry:
; CHECK-LABEL: name: test_nounwind_layout
; CHECK: fixedStack:
; CHECK: liveins: $p8, $z8, $lr, $x20
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg
; CHECK-NEXT: frame-setup STR_PXI killed $p8, $sp, 0 :: (store (s16) into %stack.3)
; CHECK-NEXT: frame-setup STR_ZXI killed $z8, $sp, 1 :: (store (s128) into %stack.2)
; CHECK-NEXT: early-clobber $sp = frame-setup STPXpre killed $x20, killed $lr, $sp, -2 :: (store (s64) into %stack.0), (store (s64) into %stack.1)
; CHECK-NEXT: $x20 = IMPLICIT_DEF
; CHECK-NEXT: $p8 = IMPLICIT_DEF
; CHECK-NEXT: $z8 = IMPLICIT_DEF
; CHECK-NEXT: early-clobber $sp, $x20, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.0), (load (s64) from %stack.1)
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.2)
; CHECK-NEXT: $p8 = frame-destroy LDR_PXI $sp, 0 :: (load (s16) from %stack.3)
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg
; CHECK-NEXT: RET_ReallyLR
$x20 = IMPLICIT_DEF
$p8 = IMPLICIT_DEF
$z8 = IMPLICIT_DEF
RET_ReallyLR
...