diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 0f7b34c36055f..2140a333b487d 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -383,6 +383,16 @@ static bool isLikelyToHaveSVEStack(const AArch64FrameLowering &AFL,
   return false;
 }
 
+static bool isTargetWindows(const MachineFunction &MF) {
+  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+}
+
+bool AArch64FrameLowering::hasSVECalleeSavesAboveFrameRecord(
+    const MachineFunction &MF) const {
+  auto *AFI = MF.getInfo<AArch64FunctionInfo>();
+  return isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
+}
+
 /// Returns true if a homogeneous prolog or epilog code can be emitted
 /// for the size optimization. If possible, a frame helper call is injected.
 /// When Exit block is given, this check is for epilog.
@@ -396,7 +406,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
     return false;
 
   // TODO: Window is supported yet.
-  if (needsWinCFI(MF))
+  if (isTargetWindows(MF))
     return false;
 
   // TODO: SVE is not supported yet.
@@ -1153,10 +1163,6 @@ bool AArch64FrameLowering::requiresSaveVG(const MachineFunction &MF) const {
   return true;
 }
 
-static bool isTargetWindows(const MachineFunction &MF) {
-  return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
-}
-
 void AArch64FrameLowering::emitPacRetPlusLeafHardening(
     MachineFunction &MF) const {
   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
@@ -1255,8 +1261,7 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
     return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());
 
   const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
-  bool FPAfterSVECalleeSaves =
-      isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
+  bool FPAfterSVECalleeSaves = hasSVECalleeSavesAboveFrameRecord(MF);
   if (MFI.hasScalableStackID(FI)) {
     if (FPAfterSVECalleeSaves &&
         -ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize()) {
@@ -1426,8 +1431,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
          "In the presence of dynamic stack pointer realignment, "
          "non-argument/CSR objects cannot be accessed through the frame pointer");
 
-  bool FPAfterSVECalleeSaves =
-      isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
+  bool FPAfterSVECalleeSaves = hasSVECalleeSavesAboveFrameRecord(MF);
 
   if (isSVE) {
     StackOffset FPOffset = StackOffset::get(
@@ -1643,7 +1647,6 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
     return;
 
   bool IsWindows = isTargetWindows(MF);
-  bool NeedsWinCFI = AFL.needsWinCFI(MF);
   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   unsigned StackHazardSize = getStackHazardSize(MF);
   MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -1661,7 +1664,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
   int StackFillDir = -1;
   int RegInc = 1;
   unsigned FirstReg = 0;
-  if (NeedsWinCFI) {
+  if (IsWindows) {
     // For WinCFI, fill the stack from the bottom up.
     ByteOffset = 0;
     StackFillDir = 1;
@@ -1671,7 +1674,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
     FirstReg = Count - 1;
   }
 
-  bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize();
+  bool FPAfterSVECalleeSaves = AFL.hasSVECalleeSavesAboveFrameRecord(MF);
 
   int ZPRByteOffset = 0;
   int PPRByteOffset = 0;
@@ -1728,6 +1731,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
       ByteOffset += StackFillDir * StackHazardSize;
 
     LastReg = RPI.Reg1;
+    bool NeedsWinCFI = AFL.needsWinCFI(MF);
     int Scale = TRI->getSpillSize(*RPI.RC);
     // Add the next reg to the pair if it is in the same register class.
     if (unsigned(i + RegInc) < Count && !HasCSHazardPadding) {
@@ -1743,8 +1747,9 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
        break;
      case RegPairInfo::FPR64:
        if (AArch64::FPR64RegClass.contains(NextReg) &&
-           !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
-                                             IsFirst, TRI))
+           !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows,
+                                      NeedsWinCFI, NeedsFrameRecord, IsFirst,
+                                      TRI))
          RPI.Reg2 = NextReg;
        break;
      case RegPairInfo::FPR128:
@@ -1798,7 +1803,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
            "Callee-save registers not saved as adjacent register pair!");
 
     RPI.FrameIdx = CSI[i].getFrameIdx();
-    if (NeedsWinCFI &&
+    if (IsWindows &&
         RPI.isPaired()) // RPI.FrameIdx must be the lower index of the pair
       RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();
@@ -1825,7 +1830,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
 
     // Round up size of non-pair to pair size if we need to pad the
     // callee-save area to ensure 16-byte alignment.
-    if (NeedGapToAlignStack && !NeedsWinCFI && !RPI.isScalable() &&
+    if (NeedGapToAlignStack && !IsWindows && !RPI.isScalable() &&
         RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired() &&
         ByteOffset % 16 != 0) {
       ByteOffset += 8 * StackFillDir;
@@ -1841,7 +1846,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
     assert(OffsetPost % Scale == 0);
     // If filling top down (default), we want the offset after incrementing it.
     // If filling bottom up (WinCFI) we need the original offset.
-    int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;
+    int Offset = IsWindows ? OffsetPre : OffsetPost;
 
     // The FP, LR pair goes 8 bytes into our expanded 24-byte slot so that the
     // Swift context can directly precede FP.
@@ -1880,7 +1885,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
     if (RPI.isPaired())
       i += RegInc;
   }
-  if (NeedsWinCFI) {
+  if (IsWindows) {
     // If we need an alignment gap in the stack, align the topmost stack
     // object. A stack frame with a gap looks like this, bottom up:
     // x19, d8. d9, gap.
@@ -2018,14 +2023,15 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
                dbgs() << ")\n";
              });
 
-    assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
+    assert((!isTargetWindows(MF) ||
+            !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
           "Windows unwdinding requires a consecutive (FP,LR) pair");
     // Windows unwind codes require consecutive registers if registers are
     // paired. Make the switch here, so that the code below will save (x,x+1)
     // and not (x+1,x).
     unsigned FrameIdxReg1 = RPI.FrameIdx;
     unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
-    if (NeedsWinCFI && RPI.isPaired()) {
+    if (isTargetWindows(MF) && RPI.isPaired()) {
       std::swap(Reg1, Reg2);
       std::swap(FrameIdxReg1, FrameIdxReg2);
     }
@@ -2194,7 +2200,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
     // Windows unwind codes require consecutive registers if registers are
     // paired. Make the switch here, so that the code below will save (x,x+1)
    // and not (x+1,x).
     unsigned FrameIdxReg1 = RPI.FrameIdx;
     unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
-    if (NeedsWinCFI && RPI.isPaired()) {
+    if (isTargetWindows(MF) && RPI.isPaired()) {
       std::swap(Reg1, Reg2);
       std::swap(FrameIdxReg1, FrameIdxReg2);
     }
@@ -2706,14 +2712,14 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
     MachineFunction &MF, const TargetRegisterInfo *RegInfo,
     std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
     unsigned &MaxCSFrameIndex) const {
-  bool NeedsWinCFI = needsWinCFI(MF);
+  bool IsWindows = isTargetWindows(MF);
   unsigned StackHazardSize = getStackHazardSize(MF);
   // To match the canonical windows frame layout, reverse the list of
   // callee saved registers to get them laid out by PrologEpilogInserter
   // in the right order. (PrologEpilogInserter allocates stack objects top
   // down. Windows canonical prologs store higher numbered registers at
   // the top, thus have the CSI array start from the highest registers.)
-  if (NeedsWinCFI)
+  if (IsWindows)
     std::reverse(CSI.begin(), CSI.end());
 
   if (CSI.empty())
@@ -2724,8 +2730,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
   MachineFrameInfo &MFI = MF.getFrameInfo();
   auto *AFI = MF.getInfo<AArch64FunctionInfo>();
 
-  bool UsesWinAAPCS = isTargetWindows(MF);
-  if (UsesWinAAPCS && hasFP(MF) && AFI->hasSwiftAsyncContext()) {
+  if (IsWindows && hasFP(MF) && AFI->hasSwiftAsyncContext()) {
     int FrameIdx = MFI.CreateStackObject(8, Align(16), true);
     AFI->setSwiftAsyncContextFrameIdx(FrameIdx);
     if ((unsigned)FrameIdx < MinCSFrameIndex)
@@ -2778,7 +2783,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
       MaxCSFrameIndex = FrameIdx;
 
     // Grab 8 bytes below FP for the extended asynchronous frame info.
-    if (hasFP(MF) && AFI->hasSwiftAsyncContext() && !UsesWinAAPCS &&
+    if (hasFP(MF) && AFI->hasSwiftAsyncContext() && !IsWindows &&
         Reg == AArch64::FP) {
       FrameIdx = MFI.CreateStackObject(8, Alignment, true);
       AFI->setSwiftAsyncContextFrameIdx(FrameIdx);
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 32a9bd831989c..68ed6080199f7 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -169,6 +169,20 @@ class AArch64FrameLowering : public TargetFrameLowering {
   friend class AArch64PrologueEmitter;
   friend class AArch64EpilogueEmitter;
 
+  // Windows unwind can't represent the required stack adjustments if we have
+  // both SVE callee-saves and dynamic stack allocations, and the frame
+  // pointer is before the SVE spills. The allocation of the frame pointer
+  // must be the last instruction in the prologue so the unwinder can restore
+  // the stack pointer correctly. (And there isn't any unwind opcode for
+  // `addvl sp, x29, -17`.)
+  //
+  // Because of this, we do spills in the opposite order on Windows: first SVE,
+  // then GPRs. The main side-effect of this is that it makes accessing
+  // parameters passed on the stack more expensive.
+  //
+  // We could consider rearranging the spills for simpler cases.
+  bool hasSVECalleeSavesAboveFrameRecord(const MachineFunction &MF) const;
+
 protected:
   bool hasFPImpl(const MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
index 7e03b97584fe1..3120c0f1e6a24 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -96,19 +96,7 @@ AArch64PrologueEpilogueCommon::AArch64PrologueEpilogueCommon(
   HasFP = AFL.hasFP(MF);
   NeedsWinCFI = AFL.needsWinCFI(MF);
 
-  // Windows unwind can't represent the required stack adjustments if we have
-  // both SVE callee-saves and dynamic stack allocations, and the frame pointer
-  // is before the SVE spills. The allocation of the frame pointer must be the
-  // last instruction in the prologue so the unwinder can restore the stack
-  // pointer correctly. (And there isn't any unwind opcode for `addvl sp, x29,
-  // -17`.)
-  //
-  // Because of this, we do spills in the opposite order on Windows: first SVE,
-  // then GPRs. The main side-effect of this is that it makes accessing
-  // parameters passed on the stack more expensive.
-  //
-  // We could consider rearranging the spills for simpler cases.
-  if (Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize()) {
+  if (AFL.hasSVECalleeSavesAboveFrameRecord(MF)) {
     if (AFI->hasStackHazardSlotIndex())
       reportFatalUsageError("SME hazard padding is not supported on Windows");
     SVELayout = SVEStackLayout::CalleeSavesAboveFrameRecord;
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-reservedregs.ll b/llvm/test/CodeGen/AArch64/arm64ec-reservedregs.ll
index 91ec870dd6d0c..4e6cbccb1a5ae 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-reservedregs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-reservedregs.ll
@@ -9,20 +9,20 @@ define i32 @no_int_regs(i32 %x) nounwind {
 ; CHECK-LABEL: no_int_regs:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    stp x30, x29, [sp, #-80]! // 16-byte Folded Spill
-; CHECK-NEXT:    str x27, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x26, x25, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    str w0, [sp, #28] // 4-byte Folded Spill
+; CHECK-NEXT:    stp x19, x20, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT:    stp x21, x22, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x25, x26, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    str x27, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #56] // 16-byte Folded Spill
+; CHECK-NEXT:    str w0, [sp, #76] // 4-byte Folded Spill
 ; CHECK-NEXT:    //APP
 ; CHECK-NEXT:    //NO_APP
-; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr w0, [sp, #28] // 4-byte Folded Reload
-; CHECK-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr x27, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x26, x25, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x30, x29, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x29, x30, [sp, #56] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr w0, [sp, #76] // 4-byte Folded Reload
+; CHECK-NEXT:    ldp x25, x26, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x27, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x21, x22, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x20, [sp], #80 // 16-byte Folded Reload
 ; CHECK-NEXT:    ret
 entry:
   tail call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x15},~{x16},~{x17},~{x19},~{x20},~{x21},~{x22},~{x25},~{x26},~{x27},~{fp},~{lr}"()
@@ -32,20 +32,20 @@ entry:
 define i32 @one_int_reg(i32 %x) nounwind {
 ; CHECK-LABEL: one_int_reg:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    stp x30, x29, [sp, #-80]! // 16-byte Folded Spill
-; CHECK-NEXT:    str x27, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x19, x20, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT:    stp x21, x22, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x25, x26, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    str x27, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #56] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov w30, w0
-; CHECK-NEXT:    stp x26, x25, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
 ; CHECK-NEXT:    //APP
 ; CHECK-NEXT:    //NO_APP
-; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr x27, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov w0, w30
-; CHECK-NEXT:    ldp x26, x25, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x30, x29, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x29, x30, [sp, #56] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x25, x26, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x27, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x21, x22, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x20, [sp], #80 // 16-byte Folded Reload
 ; CHECK-NEXT:    ret
 entry:
   tail call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x15},~{x16},~{x17},~{x19},~{x20},~{x21},~{x22},~{x25},~{x26},~{x27},~{fp}"()
@@ -56,18 +56,18 @@ define float @no_float_regs(float %x) nounwind {
 ; CHECK-LABEL: no_float_regs:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #80
-; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d8, d9, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d10, d11, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d12, d13, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d14, d15, [sp, #64] // 16-byte Folded Spill
 ; CHECK-NEXT:    str s0, [sp, #12] // 4-byte Folded Spill
 ; CHECK-NEXT:    //APP
 ; CHECK-NEXT:    //NO_APP
-; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d14, d15, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr s0, [sp, #12] // 4-byte Folded Reload
-; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d12, d13, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d10, d11, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d8, d9, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #80
 ; CHECK-NEXT:    ret
 entry:
@@ -78,18 +78,18 @@ entry:
 define float @one_float_reg(float %x) nounwind {
 ; CHECK-LABEL: one_float_reg:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    stp d15, d14, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-NEXT:    stp d8, d9, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-NEXT:    stp d14, d15, [sp, #48] // 16-byte Folded Spill
 ; CHECK-NEXT:    fmov s15, s0
-; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d10, d11, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d12, d13, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    //APP
 ; CHECK-NEXT:    //NO_APP
-; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d12, d13, [sp, #32] // 16-byte Folded Reload
 ; CHECK-NEXT:    fmov s0, s15
-; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d15, d14, [sp], #64 // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d14, d15, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d10, d11, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d8, d9, [sp], #64 // 16-byte Folded Reload
 ; CHECK-NEXT:    ret
 entry:
   tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14}"()
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir
index b8302e64f282d..0d84fa14d8672 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir
@@ -17,6 +17,7 @@
   define aarch64_sve_vector_pcs void @save_restore_sve() uwtable { entry: unreachable }
   define aarch64_sve_vector_pcs void @save_restore_sve_realign() uwtable { entry: unreachable }
   define aarch64_sve_vector_pcs void @frame_layout() uwtable { entry: unreachable }
+  define aarch64_sve_vector_pcs void @test_nounwind_layout() nounwind { entry: unreachable }
 ...
 ---
 name: test_allocate_sve
@@ -882,3 +883,29 @@ body: |
     RET_ReallyLR
 ...
+---
+name: test_nounwind_layout
+stack:
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: test_nounwind_layout
+    ; CHECK: fixedStack:
+    ; CHECK: liveins: $p8, $z8, $lr, $x20
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg
+    ; CHECK-NEXT: frame-setup STR_PXI killed $p8, $sp, 0 :: (store (s16) into %stack.3)
+    ; CHECK-NEXT: frame-setup STR_ZXI killed $z8, $sp, 1 :: (store (s128) into %stack.2)
+    ; CHECK-NEXT: early-clobber $sp = frame-setup STPXpre killed $x20, killed $lr, $sp, -2 :: (store (s64) into %stack.0), (store (s64) into %stack.1)
+    ; CHECK-NEXT: $x20 = IMPLICIT_DEF
+    ; CHECK-NEXT: $p8 = IMPLICIT_DEF
+    ; CHECK-NEXT: $z8 = IMPLICIT_DEF
+    ; CHECK-NEXT: early-clobber $sp, $x20, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.0), (load (s64) from %stack.1)
+    ; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.2)
+    ; CHECK-NEXT: $p8 = frame-destroy LDR_PXI $sp, 0 :: (load (s16) from %stack.3)
+    ; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg
+    ; CHECK-NEXT: RET_ReallyLR
+    $x20 = IMPLICIT_DEF
+    $p8 = IMPLICIT_DEF
+    $z8 = IMPLICIT_DEF
+    RET_ReallyLR
+...
diff --git a/llvm/test/CodeGen/AArch64/llvm.frexp.ll b/llvm/test/CodeGen/AArch64/llvm.frexp.ll
index 4e1876db772ed..935374fec9f87 100644
--- a/llvm/test/CodeGen/AArch64/llvm.frexp.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.frexp.ll
@@ -133,7 +133,7 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) nounwind
 ; WINDOWS-NEXT:    mov h1, v0.h[1]
 ; WINDOWS-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; WINDOWS-NEXT:    add x0, sp, #36
-; WINDOWS-NEXT:    stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; WINDOWS-NEXT:    stp x19, x30, [sp, #48] // 16-byte Folded Spill
 ; WINDOWS-NEXT:    add x19, sp, #36
 ; WINDOWS-NEXT:    fcvt d0, h1
 ; WINDOWS-NEXT:    bl frexp
@@ -166,7 +166,7 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) nounwind
 ; WINDOWS-NEXT:    ldr s1, [sp, #32]
 ; WINDOWS-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; WINDOWS-NEXT:    ld1 { v1.s }[1], [x19]
-; WINDOWS-NEXT:    ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; WINDOWS-NEXT:    ldp x19, x30, [sp, #48] // 16-byte Folded Reload
 ; WINDOWS-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; WINDOWS-NEXT:    mov v0.h[3], v2.h[0]
 ; WINDOWS-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -297,7 +297,7 @@ define <2 x i32> @test_frexp_v2f16_v2i32_only_use_exp(<2 x half> %a) nounwind {
 ; WINDOWS-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; WINDOWS-NEXT:    add x0, sp, #24
 ; WINDOWS-NEXT:    fcvt d0, h0
-; WINDOWS-NEXT:    stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; WINDOWS-NEXT:    stp x19, x30, [sp, #32] // 16-byte Folded Spill
 ; WINDOWS-NEXT:    bl frexp
 ; WINDOWS-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; WINDOWS-NEXT:    add x0, sp, #28
@@ -307,7 +307,7 @@ define <2 x i32> @test_frexp_v2f16_v2i32_only_use_exp(<2 x half> %a) nounwind {
 ; WINDOWS-NEXT:    bl frexp
 ; WINDOWS-NEXT:    ldr s0, [sp, #24]
 ; WINDOWS-NEXT:    ld1 { v0.s }[1], [x19]
-; WINDOWS-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; WINDOWS-NEXT:    ldp x19, x30, [sp, #32] // 16-byte Folded Reload
 ; WINDOWS-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; WINDOWS-NEXT:    add sp, sp, #48
 ; WINDOWS-NEXT:    ret
@@ -360,9 +360,9 @@ define { <3 x float>, <3 x i32> } @test_frexp_v3f32_v3i32(<3 x float> %a) nounwi
 ; WINDOWS-NEXT:    mov s1, v0.s[1]
 ; WINDOWS-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; WINDOWS-NEXT:    add x0, sp, #36
-; WINDOWS-NEXT:    stp x30, x21, [sp, #48] // 16-byte Folded Spill
-; WINDOWS-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; WINDOWS-NEXT:    stp x19, x20, [sp, #48] // 16-byte Folded Spill
 ; WINDOWS-NEXT:    add x19, sp, #36
+; WINDOWS-NEXT:    stp x21, x30, [sp, #64] // 16-byte Folded Spill
 ; WINDOWS-NEXT:    fcvt d0, s1
 ; WINDOWS-NEXT:    bl frexp
 ; WINDOWS-NEXT:    fcvt s0, d0
@@ -397,9 +397,9 @@ define { <3 x float>, <3 x i32> } @test_frexp_v3f32_v3i32(<3 x float> %a) nounwi
 ; WINDOWS-NEXT:    ld1 { v1.s }[1], [x19]
 ; WINDOWS-NEXT:    mov v0.s[3], v2.s[0]
 ; WINDOWS-NEXT:    ld1 { v1.s }[2], [x20]
-; WINDOWS-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; WINDOWS-NEXT:    ldp x19, x20, [sp, #48] // 16-byte Folded Reload
 ; WINDOWS-NEXT:    ld1 { v1.s }[3], [x21]
-; WINDOWS-NEXT:    ldp x30, x21, [sp, #48] // 16-byte Folded Reload
+; WINDOWS-NEXT:    ldp x21, x30, [sp, #64] // 16-byte Folded Reload
 ; WINDOWS-NEXT:    add sp, sp, #80
 ; WINDOWS-NEXT:    ret
   %result = call { <3 x float>, <3 x i32> } @llvm.frexp.v3f32.v3i32(<3 x float> %a)
@@ -536,7 +536,7 @@ define { <2 x float>, <2 x i32> } @test_frexp_v2f32_v2i32(<2 x float> %a) nounwi
 ; WINDOWS-NEXT:    mov s1, v0.s[1]
 ; WINDOWS-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; WINDOWS-NEXT:    add x0, sp, #28
-; WINDOWS-NEXT:    stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; WINDOWS-NEXT:    stp x19, x30, [sp, #32] // 16-byte Folded Spill
 ; WINDOWS-NEXT:    add x19, sp, #28
 ; WINDOWS-NEXT:    fcvt d0, s1
 ; WINDOWS-NEXT:    bl frexp
@@ -551,7 +551,7 @@ define { <2 x float>, <2 x i32> } @test_frexp_v2f32_v2i32(<2 x float> %a) nounwi
 ; WINDOWS-NEXT:    ldr s1, [sp, #24]
 ; WINDOWS-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
 ; WINDOWS-NEXT:    ld1 { v1.s }[1], [x19]
-; WINDOWS-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; WINDOWS-NEXT:    ldp x19, x30, [sp, #32] // 16-byte Folded Reload
 ; WINDOWS-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; WINDOWS-NEXT:    mov v0.s[1], v2.s[0]
 ; WINDOWS-NEXT:    // kill: def $d0 killed $d0 killed $q0
@@ -643,7 +643,7 @@ define <2 x i32> @test_frexp_v2f32_v2i32_only_use_exp(<2 x float> %a) nounwind {
 ; WINDOWS-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; WINDOWS-NEXT:    add x0, sp, #24
 ; WINDOWS-NEXT:    fcvt d0, s0
-; WINDOWS-NEXT:    stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; WINDOWS-NEXT:    stp x19, x30, [sp, #32] // 16-byte Folded Spill
 ; WINDOWS-NEXT:    bl frexp
 ; WINDOWS-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; WINDOWS-NEXT:    add x0, sp, #28
@@ -653,7 +653,7 @@ define <2 x i32> @test_frexp_v2f32_v2i32_only_use_exp(<2 x float> %a) nounwind {
 ; WINDOWS-NEXT:    bl frexp
 ; WINDOWS-NEXT:    ldr s0, [sp, #24]
 ; WINDOWS-NEXT:    ld1 { v0.s }[1], [x19]
-; WINDOWS-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; WINDOWS-NEXT:    ldp x19, x30, [sp, #32] // 16-byte Folded Reload
 ; WINDOWS-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; WINDOWS-NEXT:    add sp, sp, #48
 ; WINDOWS-NEXT:    ret
@@ -717,9 +717,9 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi
 ; WINDOWS-NEXT:    mov s1, v0.s[1]
 ; WINDOWS-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; WINDOWS-NEXT:    add x0, sp, #36
-; WINDOWS-NEXT:    stp x30, x21, [sp, #48] // 16-byte Folded Spill
-; WINDOWS-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; WINDOWS-NEXT:    stp x19, x20, [sp, #48] // 16-byte Folded Spill
 ; WINDOWS-NEXT:    add x19, sp, #36
+; WINDOWS-NEXT:    stp x21, x30, [sp, #64] // 16-byte Folded Spill
 ; WINDOWS-NEXT:    fcvt d0, s1
 ; WINDOWS-NEXT:    bl frexp
 ; WINDOWS-NEXT:    fcvt s0, d0
@@ -754,9 +754,9 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi
 ; WINDOWS-NEXT:    ld1 { v1.s }[1], [x19]
 ; WINDOWS-NEXT:    mov v0.s[3], v2.s[0]
 ; WINDOWS-NEXT:    ld1 { v1.s }[2], [x20]
-; WINDOWS-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; WINDOWS-NEXT:    ldp x19, x20, [sp, #48] // 16-byte Folded Reload
 ; WINDOWS-NEXT:    ld1 { v1.s }[3], [x21]
-; WINDOWS-NEXT:    ldp x30, x21, [sp, #48] // 16-byte Folded Reload
+; WINDOWS-NEXT:    ldp x21, x30, [sp, #64] // 16-byte Folded Reload
 ; WINDOWS-NEXT:    add sp, sp, #80
 ; WINDOWS-NEXT:    ret
   %result = call { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float> %a)
@@ -887,8 +887,8 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
 ; WINDOWS-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; WINDOWS-NEXT:    fcvt d0, s0
 ; WINDOWS-NEXT:    add x0, sp, #16
-; WINDOWS-NEXT:    stp x30, x21, [sp, #32] // 16-byte Folded Spill
-; WINDOWS-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; WINDOWS-NEXT:    stp x19, x20, [sp, #32] // 16-byte Folded Spill
+; WINDOWS-NEXT:    stp x21, x30, [sp, #48] // 16-byte Folded Spill
 ; WINDOWS-NEXT:    bl frexp
 ; WINDOWS-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; WINDOWS-NEXT:    add x0, sp, #20
@@ -911,9 +911,9 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
 ; WINDOWS-NEXT:    ldr s0, [sp, #16]
 ; WINDOWS-NEXT:    ld1 { v0.s }[1], [x19]
 ; WINDOWS-NEXT:    ld1 { v0.s }[2], [x20]
-; WINDOWS-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; WINDOWS-NEXT:    ldp x19, x20, [sp, #32] // 16-byte Folded Reload
 ; WINDOWS-NEXT:    ld1 { v0.s }[3], [x21]
-; WINDOWS-NEXT:    ldp x30, x21, [sp, #32] // 16-byte Folded Reload
+; WINDOWS-NEXT:    ldp x21, x30, [sp, #48] // 16-byte Folded Reload
 ; WINDOWS-NEXT:    add sp, sp, #64
 ; WINDOWS-NEXT:    ret
   %result = call { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float> %a)
@@ -1019,7 +1019,7 @@ define { <2 x double>, <2 x i32> } @test_frexp_v2f64_v2i32(<2 x double> %a) noun
 ; WINDOWS-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; WINDOWS-NEXT:    mov d0, v0.d[1]
 ; WINDOWS-NEXT:    add x0, sp, #40
-; WINDOWS-NEXT:    stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; WINDOWS-NEXT:    stp x19, x30, [sp, #48] // 16-byte Folded Spill
 ; WINDOWS-NEXT:    add x19, sp, #40
 ; WINDOWS-NEXT:    bl frexp
 ; WINDOWS-NEXT:    // kill: def $d0 killed $d0 def $q0
@@ -1032,7 +1032,7 @@ define { <2 x double>, <2 x i32> } @test_frexp_v2f64_v2i32(<2 x double> %a) noun
 ; WINDOWS-NEXT:    ldr q2, [sp, #16] // 16-byte Folded Reload
 ; WINDOWS-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; WINDOWS-NEXT:    ld1 { v1.s }[1], [x19]
-; WINDOWS-NEXT:    ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; WINDOWS-NEXT:    ldp x19, x30, [sp, #48] // 16-byte Folded Reload
 ; WINDOWS-NEXT:    mov v0.d[1], v2.d[0]
 ; WINDOWS-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; WINDOWS-NEXT:    add sp, sp, #64
@@ -1113,7 +1113,7 @@ define <2 x i32> @test_frexp_v2f64_v2i32_only_use_exp(<2 x double> %a) nounwind
 ; WINDOWS:       // %bb.0:
 ; WINDOWS-NEXT:    sub sp, sp, #48
 ; WINDOWS-NEXT:    add x0, sp, #28
-; WINDOWS-NEXT:    stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; WINDOWS-NEXT:    stp x19, x30, [sp, #32] // 16-byte Folded Spill
 ; WINDOWS-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; WINDOWS-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; WINDOWS-NEXT:    bl frexp
@@ -1124,7 +1124,7 @@ define <2 x i32> @test_frexp_v2f64_v2i32_only_use_exp(<2 x double> %a) nounwind
 ; WINDOWS-NEXT:    bl frexp
 ; WINDOWS-NEXT:    ldr s0, [sp, #28]
 ; WINDOWS-NEXT:    ld1 { v0.s }[1], [x19]
-; WINDOWS-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; WINDOWS-NEXT:    ldp x19, x30, [sp, #32] // 16-byte Folded Reload
 ; WINDOWS-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; WINDOWS-NEXT:    add sp, sp, #48
 ; WINDOWS-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
index 83dd240a6540f..246515bbe6d45 100644
--- a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
@@ -38,29 +38,29 @@ define i32 @caller() nounwind ssp {
 ; CHECK-NEXT:    mov w5, #6 // =0x6
 ; CHECK-NEXT:    mov w6, #7 // =0x7
 ; CHECK-NEXT:    mov w7, #8 // =0x8
-; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x28, x27, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x26, x25, [sp, #112] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x24, x23, [sp, #128] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #144] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x19, x20, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x21, x22, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x23, x24, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x25, x26, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x27, x28, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-NEXT:    stp d8, d9, [sp, #104] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d10, d11, [sp, #120] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d12, d13, [sp, #136] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d14, d15, [sp, #152] // 16-byte Folded Spill
 ; CHECK-NEXT:    str w8, [sp, #8]
 ; CHECK-NEXT:    str w9, [sp]
 ; CHECK-NEXT:    bl callee
-; CHECK-NEXT:    ldp x20, x19, [sp, #160] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x22, x21, [sp, #144] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x24, x23, [sp, #128] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x26, x25, [sp, #112] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp x28, x27, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d14, d15, [sp, #152] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp d12, d13, [sp, #136] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d10, d11, [sp, #120] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d8, d9, [sp, #104] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x27, x28, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x25, x26, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x23, x24, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x21, x22, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x20, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #176
 ; CHECK-NEXT:    ret
   %r = tail call preserve_nonecc i32 (i32, i32, i32, i32, i32, ...) @callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-windows.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-windows.ll
index 1c341e8daf491..648cba57b95cf 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-save-windows.ll
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-windows.ll
@@ -8,9 +8,9 @@ declare void @shared_za_callee() "aarch64_inout_za"
 define void @test_lazy_save() nounwind "aarch64_inout_za" {
 ; CHECK-LABEL: test_lazy_save:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    stp x30, x29, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    str x19, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #8] // 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #8
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    rdsvl x8, #1
 ; CHECK-NEXT:    mul x9, x8, x8
@@ -18,21 +18,21 @@ define void @test_lazy_save() nounwind "aarch64_inout_za" {
 ; CHECK-NEXT:    bl __chkstk
 ; CHECK-NEXT:    sub x9, sp, x15, lsl #4
 ; CHECK-NEXT:    mov sp, x9
-; CHECK-NEXT:    sub x10, x29, #16
-; CHECK-NEXT:    stp x9, x8, [x29, #-16]
+; CHECK-NEXT:    sub x10, x29, #24
+; CHECK-NEXT:    stp x9, x8, [x29, #-24]
 ; CHECK-NEXT:    msr TPIDR2_EL0, x10
 ; CHECK-NEXT:    bl private_za_callee
 ; CHECK-NEXT:    smstart za
 ; CHECK-NEXT:    mrs x8, TPIDR2_EL0
-; CHECK-NEXT:    sub x0, x29, #16
+; CHECK-NEXT:    sub x0, x29, #24
 ; CHECK-NEXT:    cbnz x8, .LBB0_2
 ; CHECK-NEXT:  // %bb.1:
 ; CHECK-NEXT:    bl __arm_tpidr2_restore
 ; CHECK-NEXT:  .LBB0_2:
 ; CHECK-NEXT:    msr TPIDR2_EL0, xzr
-; CHECK-NEXT:    mov sp, x29
-; CHECK-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    ldp x30, x29, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT:    sub sp, x29, #8
+; CHECK-NEXT:    ldp x29, x30, [sp, #8] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x19, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   call void @private_za_callee()
   ret void
diff --git a/llvm/test/CodeGen/AArch64/swift-async-win.ll b/llvm/test/CodeGen/AArch64/swift-async-win.ll
index 94308979b07f8..c71fe8dfe035e 100644
--- a/llvm/test/CodeGen/AArch64/swift-async-win.ll
+++ b/llvm/test/CodeGen/AArch64/swift-async-win.ll
@@ -19,11 +19,11 @@ define hidden swifttailcc void @"$ss23withCheckedContinuation8function_xSS_yScCy
 ; CHECK-LABEL: $ss23withCheckedContinuation8function_xSS_yScCyxs5NeverOGXEtYalFTQ0_:
 ; CHECK:       // %bb.0: // %entryresume.0
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    stp x30, x29, [sp, #24] // 16-byte Folded Spill
-; CHECK-NEXT:    add x29, sp, #24
-; CHECK-NEXT:    str x19, [sp, #40] // 8-byte Folded Spill
+; CHECK-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    adrp x19, __imp_swift_task_dealloc
-; CHECK-NEXT:    str xzr, [sp, #16]
+; CHECK-NEXT:    stp x29, x30, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #32
+; CHECK-NEXT:    str xzr, [sp, #24]
 ; CHECK-NEXT:    ldr x8, [x0]
 ; CHECK-NEXT:    stur x8, [x29, #-8]
 ; CHECK-NEXT:    ldr x20, [x0]
@@ -33,10 +33,10 @@ define hidden swifttailcc void @"$ss23withCheckedContinuation8function_xSS_yScCy
 ; CHECK-NEXT:    blr x19
 ; CHECK-NEXT:    mov x0, x22
 ; CHECK-NEXT:    blr x19
-; CHECK-NEXT:    ldp x30, x29, [sp, #24] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x29, x30, [sp, #32] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x0, x20
 ; CHECK-NEXT:    ldr x1, [x20, #8]
-; CHECK-NEXT:    ldr x19, [sp, #40] // 8-byte Folded Reload
+; CHECK-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #48
 ; CHECK-NEXT:    br x1
 entryresume.0:
diff --git a/llvm/test/CodeGen/AArch64/win64-no-uwtable.ll b/llvm/test/CodeGen/AArch64/win64-no-uwtable.ll
index e0ef14ba23921..4d3a9f34a1d16 100644
--- a/llvm/test/CodeGen/AArch64/win64-no-uwtable.ll
+++ b/llvm/test/CodeGen/AArch64/win64-no-uwtable.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=arm64-windows -o - %s | FileCheck %s
 
 declare void @f()
@@ -5,21 +6,21 @@ declare void @g()
 
 ; Function Attrs: nounwind
 define dso_local void @SEHfilter() nounwind "frame-pointer"="all" {
-; CHECK-LABEL: @SEHfilter
-; CHECK: %bb.0:
-; CHECK-NEXT:  stp x30, x29, [sp, #-32]!
-; CHECK-NEXT:  str x19, [sp, #16]
-; CHECK-NEXT:  ldr w19, [x8]
-; CHECK-NEXT:  mov x29, sp
-; CHECK-NEXT:  bl g
-; CHECK-NEXT:  cbz w19, .LBB0_2
-; CHECK-NEXT: // %bb.1:
-; CHECK-NEXT:  ldr x19, [sp, #16]
-; CHECK-NEXT:  ldp x30, x29, [sp], #32
-; CHECK-NEXT:  ret
-; CHECK-NEXT: .LBB0_2: // %if.end.i
-; CHECK-NEXT:  bl f
-; CHECK-NEXT:  brk #0x1
+; CHECK-LABEL: SEHfilter:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x19, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT:    ldr w19, [x8]
+; CHECK-NEXT:    stp x29, x30, [sp, #8] // 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #8
+; CHECK-NEXT:    bl g
+; CHECK-NEXT:    cbz w19, .LBB0_2
+; CHECK-NEXT:  // %bb.1: // %exit
+; CHECK-NEXT:    ldp x29, x30, [sp, #8] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x19, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB0_2: // %if.end.i
+; CHECK-NEXT:    bl f
+; CHECK-NEXT:    brk #0x1
   %1 = load i32, ptr undef, align 4
   tail call void @g()
   %tobool.i = icmp eq i32 %1, 0