Skip to content

Commit 153ee1c

Browse files
author
git apple-llvm automerger
committed
Merge commit '1389980ed746' from llvm.org/main into next
2 parents 7b6ea11 + 426e623 commit 153ee1c

File tree

4 files changed

+134
-116
lines changed

4 files changed

+134
-116
lines changed

llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp

Lines changed: 41 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1505,15 +1505,25 @@ void AArch64EpilogueEmitter::emitEpilogue() {
15051505
}
15061506

15071507
bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes);
1508-
// Assume we can't combine the last pop with the sp restore.
1509-
bool CombineAfterCSRBump = false;
1508+
1509+
unsigned ProloguePopSize = PrologueSaveSize;
15101510
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
1511+
// With CalleeSavesAboveFrameRecord ProloguePopSize is the amount of stack
1512+
// that needs to be popped until we reach the start of the SVE save area.
1513+
// The "FixedObject" stack occurs after the SVE area and must be popped
1514+
// later.
1515+
ProloguePopSize -= FixedObject;
15111516
AfterCSRPopSize += FixedObject;
1512-
} else if (!CombineSPBump && PrologueSaveSize != 0 &&
1517+
}
1518+
1519+
// Try to combine the last callee-save pop with the sp restore.
1520+
if (!CombineSPBump && ProloguePopSize != 0 &&
15131521
!IsSwiftCoroPartialReturn) {
15141522
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
15151523
while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
1516-
AArch64InstrInfo::isSEHInstruction(*Pop))
1524+
AArch64InstrInfo::isSEHInstruction(*Pop) ||
1525+
(SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord &&
1526+
isPartOfSVECalleeSaves(Pop)))
15171527
Pop = std::prev(Pop);
15181528
// Converting the last ldp to a post-index ldp is valid only if the last
15191529
// ldp's offset is 0.
@@ -1523,18 +1533,27 @@ void AArch64EpilogueEmitter::emitEpilogue() {
15231533
// may clobber), convert it to a post-index ldp.
15241534
if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
15251535
convertCalleeSaveRestoreToSPPrePostIncDec(
1526-
Pop, DL, PrologueSaveSize, EmitCFI, MachineInstr::FrameDestroy,
1527-
PrologueSaveSize);
1536+
Pop, DL, ProloguePopSize, EmitCFI, MachineInstr::FrameDestroy,
1537+
ProloguePopSize);
1538+
} else if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
1539+
MachineBasicBlock::iterator AfterLastPop = std::next(Pop);
1540+
if (AArch64InstrInfo::isSEHInstruction(*AfterLastPop))
1541+
++AfterLastPop;
1542+
// If not, and CalleeSavesAboveFrameRecord is enabled, deallocate
1543+
// callee-save non-SVE registers to move the stack pointer to the start of
1544+
// the SVE area.
1545+
emitFrameOffset(MBB, AfterLastPop, DL, AArch64::SP, AArch64::SP,
1546+
StackOffset::getFixed(ProloguePopSize), TII,
1547+
MachineInstr::FrameDestroy, false, NeedsWinCFI,
1548+
&HasWinCFI);
15281549
} else {
1529-
// If not, make sure to emit an add after the last ldp.
1550+
// Otherwise, make sure to emit an add after the last ldp.
15301551
// We're doing this by transferring the size to be restored from the
15311552
// adjustment *before* the CSR pops to the adjustment *after* the CSR
15321553
// pops.
1533-
AfterCSRPopSize += PrologueSaveSize;
1534-
CombineAfterCSRBump = true;
1554+
AfterCSRPopSize += ProloguePopSize;
15351555
}
15361556
}
1537-
15381557
// Move past the restores of the callee-saved registers.
15391558
// If we plan on combining the sp bump of the local stack size and the callee
15401559
// save stack size, we might need to adjust the CSR save and restore offsets.
@@ -1574,6 +1593,17 @@ void AArch64EpilogueEmitter::emitEpilogue() {
15741593
--SEHEpilogueStartI;
15751594
}
15761595

1596+
// Determine the ranges of SVE callee-saves. This is done before emitting any
1597+
// code at the end of the epilogue (for Swift async), which can get in the way
1598+
// of finding SVE callee-saves with CalleeSavesAboveFrameRecord.
1599+
auto [PPR, ZPR] = getSVEStackFrameSizes();
1600+
auto [PPRRange, ZPRRange] = partitionSVECS(
1601+
MBB,
1602+
SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord
1603+
? MBB.getFirstTerminator()
1604+
: FirstGPRRestoreI,
1605+
PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true);
1606+
15771607
if (HasFP && AFI->hasSwiftAsyncContext()) {
15781608
assert(!IsSwiftCoroPartialReturn);
15791609
emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
@@ -1599,14 +1629,6 @@ void AArch64EpilogueEmitter::emitEpilogue() {
15991629
NumBytes -= PrologueSaveSize;
16001630
assert(NumBytes >= 0 && "Negative stack allocation size!?");
16011631

1602-
auto [PPR, ZPR] = getSVEStackFrameSizes();
1603-
auto [PPRRange, ZPRRange] = partitionSVECS(
1604-
MBB,
1605-
SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord
1606-
? MBB.getFirstTerminator()
1607-
: FirstGPRRestoreI,
1608-
PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true);
1609-
16101632
StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
16111633
StackOffset SVEStackSize =
16121634
SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize;
@@ -1625,16 +1647,6 @@ void AArch64EpilogueEmitter::emitEpilogue() {
16251647
NeedsWinCFI, &HasWinCFI);
16261648
}
16271649

1628-
// Deallocate callee-save non-SVE registers.
1629-
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1630-
StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII,
1631-
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1632-
1633-
// Deallocate fixed objects.
1634-
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1635-
StackOffset::getFixed(FixedObject), TII,
1636-
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1637-
16381650
// Deallocate callee-save SVE registers.
16391651
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
16401652
SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false,
@@ -1782,7 +1794,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
17821794
MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
17831795
StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
17841796
false, NeedsWinCFI, &HasWinCFI, EmitCFI,
1785-
StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0));
1797+
StackOffset::getFixed(AfterCSRPopSize - ArgumentStackToRestore));
17861798
}
17871799
}
17881800

llvm/test/CodeGen/AArch64/framelayout-sve-win.mir

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -380,10 +380,8 @@ body: |
380380
; CHECK-NEXT: frame-destroy SEH_EpilogStart
381381
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
382382
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
383-
; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
384-
; CHECK-NEXT: frame-destroy SEH_SaveReg 30, 0
385-
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
386-
; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
383+
; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.1)
384+
; CHECK-NEXT: frame-destroy SEH_SaveReg_X 30, -16
387385
; CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 0 :: (load (s16) from %stack.4)
388386
; CHECK-NEXT: frame-destroy SEH_SavePReg 4, 0
389387
; CHECK-NEXT: $p5 = frame-destroy LDR_PXI $sp, 1 :: (load (s16) from %stack.3)
@@ -430,10 +428,8 @@ body: |
430428
; CHECK-NEXT: frame-destroy SEH_EpilogStart
431429
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
432430
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
433-
; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
434-
; CHECK-NEXT: frame-destroy SEH_SaveReg 30, 0
435-
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
436-
; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
431+
; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.1)
432+
; CHECK-NEXT: frame-destroy SEH_SaveReg_X 30, -16
437433
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.4)
438434
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 0
439435
; CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.3)
@@ -557,10 +553,8 @@ body: |
557553
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
558554
; CHECK-NEXT: $x21, $lr = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.3)
559555
; CHECK-NEXT: frame-destroy SEH_SaveRegP 21, 30, 16
560-
; CHECK-NEXT: $x19, $x20 = frame-destroy LDPXi $sp, 0 :: (load (s64) from %stack.4), (load (s64) from %stack.5)
561-
; CHECK-NEXT: frame-destroy SEH_SaveRegP 19, 20, 0
562-
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
563-
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
556+
; CHECK-NEXT: early-clobber $sp, $x19, $x20 = frame-destroy LDPXpost $sp, 4 :: (load (s64) from %stack.4), (load (s64) from %stack.5)
557+
; CHECK-NEXT: frame-destroy SEH_SaveRegP_X 19, 20, -32
564558
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.21)
565559
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 2
566560
; CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.20)
@@ -745,10 +739,8 @@ body: |
745739
; CHECK-NEXT: frame-destroy SEH_EpilogStart
746740
; CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
747741
; CHECK-NEXT: frame-destroy SEH_SetFP
748-
; CHECK-NEXT: $fp, $lr = frame-destroy LDPXi $sp, 0 :: (load (s64) from %stack.2), (load (s64) from %stack.3)
749-
; CHECK-NEXT: frame-destroy SEH_SaveFPLR 0
750-
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
751-
; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
742+
; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.3)
743+
; CHECK-NEXT: frame-destroy SEH_SaveFPLR_X -16
752744
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.19)
753745
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 2
754746
; CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.18)
@@ -869,10 +861,8 @@ body: |
869861
; CHECK-NEXT: frame-destroy SEH_EpilogStart
870862
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 7, implicit $vg
871863
; CHECK-NEXT: frame-destroy SEH_AllocZ 7
872-
; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.6)
873-
; CHECK-NEXT: frame-destroy SEH_SaveReg 30, 0
874-
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
875-
; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
864+
; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.6)
865+
; CHECK-NEXT: frame-destroy SEH_SaveReg_X 30, -16
876866
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.8)
877867
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 1
878868
; CHECK-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.7)

0 commit comments

Comments
 (0)