Skip to content

Commit 2b42c6b

Browse files
committed
[AArch64][SVE] Avoid extra pop of "FixedObject" with FPAfterSVECalleeSaves
Previously, we would pop `FixedObject` bytes after deallocating the SVE area, then pop them again as part of `AfterCSRPopSize`, so the fixed-object area was deallocated twice. This could be seen in the tests `@f6` and `@f9`. This patch removes the erroneous pop and refactors the `FPAfterSVECalleeSaves` path to reuse more of the existing GPR deallocation logic, which allows the final GPR restore to be folded into a post-increment load.
1 parent 03742c5 commit 2b42c6b

File tree

3 files changed

+67
-107
lines changed

3 files changed

+67
-107
lines changed

llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1278,14 +1278,22 @@ void AArch64EpilogueEmitter::emitEpilogue() {
12781278
Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();
12791279

12801280
bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes);
1281-
// Assume we can't combine the last pop with the sp restore.
1282-
bool CombineAfterCSRBump = false;
1281+
1282+
unsigned ProloguePopSize = PrologueSaveSize;
12831283
if (FPAfterSVECalleeSaves) {
1284+
// With FPAfterSVECalleeSaves ProloguePopSize is the amount of stack that
1285+
// needs to be popped until we reach the start of the SVE save area. The
1286+
// "FixedObject" stack occurs after the SVE area and must be popped later.
1287+
ProloguePopSize -= FixedObject;
12841288
AfterCSRPopSize += FixedObject;
1285-
} else if (!CombineSPBump && PrologueSaveSize != 0) {
1289+
}
1290+
1291+
// Assume we can't combine the last pop with the sp restore.
1292+
if (!CombineSPBump && ProloguePopSize != 0) {
12861293
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
12871294
while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
1288-
AArch64InstrInfo::isSEHInstruction(*Pop))
1295+
AArch64InstrInfo::isSEHInstruction(*Pop) ||
1296+
(FPAfterSVECalleeSaves && isSVECalleeSave(Pop)))
12891297
Pop = std::prev(Pop);
12901298
// Converting the last ldp to a post-index ldp is valid only if the last
12911299
// ldp's offset is 0.
@@ -1295,18 +1303,24 @@ void AArch64EpilogueEmitter::emitEpilogue() {
12951303
// may clobber), convert it to a post-index ldp.
12961304
if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
12971305
convertCalleeSaveRestoreToSPPrePostIncDec(
1298-
Pop, DL, PrologueSaveSize, EmitCFI, MachineInstr::FrameDestroy,
1299-
PrologueSaveSize);
1306+
Pop, DL, ProloguePopSize, EmitCFI, MachineInstr::FrameDestroy,
1307+
ProloguePopSize);
1308+
} else if (FPAfterSVECalleeSaves) {
1309+
// If not, and FPAfterSVECalleeSaves is enabled, deallocate callee-save
1310+
// non-SVE registers to move the stack pointer to the start of the SVE
1311+
// area.
1312+
emitFrameOffset(MBB, std::next(Pop), DL, AArch64::SP, AArch64::SP,
1313+
StackOffset::getFixed(ProloguePopSize), TII,
1314+
MachineInstr::FrameDestroy, false, NeedsWinCFI,
1315+
&HasWinCFI);
13001316
} else {
1301-
// If not, make sure to emit an add after the last ldp.
1317+
// Otherwise, make sure to emit an add after the last ldp.
13021318
// We're doing this by transferring the size to be restored from the
13031319
// adjustment *before* the CSR pops to the adjustment *after* the CSR
13041320
// pops.
1305-
AfterCSRPopSize += PrologueSaveSize;
1306-
CombineAfterCSRBump = true;
1321+
AfterCSRPopSize += ProloguePopSize;
13071322
}
13081323
}
1309-
13101324
// Move past the restores of the callee-saved registers.
13111325
// If we plan on combining the sp bump of the local stack size and the callee
13121326
// save stack size, we might need to adjust the CSR save and restore offsets.
@@ -1394,16 +1408,6 @@ void AArch64EpilogueEmitter::emitEpilogue() {
13941408
NeedsWinCFI, &HasWinCFI);
13951409
}
13961410

1397-
// Deallocate callee-save non-SVE registers.
1398-
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1399-
StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII,
1400-
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1401-
1402-
// Deallocate fixed objects.
1403-
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1404-
StackOffset::getFixed(FixedObject), TII,
1405-
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1406-
14071411
// Deallocate callee-save SVE registers.
14081412
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
14091413
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
@@ -1522,7 +1526,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
15221526
MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
15231527
StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
15241528
false, NeedsWinCFI, &HasWinCFI, EmitCFI,
1525-
StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0));
1529+
StackOffset::getFixed(AfterCSRPopSize - ArgumentStackToRestore));
15261530
}
15271531
}
15281532

llvm/test/CodeGen/AArch64/framelayout-sve-win.mir

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -380,10 +380,8 @@ body: |
380380
; CHECK-NEXT: frame-destroy SEH_EpilogStart
381381
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
382382
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
383-
; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
384-
; CHECK-NEXT: frame-destroy SEH_SaveReg 30, 0
385-
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
386-
; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
383+
; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.1)
384+
; CHECK-NEXT: frame-destroy SEH_SaveReg_X 30, -16
387385
; CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 0 :: (load (s16) from %stack.4)
388386
; CHECK-NEXT: frame-destroy SEH_SavePReg 4, 0
389387
; CHECK-NEXT: $p5 = frame-destroy LDR_PXI $sp, 1 :: (load (s16) from %stack.3)
@@ -430,10 +428,8 @@ body: |
430428
; CHECK-NEXT: frame-destroy SEH_EpilogStart
431429
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
432430
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
433-
; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
434-
; CHECK-NEXT: frame-destroy SEH_SaveReg 30, 0
435-
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
436-
; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
431+
; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.1)
432+
; CHECK-NEXT: frame-destroy SEH_SaveReg_X 30, -16
437433
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.4)
438434
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 0
439435
; CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.3)
@@ -557,10 +553,8 @@ body: |
557553
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
558554
; CHECK-NEXT: $x21, $lr = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.3)
559555
; CHECK-NEXT: frame-destroy SEH_SaveRegP 21, 30, 16
560-
; CHECK-NEXT: $x19, $x20 = frame-destroy LDPXi $sp, 0 :: (load (s64) from %stack.4), (load (s64) from %stack.5)
561-
; CHECK-NEXT: frame-destroy SEH_SaveRegP 19, 20, 0
562-
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
563-
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
556+
; CHECK-NEXT: early-clobber $sp, $x19, $x20 = frame-destroy LDPXpost $sp, 4 :: (load (s64) from %stack.4), (load (s64) from %stack.5)
557+
; CHECK-NEXT: frame-destroy SEH_SaveRegP_X 19, 20, -32
564558
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.21)
565559
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 2
566560
; CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.20)
@@ -745,10 +739,8 @@ body: |
745739
; CHECK-NEXT: frame-destroy SEH_EpilogStart
746740
; CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
747741
; CHECK-NEXT: frame-destroy SEH_SetFP
748-
; CHECK-NEXT: $fp, $lr = frame-destroy LDPXi $sp, 0 :: (load (s64) from %stack.2), (load (s64) from %stack.3)
749-
; CHECK-NEXT: frame-destroy SEH_SaveFPLR 0
750-
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
751-
; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
742+
; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.3)
743+
; CHECK-NEXT: frame-destroy SEH_SaveFPLR_X -16
752744
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.19)
753745
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 2
754746
; CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.18)
@@ -869,10 +861,8 @@ body: |
869861
; CHECK-NEXT: frame-destroy SEH_EpilogStart
870862
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 7, implicit $vg
871863
; CHECK-NEXT: frame-destroy SEH_AllocZ 7
872-
; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.6)
873-
; CHECK-NEXT: frame-destroy SEH_SaveReg 30, 0
874-
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
875-
; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
864+
; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.6)
865+
; CHECK-NEXT: frame-destroy SEH_SaveReg_X 30, -16
876866
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.8)
877867
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 1
878868
; CHECK-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.7)

llvm/test/CodeGen/AArch64/win-sve.ll

Lines changed: 32 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,8 @@ define i32 @f(<vscale x 2 x i64> %x) {
7575
; CHECK-NEXT: .seh_startepilogue
7676
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
7777
; CHECK-NEXT: .seh_save_reg x30, 8
78-
; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
79-
; CHECK-NEXT: .seh_save_reg x28, 0
80-
; CHECK-NEXT: add sp, sp, #16
81-
; CHECK-NEXT: .seh_stackalloc 16
78+
; CHECK-NEXT: ldr x28, [sp], #16 // 8-byte Folded Reload
79+
; CHECK-NEXT: .seh_save_reg_x x28, 16
8280
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
8381
; CHECK-NEXT: .seh_save_zreg z8, 2
8482
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -234,10 +232,8 @@ define void @f2(i64 %n, <vscale x 2 x i64> %x) {
234232
; CHECK-NEXT: .seh_save_fplr 16
235233
; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Folded Reload
236234
; CHECK-NEXT: .seh_save_reg x28, 8
237-
; CHECK-NEXT: ldr x19, [sp] // 8-byte Folded Reload
238-
; CHECK-NEXT: .seh_save_reg x19, 0
239-
; CHECK-NEXT: add sp, sp, #32
240-
; CHECK-NEXT: .seh_stackalloc 32
235+
; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload
236+
; CHECK-NEXT: .seh_save_reg_x x19, 32
241237
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
242238
; CHECK-NEXT: .seh_save_zreg z8, 2
243239
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -384,10 +380,8 @@ define void @f3(i64 %n, <vscale x 2 x i64> %x) {
384380
; CHECK-NEXT: .seh_stackalloc 16
385381
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
386382
; CHECK-NEXT: .seh_save_reg x30, 8
387-
; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
388-
; CHECK-NEXT: .seh_save_reg x28, 0
389-
; CHECK-NEXT: add sp, sp, #16
390-
; CHECK-NEXT: .seh_stackalloc 16
383+
; CHECK-NEXT: ldr x28, [sp], #16 // 8-byte Folded Reload
384+
; CHECK-NEXT: .seh_save_reg_x x28, 16
391385
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
392386
; CHECK-NEXT: .seh_save_zreg z8, 2
393387
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -538,10 +532,8 @@ define void @f4(i64 %n, <vscale x 2 x i64> %x) {
538532
; CHECK-NEXT: .seh_stackalloc 16
539533
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
540534
; CHECK-NEXT: .seh_save_reg x30, 8
541-
; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
542-
; CHECK-NEXT: .seh_save_reg x28, 0
543-
; CHECK-NEXT: add sp, sp, #16
544-
; CHECK-NEXT: .seh_stackalloc 16
535+
; CHECK-NEXT: ldr x28, [sp], #16 // 8-byte Folded Reload
536+
; CHECK-NEXT: .seh_save_reg_x x28, 16
545537
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
546538
; CHECK-NEXT: .seh_save_zreg z8, 2
547539
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -702,10 +694,8 @@ define void @f5(i64 %n, <vscale x 2 x i64> %x) {
702694
; CHECK-NEXT: .seh_save_fplr 16
703695
; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Folded Reload
704696
; CHECK-NEXT: .seh_save_reg x28, 8
705-
; CHECK-NEXT: ldr x19, [sp] // 8-byte Folded Reload
706-
; CHECK-NEXT: .seh_save_reg x19, 0
707-
; CHECK-NEXT: add sp, sp, #32
708-
; CHECK-NEXT: .seh_stackalloc 32
697+
; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload
698+
; CHECK-NEXT: .seh_save_reg_x x19, 32
709699
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
710700
; CHECK-NEXT: .seh_save_zreg z8, 2
711701
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -860,10 +850,10 @@ define void @f6(<vscale x 2 x i64> %x, [8 x i64] %pad, i64 %n9) personality ptr
860850
; CHECK-NEXT: stur x0, [x8, #16]
861851
; CHECK-NEXT: addvl x8, x29, #18
862852
; CHECK-NEXT: ldr x1, [x8, #32]
863-
; CHECK-NEXT: .Ltmp0:
853+
; CHECK-NEXT: .Ltmp0: // EH_LABEL
864854
; CHECK-NEXT: add x0, x19, #0
865855
; CHECK-NEXT: bl g6
866-
; CHECK-NEXT: .Ltmp1:
856+
; CHECK-NEXT: .Ltmp1: // EH_LABEL
867857
; CHECK-NEXT: // %bb.1: // %invoke.cont
868858
; CHECK-NEXT: .seh_startepilogue
869859
; CHECK-NEXT: add sp, sp, #64
@@ -872,10 +862,8 @@ define void @f6(<vscale x 2 x i64> %x, [8 x i64] %pad, i64 %n9) personality ptr
872862
; CHECK-NEXT: .seh_save_fplr 16
873863
; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Folded Reload
874864
; CHECK-NEXT: .seh_save_reg x28, 8
875-
; CHECK-NEXT: ldr x19, [sp] // 8-byte Folded Reload
876-
; CHECK-NEXT: .seh_save_reg x19, 0
877-
; CHECK-NEXT: add sp, sp, #32
878-
; CHECK-NEXT: .seh_stackalloc 32
865+
; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload
866+
; CHECK-NEXT: .seh_save_reg_x x19, 32
879867
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
880868
; CHECK-NEXT: .seh_save_zreg z8, 2
881869
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -932,8 +920,6 @@ define void @f6(<vscale x 2 x i64> %x, [8 x i64] %pad, i64 %n9) personality ptr
932920
; CHECK-NEXT: .seh_save_preg p14, 10
933921
; CHECK-NEXT: ldr p15, [sp, #11, mul vl] // 2-byte Folded Reload
934922
; CHECK-NEXT: .seh_save_preg p15, 11
935-
; CHECK-NEXT: add sp, sp, #16
936-
; CHECK-NEXT: .seh_stackalloc 16
937923
; CHECK-NEXT: addvl sp, sp, #18
938924
; CHECK-NEXT: .seh_allocz 18
939925
; CHECK-NEXT: add sp, sp, #16
@@ -1024,10 +1010,8 @@ define void @f6(<vscale x 2 x i64> %x, [8 x i64] %pad, i64 %n9) personality ptr
10241010
; CHECK-NEXT: .seh_save_fplr 16
10251011
; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Folded Reload
10261012
; CHECK-NEXT: .seh_save_reg x28, 8
1027-
; CHECK-NEXT: ldr x19, [sp] // 8-byte Folded Reload
1028-
; CHECK-NEXT: .seh_save_reg x19, 0
1029-
; CHECK-NEXT: add sp, sp, #32
1030-
; CHECK-NEXT: .seh_stackalloc 32
1013+
; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload
1014+
; CHECK-NEXT: .seh_save_reg_x x19, 32
10311015
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
10321016
; CHECK-NEXT: .seh_save_zreg z8, 2
10331017
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -1144,10 +1128,8 @@ define void @f8(<vscale x 2 x i64> %v) {
11441128
; CHECK-NEXT: //APP
11451129
; CHECK-NEXT: //NO_APP
11461130
; CHECK-NEXT: .seh_startepilogue
1147-
; CHECK-NEXT: ldr x30, [sp] // 8-byte Folded Reload
1148-
; CHECK-NEXT: .seh_save_reg x30, 0
1149-
; CHECK-NEXT: add sp, sp, #16
1150-
; CHECK-NEXT: .seh_stackalloc 16
1131+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
1132+
; CHECK-NEXT: .seh_save_reg_x x30, 16
11511133
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
11521134
; CHECK-NEXT: .seh_save_zreg z8, 0
11531135
; CHECK-NEXT: addvl sp, sp, #1
@@ -1196,14 +1178,10 @@ define void @f9(<vscale x 2 x i64> %v, ...) {
11961178
; CHECK-NEXT: //APP
11971179
; CHECK-NEXT: //NO_APP
11981180
; CHECK-NEXT: .seh_startepilogue
1199-
; CHECK-NEXT: ldr x30, [sp] // 8-byte Folded Reload
1200-
; CHECK-NEXT: .seh_save_reg x30, 0
1201-
; CHECK-NEXT: add sp, sp, #16
1202-
; CHECK-NEXT: .seh_stackalloc 16
1181+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
1182+
; CHECK-NEXT: .seh_save_reg_x x30, 16
12031183
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
12041184
; CHECK-NEXT: .seh_save_zreg z8, 0
1205-
; CHECK-NEXT: add sp, sp, #64
1206-
; CHECK-NEXT: .seh_stackalloc 64
12071185
; CHECK-NEXT: addvl sp, sp, #1
12081186
; CHECK-NEXT: .seh_allocz 1
12091187
; CHECK-NEXT: add sp, sp, #64
@@ -1301,10 +1279,8 @@ define void @f10(i64 %n, <vscale x 2 x i64> %x) "frame-pointer"="all" {
13011279
; CHECK-NEXT: .seh_stackalloc 16
13021280
; CHECK-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
13031281
; CHECK-NEXT: .seh_save_fplr 8
1304-
; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
1305-
; CHECK-NEXT: .seh_save_reg x28, 0
1306-
; CHECK-NEXT: add sp, sp, #32
1307-
; CHECK-NEXT: .seh_stackalloc 32
1282+
; CHECK-NEXT: ldr x28, [sp], #32 // 8-byte Folded Reload
1283+
; CHECK-NEXT: .seh_save_reg_x x28, 32
13081284
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
13091285
; CHECK-NEXT: .seh_save_zreg z8, 2
13101286
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -1390,10 +1366,8 @@ define i32 @f11(double %d, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible
13901366
; CHECK-NEXT: //NO_APP
13911367
; CHECK-NEXT: str d0, [sp, #8]
13921368
; CHECK-NEXT: .seh_startepilogue
1393-
; CHECK-NEXT: ldr x30, [sp] // 8-byte Folded Reload
1394-
; CHECK-NEXT: .seh_save_reg x30, 0
1395-
; CHECK-NEXT: add sp, sp, #16
1396-
; CHECK-NEXT: .seh_stackalloc 16
1369+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
1370+
; CHECK-NEXT: .seh_save_reg_x x30, 16
13971371
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
13981372
; CHECK-NEXT: .seh_save_zreg z8, 0
13991373
; CHECK-NEXT: addvl sp, sp, #1
@@ -1431,10 +1405,8 @@ define i32 @f12(double %d, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible
14311405
; CHECK-NEXT: .seh_startepilogue
14321406
; CHECK-NEXT: addvl sp, sp, #1
14331407
; CHECK-NEXT: .seh_allocz 1
1434-
; CHECK-NEXT: ldr x30, [sp] // 8-byte Folded Reload
1435-
; CHECK-NEXT: .seh_save_reg x30, 0
1436-
; CHECK-NEXT: add sp, sp, #16
1437-
; CHECK-NEXT: .seh_stackalloc 16
1408+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
1409+
; CHECK-NEXT: .seh_save_reg_x x30, 16
14381410
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
14391411
; CHECK-NEXT: .seh_save_zreg z8, 0
14401412
; CHECK-NEXT: addvl sp, sp, #1
@@ -1475,10 +1447,8 @@ define i32 @f13(double %d, <vscale x 4 x i32> %vs) "frame-pointer"="all" {
14751447
; CHECK-NEXT: .seh_startepilogue
14761448
; CHECK-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
14771449
; CHECK-NEXT: .seh_save_fplr 8
1478-
; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
1479-
; CHECK-NEXT: .seh_save_reg x28, 0
1480-
; CHECK-NEXT: add sp, sp, #32
1481-
; CHECK-NEXT: .seh_stackalloc 32
1450+
; CHECK-NEXT: ldr x28, [sp], #32 // 8-byte Folded Reload
1451+
; CHECK-NEXT: .seh_save_reg_x x28, 32
14821452
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
14831453
; CHECK-NEXT: .seh_save_zreg z8, 0
14841454
; CHECK-NEXT: addvl sp, sp, #1
@@ -1521,10 +1491,8 @@ define i32 @f14(double %d, <vscale x 4 x i32> %vs) "frame-pointer"="all" {
15211491
; CHECK-NEXT: .seh_allocz 1
15221492
; CHECK-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
15231493
; CHECK-NEXT: .seh_save_fplr 8
1524-
; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
1525-
; CHECK-NEXT: .seh_save_reg x28, 0
1526-
; CHECK-NEXT: add sp, sp, #32
1527-
; CHECK-NEXT: .seh_stackalloc 32
1494+
; CHECK-NEXT: ldr x28, [sp], #32 // 8-byte Folded Reload
1495+
; CHECK-NEXT: .seh_save_reg_x x28, 32
15281496
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
15291497
; CHECK-NEXT: .seh_save_zreg z8, 0
15301498
; CHECK-NEXT: addvl sp, sp, #1
@@ -1572,10 +1540,8 @@ define tailcc void @f15(double %d, <vscale x 4 x i32> %vs, [9 x i64], i32 %i) {
15721540
; CHECK-NEXT: .seh_stackalloc 16
15731541
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
15741542
; CHECK-NEXT: .seh_save_reg x30, 8
1575-
; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
1576-
; CHECK-NEXT: .seh_save_reg x28, 0
1577-
; CHECK-NEXT: add sp, sp, #16
1578-
; CHECK-NEXT: .seh_stackalloc 16
1543+
; CHECK-NEXT: ldr x28, [sp], #16 // 8-byte Folded Reload
1544+
; CHECK-NEXT: .seh_save_reg_x x28, 16
15791545
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
15801546
; CHECK-NEXT: .seh_save_zreg z8, 0
15811547
; CHECK-NEXT: addvl sp, sp, #1

0 commit comments

Comments
 (0)