Skip to content

Commit 414b3ea

Browse files
committed
[AArch64][SVE] Avoid extra pop of "FixedObject" with FPAfterSVECalleeSaves
Previously, we would pop `FixedObject` bytes after deallocating the SVE area, and then pop them again as part of `AfterCSRPopSize`. This double pop could be seen in the tests `@f6` and `@f9`. This patch removes the erroneous pop and refactors the `FPAfterSVECalleeSaves` path to reuse more of the existing GPR deallocation logic, which also allows the last callee-save restore to be converted into a post-indexed load.
1 parent 2a02d57 commit 414b3ea

File tree

3 files changed: 69 additions and 107 deletions

llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp

Lines changed: 27 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1360,14 +1360,24 @@ void AArch64EpilogueEmitter::emitEpilogue() {
13601360
}
13611361

13621362
bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes);
1363-
// Assume we can't combine the last pop with the sp restore.
1364-
bool CombineAfterCSRBump = false;
1363+
1364+
unsigned ProloguePopSize = PrologueSaveSize;
13651365
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
1366+
// With CalleeSavesAboveFrameRecord ProloguePopSize is the amount of stack
1367+
// that needs to be popped until we reach the start of the SVE save area.
1368+
// The "FixedObject" stack occurs after the SVE area and must be popped
1369+
// later.
1370+
ProloguePopSize -= FixedObject;
13661371
AfterCSRPopSize += FixedObject;
1367-
} else if (!CombineSPBump && PrologueSaveSize != 0) {
1372+
}
1373+
1374+
// Assume we can't combine the last pop with the sp restore.
1375+
if (!CombineSPBump && ProloguePopSize != 0) {
13681376
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
13691377
while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
1370-
AArch64InstrInfo::isSEHInstruction(*Pop))
1378+
AArch64InstrInfo::isSEHInstruction(*Pop) ||
1379+
(SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord &&
1380+
isPartOfSVECalleeSaves(Pop)))
13711381
Pop = std::prev(Pop);
13721382
// Converting the last ldp to a post-index ldp is valid only if the last
13731383
// ldp's offset is 0.
@@ -1377,18 +1387,24 @@ void AArch64EpilogueEmitter::emitEpilogue() {
13771387
// may clobber), convert it to a post-index ldp.
13781388
if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
13791389
convertCalleeSaveRestoreToSPPrePostIncDec(
1380-
Pop, DL, PrologueSaveSize, EmitCFI, MachineInstr::FrameDestroy,
1381-
PrologueSaveSize);
1390+
Pop, DL, ProloguePopSize, EmitCFI, MachineInstr::FrameDestroy,
1391+
ProloguePopSize);
1392+
} else if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
1393+
// If not, and CalleeSavesAboveFrameRecord is enabled, deallocate
1394+
// callee-save non-SVE registers to move the stack pointer to the start of
1395+
// the SVE area.
1396+
emitFrameOffset(MBB, std::next(Pop), DL, AArch64::SP, AArch64::SP,
1397+
StackOffset::getFixed(ProloguePopSize), TII,
1398+
MachineInstr::FrameDestroy, false, NeedsWinCFI,
1399+
&HasWinCFI);
13821400
} else {
1383-
// If not, make sure to emit an add after the last ldp.
1401+
// Otherwise, make sure to emit an add after the last ldp.
13841402
// We're doing this by transferring the size to be restored from the
13851403
// adjustment *before* the CSR pops to the adjustment *after* the CSR
13861404
// pops.
1387-
AfterCSRPopSize += PrologueSaveSize;
1388-
CombineAfterCSRBump = true;
1405+
AfterCSRPopSize += ProloguePopSize;
13891406
}
13901407
}
1391-
13921408
// Move past the restores of the callee-saved registers.
13931409
// If we plan on combining the sp bump of the local stack size and the callee
13941410
// save stack size, we might need to adjust the CSR save and restore offsets.
@@ -1467,16 +1483,6 @@ void AArch64EpilogueEmitter::emitEpilogue() {
14671483
NeedsWinCFI, &HasWinCFI);
14681484
}
14691485

1470-
// Deallocate callee-save non-SVE registers.
1471-
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1472-
StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII,
1473-
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1474-
1475-
// Deallocate fixed objects.
1476-
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1477-
StackOffset::getFixed(FixedObject), TII,
1478-
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1479-
14801486
// Deallocate callee-save SVE registers.
14811487
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
14821488
SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false,
@@ -1619,7 +1625,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
16191625
MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
16201626
StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
16211627
false, NeedsWinCFI, &HasWinCFI, EmitCFI,
1622-
StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0));
1628+
StackOffset::getFixed(AfterCSRPopSize - ArgumentStackToRestore));
16231629
}
16241630
}
16251631

llvm/test/CodeGen/AArch64/framelayout-sve-win.mir

Lines changed: 10 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -380,10 +380,8 @@ body: |
380380
; CHECK-NEXT: frame-destroy SEH_EpilogStart
381381
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
382382
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
383-
; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
384-
; CHECK-NEXT: frame-destroy SEH_SaveReg 30, 0
385-
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
386-
; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
383+
; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.1)
384+
; CHECK-NEXT: frame-destroy SEH_SaveReg_X 30, -16
387385
; CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 0 :: (load (s16) from %stack.4)
388386
; CHECK-NEXT: frame-destroy SEH_SavePReg 4, 0
389387
; CHECK-NEXT: $p5 = frame-destroy LDR_PXI $sp, 1 :: (load (s16) from %stack.3)
@@ -430,10 +428,8 @@ body: |
430428
; CHECK-NEXT: frame-destroy SEH_EpilogStart
431429
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
432430
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
433-
; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
434-
; CHECK-NEXT: frame-destroy SEH_SaveReg 30, 0
435-
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
436-
; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
431+
; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.1)
432+
; CHECK-NEXT: frame-destroy SEH_SaveReg_X 30, -16
437433
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.4)
438434
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 0
439435
; CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.3)
@@ -557,10 +553,8 @@ body: |
557553
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
558554
; CHECK-NEXT: $x21, $lr = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.3)
559555
; CHECK-NEXT: frame-destroy SEH_SaveRegP 21, 30, 16
560-
; CHECK-NEXT: $x19, $x20 = frame-destroy LDPXi $sp, 0 :: (load (s64) from %stack.4), (load (s64) from %stack.5)
561-
; CHECK-NEXT: frame-destroy SEH_SaveRegP 19, 20, 0
562-
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
563-
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
556+
; CHECK-NEXT: early-clobber $sp, $x19, $x20 = frame-destroy LDPXpost $sp, 4 :: (load (s64) from %stack.4), (load (s64) from %stack.5)
557+
; CHECK-NEXT: frame-destroy SEH_SaveRegP_X 19, 20, -32
564558
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.21)
565559
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 2
566560
; CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.20)
@@ -745,10 +739,8 @@ body: |
745739
; CHECK-NEXT: frame-destroy SEH_EpilogStart
746740
; CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
747741
; CHECK-NEXT: frame-destroy SEH_SetFP
748-
; CHECK-NEXT: $fp, $lr = frame-destroy LDPXi $sp, 0 :: (load (s64) from %stack.2), (load (s64) from %stack.3)
749-
; CHECK-NEXT: frame-destroy SEH_SaveFPLR 0
750-
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
751-
; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
742+
; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.3)
743+
; CHECK-NEXT: frame-destroy SEH_SaveFPLR_X -16
752744
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.19)
753745
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 2
754746
; CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.18)
@@ -869,10 +861,8 @@ body: |
869861
; CHECK-NEXT: frame-destroy SEH_EpilogStart
870862
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 7, implicit $vg
871863
; CHECK-NEXT: frame-destroy SEH_AllocZ 7
872-
; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.6)
873-
; CHECK-NEXT: frame-destroy SEH_SaveReg 30, 0
874-
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
875-
; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
864+
; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.6)
865+
; CHECK-NEXT: frame-destroy SEH_SaveReg_X 30, -16
876866
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.8)
877867
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 1
878868
; CHECK-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.7)

llvm/test/CodeGen/AArch64/win-sve.ll

Lines changed: 32 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -75,10 +75,8 @@ define i32 @f(<vscale x 2 x i64> %x) {
7575
; CHECK-NEXT: .seh_startepilogue
7676
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
7777
; CHECK-NEXT: .seh_save_reg x30, 8
78-
; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
79-
; CHECK-NEXT: .seh_save_reg x28, 0
80-
; CHECK-NEXT: add sp, sp, #16
81-
; CHECK-NEXT: .seh_stackalloc 16
78+
; CHECK-NEXT: ldr x28, [sp], #16 // 8-byte Folded Reload
79+
; CHECK-NEXT: .seh_save_reg_x x28, 16
8280
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
8381
; CHECK-NEXT: .seh_save_zreg z8, 2
8482
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -234,10 +232,8 @@ define void @f2(i64 %n, <vscale x 2 x i64> %x) {
234232
; CHECK-NEXT: .seh_save_fplr 16
235233
; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Folded Reload
236234
; CHECK-NEXT: .seh_save_reg x28, 8
237-
; CHECK-NEXT: ldr x19, [sp] // 8-byte Folded Reload
238-
; CHECK-NEXT: .seh_save_reg x19, 0
239-
; CHECK-NEXT: add sp, sp, #32
240-
; CHECK-NEXT: .seh_stackalloc 32
235+
; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload
236+
; CHECK-NEXT: .seh_save_reg_x x19, 32
241237
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
242238
; CHECK-NEXT: .seh_save_zreg z8, 2
243239
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -384,10 +380,8 @@ define void @f3(i64 %n, <vscale x 2 x i64> %x) {
384380
; CHECK-NEXT: .seh_stackalloc 16
385381
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
386382
; CHECK-NEXT: .seh_save_reg x30, 8
387-
; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
388-
; CHECK-NEXT: .seh_save_reg x28, 0
389-
; CHECK-NEXT: add sp, sp, #16
390-
; CHECK-NEXT: .seh_stackalloc 16
383+
; CHECK-NEXT: ldr x28, [sp], #16 // 8-byte Folded Reload
384+
; CHECK-NEXT: .seh_save_reg_x x28, 16
391385
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
392386
; CHECK-NEXT: .seh_save_zreg z8, 2
393387
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -538,10 +532,8 @@ define void @f4(i64 %n, <vscale x 2 x i64> %x) {
538532
; CHECK-NEXT: .seh_stackalloc 16
539533
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
540534
; CHECK-NEXT: .seh_save_reg x30, 8
541-
; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
542-
; CHECK-NEXT: .seh_save_reg x28, 0
543-
; CHECK-NEXT: add sp, sp, #16
544-
; CHECK-NEXT: .seh_stackalloc 16
535+
; CHECK-NEXT: ldr x28, [sp], #16 // 8-byte Folded Reload
536+
; CHECK-NEXT: .seh_save_reg_x x28, 16
545537
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
546538
; CHECK-NEXT: .seh_save_zreg z8, 2
547539
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -702,10 +694,8 @@ define void @f5(i64 %n, <vscale x 2 x i64> %x) {
702694
; CHECK-NEXT: .seh_save_fplr 16
703695
; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Folded Reload
704696
; CHECK-NEXT: .seh_save_reg x28, 8
705-
; CHECK-NEXT: ldr x19, [sp] // 8-byte Folded Reload
706-
; CHECK-NEXT: .seh_save_reg x19, 0
707-
; CHECK-NEXT: add sp, sp, #32
708-
; CHECK-NEXT: .seh_stackalloc 32
697+
; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload
698+
; CHECK-NEXT: .seh_save_reg_x x19, 32
709699
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
710700
; CHECK-NEXT: .seh_save_zreg z8, 2
711701
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -860,10 +850,10 @@ define void @f6(<vscale x 2 x i64> %x, [8 x i64] %pad, i64 %n9) personality ptr
860850
; CHECK-NEXT: stur x0, [x8, #16]
861851
; CHECK-NEXT: addvl x8, x29, #18
862852
; CHECK-NEXT: ldr x1, [x8, #32]
863-
; CHECK-NEXT: .Ltmp0:
853+
; CHECK-NEXT: .Ltmp0: // EH_LABEL
864854
; CHECK-NEXT: add x0, x19, #0
865855
; CHECK-NEXT: bl g6
866-
; CHECK-NEXT: .Ltmp1:
856+
; CHECK-NEXT: .Ltmp1: // EH_LABEL
867857
; CHECK-NEXT: // %bb.1: // %invoke.cont
868858
; CHECK-NEXT: .seh_startepilogue
869859
; CHECK-NEXT: add sp, sp, #64
@@ -872,10 +862,8 @@ define void @f6(<vscale x 2 x i64> %x, [8 x i64] %pad, i64 %n9) personality ptr
872862
; CHECK-NEXT: .seh_save_fplr 16
873863
; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Folded Reload
874864
; CHECK-NEXT: .seh_save_reg x28, 8
875-
; CHECK-NEXT: ldr x19, [sp] // 8-byte Folded Reload
876-
; CHECK-NEXT: .seh_save_reg x19, 0
877-
; CHECK-NEXT: add sp, sp, #32
878-
; CHECK-NEXT: .seh_stackalloc 32
865+
; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload
866+
; CHECK-NEXT: .seh_save_reg_x x19, 32
879867
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
880868
; CHECK-NEXT: .seh_save_zreg z8, 2
881869
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -932,8 +920,6 @@ define void @f6(<vscale x 2 x i64> %x, [8 x i64] %pad, i64 %n9) personality ptr
932920
; CHECK-NEXT: .seh_save_preg p14, 10
933921
; CHECK-NEXT: ldr p15, [sp, #11, mul vl] // 2-byte Folded Reload
934922
; CHECK-NEXT: .seh_save_preg p15, 11
935-
; CHECK-NEXT: add sp, sp, #16
936-
; CHECK-NEXT: .seh_stackalloc 16
937923
; CHECK-NEXT: addvl sp, sp, #18
938924
; CHECK-NEXT: .seh_allocz 18
939925
; CHECK-NEXT: add sp, sp, #16
@@ -1024,10 +1010,8 @@ define void @f6(<vscale x 2 x i64> %x, [8 x i64] %pad, i64 %n9) personality ptr
10241010
; CHECK-NEXT: .seh_save_fplr 16
10251011
; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Folded Reload
10261012
; CHECK-NEXT: .seh_save_reg x28, 8
1027-
; CHECK-NEXT: ldr x19, [sp] // 8-byte Folded Reload
1028-
; CHECK-NEXT: .seh_save_reg x19, 0
1029-
; CHECK-NEXT: add sp, sp, #32
1030-
; CHECK-NEXT: .seh_stackalloc 32
1013+
; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload
1014+
; CHECK-NEXT: .seh_save_reg_x x19, 32
10311015
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
10321016
; CHECK-NEXT: .seh_save_zreg z8, 2
10331017
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -1144,10 +1128,8 @@ define void @f8(<vscale x 2 x i64> %v) {
11441128
; CHECK-NEXT: //APP
11451129
; CHECK-NEXT: //NO_APP
11461130
; CHECK-NEXT: .seh_startepilogue
1147-
; CHECK-NEXT: ldr x30, [sp] // 8-byte Folded Reload
1148-
; CHECK-NEXT: .seh_save_reg x30, 0
1149-
; CHECK-NEXT: add sp, sp, #16
1150-
; CHECK-NEXT: .seh_stackalloc 16
1131+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
1132+
; CHECK-NEXT: .seh_save_reg_x x30, 16
11511133
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
11521134
; CHECK-NEXT: .seh_save_zreg z8, 0
11531135
; CHECK-NEXT: addvl sp, sp, #1
@@ -1196,14 +1178,10 @@ define void @f9(<vscale x 2 x i64> %v, ...) {
11961178
; CHECK-NEXT: //APP
11971179
; CHECK-NEXT: //NO_APP
11981180
; CHECK-NEXT: .seh_startepilogue
1199-
; CHECK-NEXT: ldr x30, [sp] // 8-byte Folded Reload
1200-
; CHECK-NEXT: .seh_save_reg x30, 0
1201-
; CHECK-NEXT: add sp, sp, #16
1202-
; CHECK-NEXT: .seh_stackalloc 16
1181+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
1182+
; CHECK-NEXT: .seh_save_reg_x x30, 16
12031183
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
12041184
; CHECK-NEXT: .seh_save_zreg z8, 0
1205-
; CHECK-NEXT: add sp, sp, #64
1206-
; CHECK-NEXT: .seh_stackalloc 64
12071185
; CHECK-NEXT: addvl sp, sp, #1
12081186
; CHECK-NEXT: .seh_allocz 1
12091187
; CHECK-NEXT: add sp, sp, #64
@@ -1301,10 +1279,8 @@ define void @f10(i64 %n, <vscale x 2 x i64> %x) "frame-pointer"="all" {
13011279
; CHECK-NEXT: .seh_stackalloc 16
13021280
; CHECK-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
13031281
; CHECK-NEXT: .seh_save_fplr 8
1304-
; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
1305-
; CHECK-NEXT: .seh_save_reg x28, 0
1306-
; CHECK-NEXT: add sp, sp, #32
1307-
; CHECK-NEXT: .seh_stackalloc 32
1282+
; CHECK-NEXT: ldr x28, [sp], #32 // 8-byte Folded Reload
1283+
; CHECK-NEXT: .seh_save_reg_x x28, 32
13081284
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
13091285
; CHECK-NEXT: .seh_save_zreg z8, 2
13101286
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -1390,10 +1366,8 @@ define i32 @f11(double %d, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible
13901366
; CHECK-NEXT: //NO_APP
13911367
; CHECK-NEXT: str d0, [sp, #8]
13921368
; CHECK-NEXT: .seh_startepilogue
1393-
; CHECK-NEXT: ldr x30, [sp] // 8-byte Folded Reload
1394-
; CHECK-NEXT: .seh_save_reg x30, 0
1395-
; CHECK-NEXT: add sp, sp, #16
1396-
; CHECK-NEXT: .seh_stackalloc 16
1369+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
1370+
; CHECK-NEXT: .seh_save_reg_x x30, 16
13971371
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
13981372
; CHECK-NEXT: .seh_save_zreg z8, 0
13991373
; CHECK-NEXT: addvl sp, sp, #1
@@ -1431,10 +1405,8 @@ define i32 @f12(double %d, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible
14311405
; CHECK-NEXT: .seh_startepilogue
14321406
; CHECK-NEXT: addvl sp, sp, #1
14331407
; CHECK-NEXT: .seh_allocz 1
1434-
; CHECK-NEXT: ldr x30, [sp] // 8-byte Folded Reload
1435-
; CHECK-NEXT: .seh_save_reg x30, 0
1436-
; CHECK-NEXT: add sp, sp, #16
1437-
; CHECK-NEXT: .seh_stackalloc 16
1408+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
1409+
; CHECK-NEXT: .seh_save_reg_x x30, 16
14381410
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
14391411
; CHECK-NEXT: .seh_save_zreg z8, 0
14401412
; CHECK-NEXT: addvl sp, sp, #1
@@ -1475,10 +1447,8 @@ define i32 @f13(double %d, <vscale x 4 x i32> %vs) "frame-pointer"="all" {
14751447
; CHECK-NEXT: .seh_startepilogue
14761448
; CHECK-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
14771449
; CHECK-NEXT: .seh_save_fplr 8
1478-
; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
1479-
; CHECK-NEXT: .seh_save_reg x28, 0
1480-
; CHECK-NEXT: add sp, sp, #32
1481-
; CHECK-NEXT: .seh_stackalloc 32
1450+
; CHECK-NEXT: ldr x28, [sp], #32 // 8-byte Folded Reload
1451+
; CHECK-NEXT: .seh_save_reg_x x28, 32
14821452
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
14831453
; CHECK-NEXT: .seh_save_zreg z8, 0
14841454
; CHECK-NEXT: addvl sp, sp, #1
@@ -1521,10 +1491,8 @@ define i32 @f14(double %d, <vscale x 4 x i32> %vs) "frame-pointer"="all" {
15211491
; CHECK-NEXT: .seh_allocz 1
15221492
; CHECK-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
15231493
; CHECK-NEXT: .seh_save_fplr 8
1524-
; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
1525-
; CHECK-NEXT: .seh_save_reg x28, 0
1526-
; CHECK-NEXT: add sp, sp, #32
1527-
; CHECK-NEXT: .seh_stackalloc 32
1494+
; CHECK-NEXT: ldr x28, [sp], #32 // 8-byte Folded Reload
1495+
; CHECK-NEXT: .seh_save_reg_x x28, 32
15281496
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
15291497
; CHECK-NEXT: .seh_save_zreg z8, 0
15301498
; CHECK-NEXT: addvl sp, sp, #1
@@ -1572,10 +1540,8 @@ define tailcc void @f15(double %d, <vscale x 4 x i32> %vs, [9 x i64], i32 %i) {
15721540
; CHECK-NEXT: .seh_stackalloc 16
15731541
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
15741542
; CHECK-NEXT: .seh_save_reg x30, 8
1575-
; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
1576-
; CHECK-NEXT: .seh_save_reg x28, 0
1577-
; CHECK-NEXT: add sp, sp, #16
1578-
; CHECK-NEXT: .seh_stackalloc 16
1543+
; CHECK-NEXT: ldr x28, [sp], #16 // 8-byte Folded Reload
1544+
; CHECK-NEXT: .seh_save_reg_x x28, 16
15791545
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
15801546
; CHECK-NEXT: .seh_save_zreg z8, 0
15811547
; CHECK-NEXT: addvl sp, sp, #1

0 commit comments

Comments
 (0)