Skip to content

Commit 698e146

Browse files
committed
Remove BP support
1 parent d9c517b commit 698e146

File tree

3 files changed: 24 additions and 53 deletions.

3 files changed: 24 additions and 53 deletions.

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 14 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -2535,43 +2535,14 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
25352535
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
25362536
NeedsWinCFI, &HasWinCFI);
25372537
} else if (SVEStackSize) {
2538-
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
25392538
int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
2540-
Register BaseForSVEDealloc = [&]() -> Register {
2541-
// With stack realignment we must use the FP to restore SVE CSRs (as both
2542-
// the SP and BP can't be used due to the unknown alignment padding).
2543-
if (AFI->isStackRealigned())
2544-
return AArch64::FP;
2545-
// With variable sized objects on the stack, we can use the BP or FP to
2546-
// restore the SVE callee saves.
2547-
if (MFI.hasVarSizedObjects()) {
2548-
if (DeallocateBefore && !AFI->hasStackHazardSlotIndex()) {
2549-
// If there's SVE locals and no hazard padding we can do:
2550-
// ADDVL SP, FP, #(-SVECalleeSavedSize)
2551-
return AArch64::FP;
2552-
}
2553-
// Otherwise, we can choose between:
2554-
// SUB TMP, FP, #CalleeSaveBaseOffset
2555-
// ADDVL SP, TMP, #(-SVECalleeSavedSize)
2556-
// OR:
2557-
// ADD SP, BP, #NumBytes
2558-
// ADDVL SP, SP, #DeallocateBefore
2559-
// Here we choose the latter as the "ADDVL" can be omitted if there's no
2560-
// SVE locals (and if we're here we either don't have SVE locals or have
2561-
// hazard padding).
2562-
assert(RegInfo->hasBasePointer(MF) && "Expected base pointer!");
2563-
return RegInfo->getBaseRegister();
2564-
}
2565-
// In the standard case we use the SP.
2566-
return AArch64::SP;
2567-
}();
2568-
// If we have any SVE callee saves they must be restored now.
2569-
bool MustRestoreSVECalleeSaves = SVECalleeSavedSize != 0;
2570-
// If the base for deallocation is the SP we must deallocate the SVE area
2571-
// regardless of if we have SVE callee saves. For any other base the SVE
2572-
// area will be implicitly deallocated when we set the SP to the FP.
2573-
bool MustDeallocateSVEArea = BaseForSVEDealloc == AArch64::SP;
2574-
if (MustRestoreSVECalleeSaves && BaseForSVEDealloc == AArch64::FP) {
2539+
// If we have stack realignment or variable-sized objects we must use the
2540+
// FP to restore SVE callee saves (as there is an unknown amount of
2541+
// data/padding between the SP and SVE CS area).
2542+
Register BaseForSVEDealloc =
2543+
AFI->isStackRealigned() || MFI.hasVarSizedObjects() ? AArch64::FP
2544+
: AArch64::SP;
2545+
if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
25752546
Register CalleeSaveBase = AArch64::FP;
25762547
if (int64_t CalleeSaveBaseOffset =
25772548
AFI->getCalleeSaveBaseToFrameRecordOffset()) {
@@ -2589,12 +2560,12 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
25892560
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
25902561
StackOffset::getScalable(-SVECalleeSavedSize), TII,
25912562
MachineInstr::FrameDestroy);
2592-
} else if (MustRestoreSVECalleeSaves || MustDeallocateSVEArea) {
2563+
} else if (BaseForSVEDealloc == AArch64::SP) {
25932564
if (SVECalleeSavedSize) {
25942565
// Deallocate the non-SVE locals first before we can deallocate (and
25952566
// restore callee saves) from the SVE area.
25962567
emitFrameOffset(
2597-
MBB, RestoreBegin, DL, AArch64::SP, BaseForSVEDealloc,
2568+
MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
25982569
StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy,
25992570
false, NeedsWinCFI, &HasWinCFI, EmitCFI && !hasFP(MF),
26002571
SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize));
@@ -2607,13 +2578,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
26072578
SVEStackSize +
26082579
StackOffset::getFixed(NumBytes + PrologueSaveSize));
26092580

2610-
if (MustDeallocateSVEArea) {
2611-
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
2612-
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
2613-
NeedsWinCFI, &HasWinCFI, EmitCFI && !hasFP(MF),
2614-
DeallocateAfter +
2615-
StackOffset::getFixed(NumBytes + PrologueSaveSize));
2616-
}
2581+
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
2582+
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
2583+
NeedsWinCFI, &HasWinCFI, EmitCFI && !hasFP(MF),
2584+
DeallocateAfter +
2585+
StackOffset::getFixed(NumBytes + PrologueSaveSize));
26172586
}
26182587
if (EmitCFI)
26192588
emitCalleeSavedSVERestores(MBB, RestoreEnd);

llvm/test/CodeGen/AArch64/stack-hazard.ll

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3227,7 +3227,7 @@ define i32 @svecc_call_dynamic_alloca(<4 x i16> %P0, i32 %P1, i32 %P2, <vscale x
32273227
; CHECK0-NEXT: mov w0, #22647 // =0x5877
32283228
; CHECK0-NEXT: movk w0, #59491, lsl #16
32293229
; CHECK0-NEXT: .cfi_restore vg
3230-
; CHECK0-NEXT: mov sp, x19
3230+
; CHECK0-NEXT: addvl sp, x29, #-18
32313231
; CHECK0-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
32323232
; CHECK0-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
32333233
; CHECK0-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
@@ -3363,7 +3363,8 @@ define i32 @svecc_call_dynamic_alloca(<4 x i16> %P0, i32 %P1, i32 %P2, <vscale x
33633363
; CHECK64-NEXT: mov w0, #22647 // =0x5877
33643364
; CHECK64-NEXT: movk w0, #59491, lsl #16
33653365
; CHECK64-NEXT: .cfi_restore vg
3366-
; CHECK64-NEXT: add sp, x19, #64
3366+
; CHECK64-NEXT: sub x8, x29, #64
3367+
; CHECK64-NEXT: addvl sp, x8, #-18
33673368
; CHECK64-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
33683369
; CHECK64-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
33693370
; CHECK64-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
@@ -3504,7 +3505,8 @@ define i32 @svecc_call_dynamic_alloca(<4 x i16> %P0, i32 %P1, i32 %P2, <vscale x
35043505
; CHECK1024-NEXT: mov w0, #22647 // =0x5877
35053506
; CHECK1024-NEXT: movk w0, #59491, lsl #16
35063507
; CHECK1024-NEXT: .cfi_restore vg
3507-
; CHECK1024-NEXT: add sp, x19, #1024
3508+
; CHECK1024-NEXT: sub x8, x29, #1024
3509+
; CHECK1024-NEXT: addvl sp, x8, #-18
35083510
; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
35093511
; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
35103512
; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
@@ -4148,9 +4150,9 @@ define i32 @svecc_call_dynamic_and_scalable_alloca(<4 x i16> %P0, i32 %P1, i32 %
41484150
; CHECK64-NEXT: mov x0, x20
41494151
; CHECK64-NEXT: bl bar
41504152
; CHECK64-NEXT: mov w0, #22647 // =0x5877
4153+
; CHECK64-NEXT: sub x8, x29, #64
41514154
; CHECK64-NEXT: movk w0, #59491, lsl #16
4152-
; CHECK64-NEXT: add sp, x19, #112
4153-
; CHECK64-NEXT: addvl sp, sp, #1
4155+
; CHECK64-NEXT: addvl sp, x8, #-18
41544156
; CHECK64-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
41554157
; CHECK64-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
41564158
; CHECK64-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
@@ -4263,9 +4265,9 @@ define i32 @svecc_call_dynamic_and_scalable_alloca(<4 x i16> %P0, i32 %P1, i32 %
42634265
; CHECK1024-NEXT: mov x0, x20
42644266
; CHECK1024-NEXT: bl bar
42654267
; CHECK1024-NEXT: mov w0, #22647 // =0x5877
4268+
; CHECK1024-NEXT: sub x8, x29, #1024
42664269
; CHECK1024-NEXT: movk w0, #59491, lsl #16
4267-
; CHECK1024-NEXT: add sp, x19, #1072
4268-
; CHECK1024-NEXT: addvl sp, sp, #1
4270+
; CHECK1024-NEXT: addvl sp, x8, #-18
42694271
; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
42704272
; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
42714273
; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload

llvm/test/CodeGen/AArch64/sve-alloca.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ define void @foo(<vscale x 4 x i64> %dst, i1 %cond) {
6464
; CHECK-NEXT: str z1, [x0, #1, mul vl]
6565
; CHECK-NEXT: str z0, [x0]
6666
; CHECK-NEXT: bl bar
67-
; CHECK-NEXT: mov sp, x19
67+
; CHECK-NEXT: addvl sp, x29, #-18
6868
; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
6969
; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
7070
; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload

0 commit comments

Comments
 (0)