Skip to content

Commit f101ba2

Browse files
committed
Remove BP hack
1 parent 0cedfe5 commit f101ba2

File tree

3 files changed

+48
-44
lines changed

3 files changed

+48
-44
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2286,6 +2286,21 @@ static bool isFuncletReturnInstr(const MachineInstr &MI) {
22862286
}
22872287
}
22882288

2289+
/// Find a GPR restored in the epilogue that is not reserved.
///
/// Walks the callee-saved info recorded in \p MF's MachineFrameInfo and
/// returns the first entry whose register is (a) marked as restored,
/// (b) not reserved according to the MachineRegisterInfo, and (c) a member
/// of AArch64::GPR64RegClass.
///
/// \param MF the machine function whose callee-saved info is inspected.
/// \returns the first matching register in callee-saved-info order, or
/// AArch64::NoRegister if no entry satisfies all three conditions.
///
/// The caller in emitEpilogue uses the result as a temporary base register
/// for callee-save restores, on the grounds that the register is about to
/// be reloaded and is therefore safe to clobber.
2290+
static Register findRestoredCalleeSaveGPR(const MachineFunction &MF) {
2291+
const MachineFrameInfo &MFI = MF.getFrameInfo();
2292+
const MachineRegisterInfo &MRI = MF.getRegInfo();
2293+
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
2294+
for (auto &CS : CSI) {
2295+
Register Reg = CS.getReg();
2296+
// Skip entries that are not restored, are reserved, or are not 64-bit GPRs.
if (!CS.isRestored() || MRI.isReserved(Reg) ||
2297+
!AArch64::GPR64RegClass.contains(Reg))
2298+
continue;
2299+
return Reg;
2300+
}
2301+
// No suitable callee-saved GPR was found; callers must handle this case.
return AArch64::NoRegister;
2302+
}
2303+
22892304
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
22902305
MachineBasicBlock &MBB) const {
22912306
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
@@ -2553,11 +2568,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
25532568
Register CalleeSaveBase = AArch64::FP;
25542569
if (int64_t CalleeSaveBaseOffset =
25552570
AFI->getCalleeSaveBaseToFrameRecordOffset()) {
2556-
assert(RegInfo->hasBasePointer(MF) && "Expected base pointer!");
2557-
// NOTE: This base pointer is clobbered from this point on! The next
2558-
// step in eplilogue emission restoring callee-saves, so it should
2559-
// not be used after this point anyway.
2560-
CalleeSaveBase = RegInfo->getBaseRegister();
2571+
// This will find a GPR that is about to be restored -- so safe
2572+
// to clobber. SVE functions have a "big stack" so always spill at
2573+
// least one GPR (as a scratch register).
2574+
CalleeSaveBase = findRestoredCalleeSaveGPR(MF);
2575+
assert(CalleeSaveBase != AArch64::NoRegister);
25612576
emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
25622577
StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
25632578
MachineInstr::FrameDestroy);

llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -635,33 +635,31 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
635635
// Furthermore, if both variable sized objects are present, and the
636636
// stack needs to be dynamically re-aligned, the base pointer is the only
637637
// reliable way to reference the locals.
638-
bool CannotUseSPForSVERestore =
639-
MFI.hasVarSizedObjects() || hasStackRealignment(MF);
640-
if (CannotUseSPForSVERestore || MF.hasEHFunclets()) {
638+
if (MFI.hasVarSizedObjects() || MF.hasEHFunclets()) {
639+
if (hasStackRealignment(MF))
640+
return true;
641+
642+
auto &ST = MF.getSubtarget<AArch64Subtarget>();
643+
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
644+
if (ST.hasSVE() || ST.isStreaming()) {
645+
// Frames that have variable sized objects and scalable SVE objects,
646+
// should always use a basepointer.
647+
if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE())
648+
return true;
649+
}
650+
641651
// Frames with hazard padding can have a large offset between the frame
642652
// pointer and GPR locals, which includes the emergency spill slot. If the
643653
// emergency spill slot is not within range of the load/store instructions
644654
// (which have a signed 9-bit range), we will fail to compile if it is used.
645655
// Since hasBasePointer() is called before we know if we have hazard padding
646656
// or an emergency spill slot we need to enable the basepointer
647657
// conservatively.
648-
auto &ST = MF.getSubtarget<AArch64Subtarget>();
649-
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
650658
if (ST.getStreamingHazardSize() &&
651659
!AFI->getSMEFnAttrs().hasNonStreamingInterfaceAndBody()) {
652660
return true;
653661
}
654662

655-
if (hasStackRealignment(MF))
656-
return MFI.hasVarSizedObjects() || MF.hasEHFunclets();
657-
658-
if (ST.hasSVE() || ST.isStreaming()) {
659-
// Frames that have variable sized objects and scalable SVE objects,
660-
// should always use a basepointer.
661-
if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE())
662-
return true;
663-
}
664-
665663
// Conservatively estimate whether the negative offset from the frame
666664
// pointer will be sufficient to reach. If a function has a smallish
667665
// frame, it's less likely to have lots of spills and callee saved

llvm/test/CodeGen/AArch64/stack-hazard.ll

Lines changed: 15 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3703,11 +3703,10 @@ define i32 @svecc_call_realign(<4 x i16> %P0, i32 %P1, i32 %P2, <vscale x 16 x i
37033703
; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
37043704
; CHECK64-NEXT: stp x9, x28, [sp, #80] // 16-byte Folded Spill
37053705
; CHECK64-NEXT: stp x27, x26, [sp, #96] // 16-byte Folded Spill
3706-
; CHECK64-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill
3706+
; CHECK64-NEXT: str x19, [sp, #112] // 8-byte Folded Spill
37073707
; CHECK64-NEXT: add x29, sp, #64
37083708
; CHECK64-NEXT: .cfi_def_cfa w29, 64
3709-
; CHECK64-NEXT: .cfi_offset w19, -8
3710-
; CHECK64-NEXT: .cfi_offset w20, -16
3709+
; CHECK64-NEXT: .cfi_offset w19, -16
37113710
; CHECK64-NEXT: .cfi_offset w26, -24
37123711
; CHECK64-NEXT: .cfi_offset w27, -32
37133712
; CHECK64-NEXT: .cfi_offset w28, -40
@@ -3752,21 +3751,20 @@ define i32 @svecc_call_realign(<4 x i16> %P0, i32 %P1, i32 %P2, <vscale x 16 x i
37523751
; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 128 - 64 * VG
37533752
; CHECK64-NEXT: sub x9, sp, #1088
37543753
; CHECK64-NEXT: and sp, x9, #0xffffffffffffffe0
3755-
; CHECK64-NEXT: mov x19, sp
37563754
; CHECK64-NEXT: mov w2, w1
37573755
; CHECK64-NEXT: //APP
37583756
; CHECK64-NEXT: //NO_APP
37593757
; CHECK64-NEXT: bl __arm_sme_state
3760-
; CHECK64-NEXT: and x20, x0, #0x1
3758+
; CHECK64-NEXT: and x19, x0, #0x1
37613759
; CHECK64-NEXT: .cfi_offset vg, -48
3762-
; CHECK64-NEXT: tbz w20, #0, .LBB36_2
3760+
; CHECK64-NEXT: tbz w19, #0, .LBB36_2
37633761
; CHECK64-NEXT: // %bb.1: // %entry
37643762
; CHECK64-NEXT: smstop sm
37653763
; CHECK64-NEXT: .LBB36_2: // %entry
3766-
; CHECK64-NEXT: add x0, x19, #0
3764+
; CHECK64-NEXT: mov x0, sp
37673765
; CHECK64-NEXT: mov w1, #45 // =0x2d
37683766
; CHECK64-NEXT: bl memset
3769-
; CHECK64-NEXT: tbz w20, #0, .LBB36_4
3767+
; CHECK64-NEXT: tbz w19, #0, .LBB36_4
37703768
; CHECK64-NEXT: // %bb.3: // %entry
37713769
; CHECK64-NEXT: smstart sm
37723770
; CHECK64-NEXT: .LBB36_4: // %entry
@@ -3813,14 +3811,12 @@ define i32 @svecc_call_realign(<4 x i16> %P0, i32 %P1, i32 %P2, <vscale x 16 x i
38133811
; CHECK64-NEXT: .cfi_restore z15
38143812
; CHECK64-NEXT: sub sp, x29, #64
38153813
; CHECK64-NEXT: .cfi_def_cfa wsp, 128
3816-
; CHECK64-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload
3817-
; CHECK64-NEXT: ldr x28, [sp, #88] // 8-byte Folded Reload
3818-
; CHECK64-NEXT: ldp x27, x26, [sp, #96] // 16-byte Folded Reload
3814+
; CHECK64-NEXT: ldp x26, x19, [sp, #104] // 16-byte Folded Reload
3815+
; CHECK64-NEXT: ldp x28, x27, [sp, #88] // 16-byte Folded Reload
38193816
; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
38203817
; CHECK64-NEXT: add sp, sp, #128
38213818
; CHECK64-NEXT: .cfi_def_cfa_offset 0
38223819
; CHECK64-NEXT: .cfi_restore w19
3823-
; CHECK64-NEXT: .cfi_restore w20
38243820
; CHECK64-NEXT: .cfi_restore w26
38253821
; CHECK64-NEXT: .cfi_restore w27
38263822
; CHECK64-NEXT: .cfi_restore w28
@@ -3839,12 +3835,10 @@ define i32 @svecc_call_realign(<4 x i16> %P0, i32 %P1, i32 %P2, <vscale x 16 x i
38393835
; CHECK1024-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill
38403836
; CHECK1024-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill
38413837
; CHECK1024-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill
3842-
; CHECK1024-NEXT: str x20, [sp, #1072] // 8-byte Folded Spill
3843-
; CHECK1024-NEXT: str x19, [sp, #1080] // 8-byte Folded Spill
3838+
; CHECK1024-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill
38443839
; CHECK1024-NEXT: add x29, sp, #1024
38453840
; CHECK1024-NEXT: .cfi_def_cfa w29, 64
3846-
; CHECK1024-NEXT: .cfi_offset w19, -8
3847-
; CHECK1024-NEXT: .cfi_offset w20, -16
3841+
; CHECK1024-NEXT: .cfi_offset w19, -16
38483842
; CHECK1024-NEXT: .cfi_offset w26, -24
38493843
; CHECK1024-NEXT: .cfi_offset w27, -32
38503844
; CHECK1024-NEXT: .cfi_offset w28, -40
@@ -3889,21 +3883,20 @@ define i32 @svecc_call_realign(<4 x i16> %P0, i32 %P1, i32 %P2, <vscale x 16 x i
38893883
; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1088 - 64 * VG
38903884
; CHECK1024-NEXT: sub x9, sp, #2048
38913885
; CHECK1024-NEXT: and sp, x9, #0xffffffffffffffe0
3892-
; CHECK1024-NEXT: mov x19, sp
38933886
; CHECK1024-NEXT: mov w2, w1
38943887
; CHECK1024-NEXT: //APP
38953888
; CHECK1024-NEXT: //NO_APP
38963889
; CHECK1024-NEXT: bl __arm_sme_state
3897-
; CHECK1024-NEXT: and x20, x0, #0x1
3890+
; CHECK1024-NEXT: and x19, x0, #0x1
38983891
; CHECK1024-NEXT: .cfi_offset vg, -48
3899-
; CHECK1024-NEXT: tbz w20, #0, .LBB36_2
3892+
; CHECK1024-NEXT: tbz w19, #0, .LBB36_2
39003893
; CHECK1024-NEXT: // %bb.1: // %entry
39013894
; CHECK1024-NEXT: smstop sm
39023895
; CHECK1024-NEXT: .LBB36_2: // %entry
3903-
; CHECK1024-NEXT: add x0, x19, #0
3896+
; CHECK1024-NEXT: mov x0, sp
39043897
; CHECK1024-NEXT: mov w1, #45 // =0x2d
39053898
; CHECK1024-NEXT: bl memset
3906-
; CHECK1024-NEXT: tbz w20, #0, .LBB36_4
3899+
; CHECK1024-NEXT: tbz w19, #0, .LBB36_4
39073900
; CHECK1024-NEXT: // %bb.3: // %entry
39083901
; CHECK1024-NEXT: smstart sm
39093902
; CHECK1024-NEXT: .LBB36_4: // %entry
@@ -3950,8 +3943,7 @@ define i32 @svecc_call_realign(<4 x i16> %P0, i32 %P1, i32 %P2, <vscale x 16 x i
39503943
; CHECK1024-NEXT: .cfi_restore z15
39513944
; CHECK1024-NEXT: sub sp, x29, #1024
39523945
; CHECK1024-NEXT: .cfi_def_cfa wsp, 1088
3953-
; CHECK1024-NEXT: ldr x19, [sp, #1080] // 8-byte Folded Reload
3954-
; CHECK1024-NEXT: ldr x20, [sp, #1072] // 8-byte Folded Reload
3946+
; CHECK1024-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload
39553947
; CHECK1024-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload
39563948
; CHECK1024-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload
39573949
; CHECK1024-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload
@@ -3960,7 +3952,6 @@ define i32 @svecc_call_realign(<4 x i16> %P0, i32 %P1, i32 %P2, <vscale x 16 x i
39603952
; CHECK1024-NEXT: add sp, sp, #1088
39613953
; CHECK1024-NEXT: .cfi_def_cfa_offset 0
39623954
; CHECK1024-NEXT: .cfi_restore w19
3963-
; CHECK1024-NEXT: .cfi_restore w20
39643955
; CHECK1024-NEXT: .cfi_restore w26
39653956
; CHECK1024-NEXT: .cfi_restore w27
39663957
; CHECK1024-NEXT: .cfi_restore w28

0 commit comments

Comments
 (0)