Skip to content

Commit 96872e0

Browse files
committed
[AArch64][SME] Support Windows/stack probes in MachineSMEABIPass
On Windows, or when stack probes are enabled on other targets, additional code must be inserted after dynamic stack allocations to validate stack accesses and/or ensure that enough stack space has been allocated. Rather than handling this case in the MachineSMEABIPass (as we do for the standard case), we allocate the memory for the lazy save buffer in SelectionDAG, which allows the existing expansions to emit the correct code. Note: this means that, in these cases, we may allocate a lazy save buffer even when the function contains no lazy saves (as the buffer has to be allocated before the MachineSMEABIPass runs). Change-Id: If89ab54c4de79f6fe5513a6b387e9e349f7bc7d1
1 parent 7731ecf commit 96872e0

File tree

3 files changed

+42
-3
lines changed

3 files changed

+42
-3
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8489,13 +8489,22 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
84898489
if (Subtarget->hasCustomCallingConv())
84908490
Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
84918491

8492-
if (getTM().useNewSMEABILowering() && !Attrs.hasAgnosticZAInterface()) {
8492+
if (getTM().useNewSMEABILowering()) {
84938493
if (Subtarget->isTargetWindows() || hasInlineStackProbe(MF)) {
84948494
SDValue Size;
84958495
if (Attrs.hasZAState()) {
84968496
SDValue SVL = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
84978497
DAG.getConstant(1, DL, MVT::i32));
84988498
Size = DAG.getNode(ISD::MUL, DL, MVT::i64, SVL, SVL);
8499+
} else if (Attrs.hasAgnosticZAInterface()) {
8500+
RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE_SIZE;
8501+
SDValue Callee = DAG.getExternalSymbol(
8502+
getLibcallName(LC), getPointerTy(DAG.getDataLayout()));
8503+
auto *RetTy = EVT(MVT::i64).getTypeForEVT(*DAG.getContext());
8504+
TargetLowering::CallLoweringInfo CLI(DAG);
8505+
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
8506+
getLibcallCallingConv(LC), RetTy, Callee, {});
8507+
std::tie(Size, Chain) = LowerCallTo(CLI);
84998508
}
85008509
if (Size) {
85018510
SDValue Buffer = DAG.getNode(

llvm/lib/Target/AArch64/MachineSMEABIPass.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,8 +303,9 @@ void MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
303303
// buffer was allocated in SelectionDAG. It marks the end of the
304304
// allocation -- which is a safe point for this pass to insert any TPIDR2
305305
// block setup.
306-
if (MI.getOpcode() == AArch64::SMEStateAllocPseudo)
306+
if (MI.getOpcode() == AArch64::SMEStateAllocPseudo) {
307307
State.AfterSMEProloguePt = MBBI;
308+
}
308309
auto [NeededState, InsertPt] = getZAStateBeforeInst(
309310
*TRI, MI, /*ZAOffAtReturn=*/SMEFnAttrs.hasPrivateZAInterface());
310311
assert((InsertPt == MBBI ||

llvm/test/CodeGen/AArch64/sme-lazy-save-windows.ll

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc -mtriple=aarch64-windows-msvc -aarch64-streaming-hazard-size=0 -mattr=+sve,+sme < %s | FileCheck %s
3-
; RUN: llc -mtriple=aarch64-windows-msvc -aarch64-streaming-hazard-size=0 -mattr=+sve,+sme -aarch64-new-sme-abi < %s | FileCheck %s
3+
; RUN: llc -mtriple=aarch64-windows-msvc -aarch64-streaming-hazard-size=0 -mattr=+sve,+sme -aarch64-new-sme-abi < %s | FileCheck %s --check-prefix=CHECK-NEWLOWERING
44

55
declare void @private_za_callee()
66
declare void @shared_za_callee() "aarch64_inout_za"
@@ -34,6 +34,35 @@ define void @test_lazy_save() nounwind "aarch64_inout_za" {
3434
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
3535
; CHECK-NEXT: ldp x30, x29, [sp], #32 // 16-byte Folded Reload
3636
; CHECK-NEXT: ret
37+
;
38+
; CHECK-NEWLOWERING-LABEL: test_lazy_save:
39+
; CHECK-NEWLOWERING: // %bb.0:
40+
; CHECK-NEWLOWERING-NEXT: stp x30, x29, [sp, #-32]! // 16-byte Folded Spill
41+
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
42+
; CHECK-NEWLOWERING-NEXT: mov x29, sp
43+
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
44+
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
45+
; CHECK-NEWLOWERING-NEXT: mul x9, x8, x8
46+
; CHECK-NEWLOWERING-NEXT: lsr x15, x9, #4
47+
; CHECK-NEWLOWERING-NEXT: bl __chkstk
48+
; CHECK-NEWLOWERING-NEXT: sub x9, sp, x15, lsl #4
49+
; CHECK-NEWLOWERING-NEXT: mov sp, x9
50+
; CHECK-NEWLOWERING-NEXT: sub x10, x29, #16
51+
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
52+
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
53+
; CHECK-NEWLOWERING-NEXT: bl private_za_callee
54+
; CHECK-NEWLOWERING-NEXT: smstart za
55+
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
56+
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
57+
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB0_2
58+
; CHECK-NEWLOWERING-NEXT: // %bb.1:
59+
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
60+
; CHECK-NEWLOWERING-NEXT: .LBB0_2:
61+
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
62+
; CHECK-NEWLOWERING-NEXT: mov sp, x29
63+
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
64+
; CHECK-NEWLOWERING-NEXT: ldp x30, x29, [sp], #32 // 16-byte Folded Reload
65+
; CHECK-NEWLOWERING-NEXT: ret
3766
call void @private_za_callee()
3867
ret void
3968
}

0 commit comments

Comments
 (0)