Skip to content

Commit cf51a5e

Browse files
authored
[AArch64][SME] Enable split SVE for hazard padding in SVE CC functions (#166561)
This patch enables `aarch64-split-sve-objects` to handle hazard padding in functions that use the SVE CC even when there are no predicate spills/locals.

This improves the codegen over the base hazard padding implementation: rather than placing the padding in the callee-save area, it is placed at the start of the ZPR area.

E.g., current lowering:

```
sub sp, sp, #1040
str x29, [sp, #1024] // 8-byte Folded Spill
addvl sp, sp, #-1
str z8, [sp] // 16-byte Folded Spill
sub sp, sp, #1040
```

New lowering:

```
str x29, [sp, #-16]! // 8-byte Folded Spill
sub sp, sp, #1024
addvl sp, sp, #-1
str z8, [sp] // 16-byte Folded Spill
sub sp, sp, #1040
```

This also re-enables paired stores for GPRs (as the offsets no longer include the hazard padding).
1 parent 0957656 commit cf51a5e

File tree

3 files changed

+403
-224
lines changed

3 files changed

+403
-224
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2412,9 +2412,31 @@ void AArch64FrameLowering::determineStackHazardSlot(
24122412
AFI->setStackHazardSlotIndex(ID);
24132413
}
24142414

2415-
// Determine if we should use SplitSVEObjects. This should only be used if
2416-
// there's a possibility of a stack hazard between PPRs and ZPRs or FPRs.
2415+
if (!AFI->hasStackHazardSlotIndex())
2416+
return;
2417+
24172418
if (SplitSVEObjects) {
2419+
CallingConv::ID CC = MF.getFunction().getCallingConv();
2420+
if (AFI->isSVECC() || CC == CallingConv::AArch64_SVE_VectorCall) {
2421+
AFI->setSplitSVEObjects(true);
2422+
LLVM_DEBUG(dbgs() << "Using SplitSVEObjects for SVE CC function\n");
2423+
return;
2424+
}
2425+
2426+
// We only use SplitSVEObjects in non-SVE CC functions if there's a
2427+
// possibility of a stack hazard between PPRs and ZPRs/FPRs.
2428+
LLVM_DEBUG(dbgs() << "Determining if SplitSVEObjects should be used in "
2429+
"non-SVE CC function...\n");
2430+
2431+
// If another calling convention is explicitly set FPRs can't be promoted to
2432+
// ZPR callee-saves.
2433+
if (!is_contained({CallingConv::C, CallingConv::Fast}, CC)) {
2434+
LLVM_DEBUG(
2435+
dbgs()
2436+
<< "Calling convention is not supported with SplitSVEObjects\n");
2437+
return;
2438+
}
2439+
24182440
if (!HasPPRCSRs && !HasPPRStackObjects) {
24192441
LLVM_DEBUG(
24202442
dbgs() << "Not using SplitSVEObjects as no PPRs are on the stack\n");
@@ -2428,16 +2450,6 @@ void AArch64FrameLowering::determineStackHazardSlot(
24282450
return;
24292451
}
24302452

2431-
// If another calling convention is explicitly set FPRs can't be promoted to
2432-
// ZPR callee-saves.
2433-
if (!is_contained({CallingConv::C, CallingConv::Fast,
2434-
CallingConv::AArch64_SVE_VectorCall},
2435-
MF.getFunction().getCallingConv())) {
2436-
LLVM_DEBUG(
2437-
dbgs() << "Calling convention is not supported with SplitSVEObjects");
2438-
return;
2439-
}
2440-
24412453
[[maybe_unused]] const AArch64Subtarget &Subtarget =
24422454
MF.getSubtarget<AArch64Subtarget>();
24432455
assert(Subtarget.isSVEorStreamingSVEAvailable() &&

llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -839,11 +839,10 @@ define aarch64_sve_vector_pcs void @only_ppr_csr_vla(i64 %n) {
839839
define aarch64_sve_vector_pcs void @only_zpr_csr_vla(i64 %n) {
840840
; CHECK-LABEL: only_zpr_csr_vla:
841841
; CHECK: // %bb.0:
842-
; CHECK-NEXT: sub sp, sp, #1056
843-
; CHECK-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
844-
; CHECK-NEXT: add x29, sp, #1024
845-
; CHECK-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill
846-
; CHECK-NEXT: str x19, [sp, #1040] // 8-byte Folded Spill
842+
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
843+
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
844+
; CHECK-NEXT: mov x29, sp
845+
; CHECK-NEXT: sub sp, sp, #1024
847846
; CHECK-NEXT: addvl sp, sp, #-3
848847
; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill
849848
; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill
@@ -870,11 +869,9 @@ define aarch64_sve_vector_pcs void @only_zpr_csr_vla(i64 %n) {
870869
; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload
871870
; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload
872871
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
873-
; CHECK-NEXT: sub sp, x29, #1024
874-
; CHECK-NEXT: ldr x19, [sp, #1040] // 8-byte Folded Reload
875-
; CHECK-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload
876-
; CHECK-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
877-
; CHECK-NEXT: add sp, sp, #1056
872+
; CHECK-NEXT: mov sp, x29
873+
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
874+
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
878875
; CHECK-NEXT: ret
879876
%alloc = alloca i8, i64 %n, align 1
880877
call void (...) @llvm.fake.use(ptr %alloc)

0 commit comments

Comments
 (0)