[AArch64][SME] Enable split SVE for hazard padding in SVE CC functions (#166561)

MacDue · web-flow · commit cf51a5e87268 · 2025-11-12T10:58:05.000Z
This patch enables `aarch64-split-sve-objects` to handle hazard padding in functions that use the SVE CC even when there are no predicate spills/locals. This improves the codegen over the base hazard padding implementation, as rather than placing the padding in the callee-save area, it is placed at the start of the ZPR area. E.g., Current lowering: ``` sub sp, sp, #1040 str x29, [sp, #1024] // 8-byte Folded Spill addvl sp, sp, #-1 str z8, [sp] // 16-byte Folded Spill sub sp, sp, #1040 ``` New lowering: ``` str x29, [sp, #-16]! // 8-byte Folded Spill sub sp, sp, #1024 addvl sp, sp, #-1 str z8, [sp] // 16-byte Folded Spill sub sp, sp, #1040 ``` This also re-enables paired stores for GPRs (as the offsets no longer include the hazard padding).
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2412,9 +2412,31 @@ void AArch64FrameLowering::determineStackHazardSlot(
     AFI->setStackHazardSlotIndex(ID);
   }
 
-  // Determine if we should use SplitSVEObjects. This should only be used if
-  // there's a possibility of a stack hazard between PPRs and ZPRs or FPRs.
+  if (!AFI->hasStackHazardSlotIndex())
+    return;
+
   if (SplitSVEObjects) {
+    CallingConv::ID CC = MF.getFunction().getCallingConv();
+    if (AFI->isSVECC() || CC == CallingConv::AArch64_SVE_VectorCall) {
+      AFI->setSplitSVEObjects(true);
+      LLVM_DEBUG(dbgs() << "Using SplitSVEObjects for SVE CC function\n");
+      return;
+    }
+
+    // We only use SplitSVEObjects in non-SVE CC functions if there's a
+    // possibility of a stack hazard between PPRs and ZPRs/FPRs.
+    LLVM_DEBUG(dbgs() << "Determining if SplitSVEObjects should be used in "
+                         "non-SVE CC function...\n");
+
+    // If another calling convention is explicitly set FPRs can't be promoted to
+    // ZPR callee-saves.
+    if (!is_contained({CallingConv::C, CallingConv::Fast}, CC)) {
+      LLVM_DEBUG(
+          dbgs()
+          << "Calling convention is not supported with SplitSVEObjects\n");
+      return;
+    }
+
     if (!HasPPRCSRs && !HasPPRStackObjects) {
       LLVM_DEBUG(
           dbgs() << "Not using SplitSVEObjects as no PPRs are on the stack\n");
@@ -2428,16 +2450,6 @@ void AArch64FrameLowering::determineStackHazardSlot(
       return;
     }
 
-    // If another calling convention is explicitly set FPRs can't be promoted to
-    // ZPR callee-saves.
-    if (!is_contained({CallingConv::C, CallingConv::Fast,
-                       CallingConv::AArch64_SVE_VectorCall},
-                      MF.getFunction().getCallingConv())) {
-      LLVM_DEBUG(
-          dbgs() << "Calling convention is not supported with SplitSVEObjects");
-      return;
-    }
-
     [[maybe_unused]] const AArch64Subtarget &Subtarget =
         MF.getSubtarget<AArch64Subtarget>();
     assert(Subtarget.isSVEorStreamingSVEAvailable() &&
diff --git a/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll
@@ -839,11 +839,10 @@ define aarch64_sve_vector_pcs void @only_ppr_csr_vla(i64 %n) {
 define aarch64_sve_vector_pcs void @only_zpr_csr_vla(i64 %n) {
 ; CHECK-LABEL: only_zpr_csr_vla:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #1056
-; CHECK-NEXT:    str x29, [sp, #1024] // 8-byte Folded Spill
-; CHECK-NEXT:    add x29, sp, #1024
-; CHECK-NEXT:    str x30, [sp, #1032] // 8-byte Folded Spill
-; CHECK-NEXT:    str x19, [sp, #1040] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    sub sp, sp, #1024
 ; CHECK-NEXT:    addvl sp, sp, #-3
 ; CHECK-NEXT:    str z10, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    str z9, [sp, #1, mul vl] // 16-byte Folded Spill
@@ -870,11 +869,9 @@ define aarch64_sve_vector_pcs void @only_zpr_csr_vla(i64 %n) {
 ; CHECK-NEXT:    ldr z10, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
-; CHECK-NEXT:    sub sp, x29, #1024
-; CHECK-NEXT:    ldr x19, [sp, #1040] // 8-byte Folded Reload
-; CHECK-NEXT:    ldr x30, [sp, #1032] // 8-byte Folded Reload
-; CHECK-NEXT:    ldr x29, [sp, #1024] // 8-byte Folded Reload
-; CHECK-NEXT:    add sp, sp, #1056
+; CHECK-NEXT:    mov sp, x29
+; CHECK-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
 ; CHECK-NEXT:    ret
   %alloc = alloca i8, i64 %n, align 1
   call void (...) @llvm.fake.use(ptr %alloc)
diff --git a/llvm/test/CodeGen/AArch64/stack-hazard.ll b/llvm/test/CodeGen/AArch64/stack-hazard.ll