Skip to content

Commit 635d508

Browse files
committed
[AArch64][SME] Disable tail calls for callees that require saving ZT0
The caller may need to reload ZT0 after the call returns, so such a call cannot be lowered as a tail call.
1 parent e1b55d0 commit 635d508

File tree

2 files changed

+12
-6
lines changed

2 files changed

+12
-6
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9028,11 +9028,12 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
90289028
CallingConv::ID CallerCC = CallerF.getCallingConv();
90299029

90309030
// SME Streaming functions are not eligible for TCO as they may require
9031-
// the streaming mode or ZA to be restored after returning from the call.
9031+
// the streaming mode or ZA/ZT0 to be restored after returning from the call.
90329032
SMECallAttrs CallAttrs =
90339033
getSMECallAttrs(CallerF, getRuntimeLibcallsInfo(), CLI);
90349034
if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() ||
90359035
CallAttrs.requiresPreservingAllZAState() ||
9036+
CallAttrs.requiresPreservingZT0() ||
90369037
CallAttrs.caller().hasStreamingBody())
90379038
return false;
90389039

llvm/test/CodeGen/AArch64/sme-zt0-state.ll

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -430,12 +430,17 @@ define void @zt0_multiple_private_za_calls(ptr %callee) "aarch64_in_zt0" nounwin
430430
define void @disable_tailcallopt(ptr %callee) "aarch64_inout_zt0" nounwind {
431431
; CHECK-COMMON-LABEL: disable_tailcallopt:
432432
; CHECK-COMMON: // %bb.0:
433-
; CHECK-COMMON-NEXT: sub sp, sp, #64
434-
; CHECK-COMMON-NEXT: mov x8, sp
435-
; CHECK-COMMON-NEXT: str zt0, [x8]
433+
; CHECK-COMMON-NEXT: sub sp, sp, #80
434+
; CHECK-COMMON-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
435+
; CHECK-COMMON-NEXT: mov x19, sp
436+
; CHECK-COMMON-NEXT: str zt0, [x19]
436437
; CHECK-COMMON-NEXT: smstop za
437-
; CHECK-COMMON-NEXT: add sp, sp, #64
438-
; CHECK-COMMON-NEXT: br x0
438+
; CHECK-COMMON-NEXT: blr x0
439+
; CHECK-COMMON-NEXT: smstart za
440+
; CHECK-COMMON-NEXT: ldr zt0, [x19]
441+
; CHECK-COMMON-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
442+
; CHECK-COMMON-NEXT: add sp, sp, #80
443+
; CHECK-COMMON-NEXT: ret
439444
tail call void %callee()
440445
ret void
441446
}

0 commit comments

Comments
 (0)