Skip to content

Commit bfb54e8

Browse files
authored
[AArch64][SME] Disable tail calls for callees that require saving ZT0 (#165371)
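When a call requires preserving ZT0, the caller must reload ZT0 after the callee returns; a tail call leaves no point at which to do that, so such calls are not eligible for tail call optimization.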
1 parent a4950c4 commit bfb54e8

File tree

2 files changed

+20
-1
lines changed

2 files changed

+20
-1
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9028,11 +9028,12 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
90289028
CallingConv::ID CallerCC = CallerF.getCallingConv();
90299029

90309030
// SME Streaming functions are not eligible for TCO as they may require
9031-
// the streaming mode or ZA to be restored after returning from the call.
9031+
// the streaming mode or ZA/ZT0 to be restored after returning from the call.
90329032
SMECallAttrs CallAttrs =
90339033
getSMECallAttrs(CallerF, getRuntimeLibcallsInfo(), CLI);
90349034
if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() ||
90359035
CallAttrs.requiresPreservingAllZAState() ||
9036+
CallAttrs.requiresPreservingZT0() ||
90369037
CallAttrs.caller().hasStreamingBody())
90379038
return false;
90389039

llvm/test/CodeGen/AArch64/sme-zt0-state.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,3 +426,21 @@ define void @zt0_multiple_private_za_calls(ptr %callee) "aarch64_in_zt0" nounwin
426426
call void %callee()
427427
ret void
428428
}
429+
430+
define void @disable_tailcallopt(ptr %callee) "aarch64_inout_zt0" nounwind {
431+
; CHECK-COMMON-LABEL: disable_tailcallopt:
432+
; CHECK-COMMON: // %bb.0:
433+
; CHECK-COMMON-NEXT: sub sp, sp, #80
434+
; CHECK-COMMON-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
435+
; CHECK-COMMON-NEXT: mov x19, sp
436+
; CHECK-COMMON-NEXT: str zt0, [x19]
437+
; CHECK-COMMON-NEXT: smstop za
438+
; CHECK-COMMON-NEXT: blr x0
439+
; CHECK-COMMON-NEXT: smstart za
440+
; CHECK-COMMON-NEXT: ldr zt0, [x19]
441+
; CHECK-COMMON-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
442+
; CHECK-COMMON-NEXT: add sp, sp, #80
443+
; CHECK-COMMON-NEXT: ret
444+
tail call void %callee()
445+
ret void
446+
}

0 commit comments

Comments
 (0)