-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[AArch64][SVE] Remove isSVECC() in favour of changing the calling convention #152742
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,7 @@ | |
#include "llvm/ADT/APInt.h" | ||
#include "llvm/ADT/ArrayRef.h" | ||
#include "llvm/ADT/STLExtras.h" | ||
#include "llvm/ADT/ScopeExit.h" | ||
#include "llvm/ADT/SmallSet.h" | ||
#include "llvm/ADT/SmallVector.h" | ||
#include "llvm/ADT/SmallVectorExtras.h" | ||
|
@@ -7837,24 +7838,44 @@ SDValue AArch64TargetLowering::LowerFormalArguments( | |
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, | ||
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { | ||
MachineFunction &MF = DAG.getMachineFunction(); | ||
const Function &F = MF.getFunction(); | ||
Function &F = MF.getFunction(); | ||
MachineFrameInfo &MFI = MF.getFrameInfo(); | ||
bool IsWin64 = | ||
Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg()); | ||
bool StackViaX4 = CallConv == CallingConv::ARM64EC_Thunk_X64 || | ||
(isVarArg && Subtarget->isWindowsArm64EC()); | ||
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); | ||
|
||
SmallVector<ISD::OutputArg, 4> Outs; | ||
GetReturnInfo(CallConv, F.getReturnType(), F.getAttributes(), Outs, | ||
DAG.getTargetLoweringInfo(), MF.getDataLayout()); | ||
if (any_of(Outs, [](ISD::OutputArg &Out){ return Out.VT.isScalableVector(); })) | ||
FuncInfo->setIsSVECC(true); | ||
|
||
// Assign locations to all of the incoming arguments. | ||
SmallVector<CCValAssign, 16> ArgLocs; | ||
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); | ||
|
||
// This logic is consistent with AArch64TargetLowering::LowerCall. | ||
// The `ShouldUpgradeToSVECC` flag can be set when analyzing arguments. | ||
bool ShouldUpgradeToSVECC = false; | ||
auto _ = make_scope_exit([&] { | ||
if (CallConv != CallingConv::C && CallConv != CallingConv::Fast) | ||
return; | ||
|
||
if (!ShouldUpgradeToSVECC) { | ||
// If the flag was not set, check if the return value requires the SVE CC. | ||
SmallVector<ISD::OutputArg, 4> Outs; | ||
GetReturnInfo(CallConv, F.getReturnType(), F.getAttributes(), Outs, | ||
DAG.getTargetLoweringInfo(), MF.getDataLayout()); | ||
ShouldUpgradeToSVECC = any_of( | ||
Outs, [](ISD::OutputArg &Out) { return Out.VT.isScalableVector(); }); | ||
} | ||
|
||
if (!ShouldUpgradeToSVECC) | ||
return; | ||
|
||
if (isVarArg) | ||
report_fatal_error("Passing/returning SVE types to variadic functions " | ||
"is currently not supported"); | ||
Comment on lines
+7872
to
+7874
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You can pass them to variadic functions, but not as variadic arguments. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, I was confused by the wording of our current errors, which say "Passing SVE types to variadic functions currently not supported" (although the check, it appears, is actually whether the type is used as a variadic argument). |
||
|
||
F.setCallingConv(CallingConv::AArch64_SVE_VectorCall); | ||
}); | ||
|
||
// At this point, Ins[].VT may already be promoted to i32. To correctly | ||
// handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and | ||
// i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT. | ||
|
@@ -7942,14 +7963,14 @@ SDValue AArch64TargetLowering::LowerFormalArguments( | |
RC = &AArch64::FPR128RegClass; | ||
else if (RegVT.isScalableVector() && | ||
RegVT.getVectorElementType() == MVT::i1) { | ||
FuncInfo->setIsSVECC(true); | ||
RC = &AArch64::PPRRegClass; | ||
ShouldUpgradeToSVECC = true; | ||
} else if (RegVT == MVT::aarch64svcount) { | ||
FuncInfo->setIsSVECC(true); | ||
RC = &AArch64::PPRRegClass; | ||
ShouldUpgradeToSVECC = true; | ||
} else if (RegVT.isScalableVector()) { | ||
FuncInfo->setIsSVECC(true); | ||
RC = &AArch64::ZPRRegClass; | ||
ShouldUpgradeToSVECC = true; | ||
} else | ||
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); | ||
|
||
|
@@ -8597,14 +8618,6 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization( | |
CallAttrs.caller().hasStreamingBody()) | ||
return false; | ||
|
||
// Functions using the C or Fast calling convention that have an SVE signature | ||
// preserve more registers and should assume the SVE_VectorCall CC. | ||
// The check for matching callee-saved regs will determine whether it is | ||
// eligible for TCO. | ||
if ((CallerCC == CallingConv::C || CallerCC == CallingConv::Fast) && | ||
MF.getInfo<AArch64FunctionInfo>()->isSVECC()) | ||
CallerCC = CallingConv::AArch64_SVE_VectorCall; | ||
|
||
bool CCMatch = CallerCC == CalleeCC; | ||
|
||
// When using the Windows calling convention on a non-windows OS, we want | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,7 @@ | ||
; RUN: sed -e "s,CC,cfguard_checkcc,g" %s | not --crash llc -mtriple=arm64-apple-darwin -o - 2>&1 | FileCheck %s --check-prefix=CFGUARD | ||
; RUN: sed -e "s,CC,aarch64_sve_vector_pcs,g" %s | not --crash llc -mtriple=arm64-apple-darwin -o - 2>&1 | FileCheck %s --check-prefix=SVE_VECTOR_PCS | ||
|
||
define CC void @f0() { | ||
unreachable | ||
} | ||
|
||
; CFGUARD: Calling convention CFGuard_Check is unsupported on Darwin. | ||
; SVE_VECTOR_PCS: Calling convention SVE_VectorCall is unsupported on Darwin. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -784,8 +784,6 @@ define void @f6(<vscale x 2 x i64> %x, [8 x i64] %pad, i64 %n9) personality ptr | |
; CHECK-NEXT: .seh_proc f6 | ||
; CHECK-NEXT: .seh_handler __CxxFrameHandler3, @unwind, @except | ||
; CHECK-NEXT: // %bb.0: | ||
; CHECK-NEXT: sub sp, sp, #16 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know enough about SEH to tell if this change is significant (to me, it seems insignificant). I also don't know if this case is fully supported. |
||
; CHECK-NEXT: .seh_stackalloc 16 | ||
; CHECK-NEXT: addvl sp, sp, #-18 | ||
; CHECK-NEXT: .seh_allocz 18 | ||
; CHECK-NEXT: str p4, [sp] // 2-byte Folded Spill | ||
|
@@ -853,21 +851,21 @@ define void @f6(<vscale x 2 x i64> %x, [8 x i64] %pad, i64 %n9) personality ptr | |
; CHECK-NEXT: add x29, sp, #16 | ||
; CHECK-NEXT: .seh_add_fp 16 | ||
; CHECK-NEXT: .seh_endprologue | ||
; CHECK-NEXT: sub sp, sp, #64 | ||
; CHECK-NEXT: sub sp, sp, #80 | ||
; CHECK-NEXT: mov x0, #-2 // =0xfffffffffffffffe | ||
; CHECK-NEXT: addvl x8, x29, #18 | ||
; CHECK-NEXT: mov x19, sp | ||
; CHECK-NEXT: stur x0, [x8, #16] | ||
; CHECK-NEXT: stur x0, [x8] | ||
; CHECK-NEXT: addvl x8, x29, #18 | ||
; CHECK-NEXT: ldr x1, [x8, #32] | ||
; CHECK-NEXT: .Ltmp0: | ||
; CHECK-NEXT: ldr x1, [x8, #16] | ||
; CHECK-NEXT: .Ltmp0: // EH_LABEL | ||
; CHECK-NEXT: add x0, x19, #0 | ||
; CHECK-NEXT: bl g6 | ||
; CHECK-NEXT: .Ltmp1: | ||
; CHECK-NEXT: .Ltmp1: // EH_LABEL | ||
; CHECK-NEXT: // %bb.1: // %invoke.cont | ||
; CHECK-NEXT: .seh_startepilogue | ||
; CHECK-NEXT: add sp, sp, #64 | ||
; CHECK-NEXT: .seh_stackalloc 64 | ||
; CHECK-NEXT: add sp, sp, #80 | ||
; CHECK-NEXT: .seh_stackalloc 80 | ||
; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload | ||
; CHECK-NEXT: .seh_save_fplr 16 | ||
; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Folded Reload | ||
|
@@ -932,12 +930,8 @@ define void @f6(<vscale x 2 x i64> %x, [8 x i64] %pad, i64 %n9) personality ptr | |
; CHECK-NEXT: .seh_save_preg p14, 10 | ||
; CHECK-NEXT: ldr p15, [sp, #11, mul vl] // 2-byte Folded Reload | ||
; CHECK-NEXT: .seh_save_preg p15, 11 | ||
; CHECK-NEXT: add sp, sp, #16 | ||
; CHECK-NEXT: .seh_stackalloc 16 | ||
; CHECK-NEXT: addvl sp, sp, #18 | ||
; CHECK-NEXT: .seh_allocz 18 | ||
; CHECK-NEXT: add sp, sp, #16 | ||
; CHECK-NEXT: .seh_stackalloc 16 | ||
; CHECK-NEXT: .seh_endepilogue | ||
; CHECK-NEXT: ret | ||
; CHECK-NEXT: .seh_endfunclet | ||
|
@@ -1160,64 +1154,6 @@ define void @f8(<vscale x 2 x i64> %v) { | |
ret void | ||
} | ||
|
||
define void @f9(<vscale x 2 x i64> %v, ...) { | ||
; CHECK-LABEL: f9: | ||
; CHECK: .seh_proc f9 | ||
; CHECK-NEXT: // %bb.0: | ||
; CHECK-NEXT: sub sp, sp, #64 | ||
; CHECK-NEXT: .seh_stackalloc 64 | ||
; CHECK-NEXT: addvl sp, sp, #-1 | ||
; CHECK-NEXT: .seh_allocz 1 | ||
; CHECK-NEXT: str z8, [sp] // 16-byte Folded Spill | ||
; CHECK-NEXT: .seh_save_zreg z8, 0 | ||
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill | ||
; CHECK-NEXT: .seh_save_reg_x x30, 16 | ||
; CHECK-NEXT: .seh_endprologue | ||
; CHECK-NEXT: addvl x8, sp, #1 | ||
; CHECK-NEXT: add x9, sp, #8 | ||
; CHECK-NEXT: str x2, [x8, #32] | ||
; CHECK-NEXT: addvl x8, sp, #1 | ||
; CHECK-NEXT: str x0, [x8, #16] | ||
; CHECK-NEXT: addvl x8, sp, #1 | ||
; CHECK-NEXT: str x1, [x8, #24] | ||
; CHECK-NEXT: addvl x8, sp, #1 | ||
; CHECK-NEXT: str x3, [x8, #40] | ||
; CHECK-NEXT: addvl x8, sp, #1 | ||
; CHECK-NEXT: str x4, [x8, #48] | ||
; CHECK-NEXT: addvl x8, sp, #1 | ||
; CHECK-NEXT: str x5, [x8, #56] | ||
; CHECK-NEXT: addvl x8, sp, #1 | ||
; CHECK-NEXT: str x6, [x8, #64] | ||
; CHECK-NEXT: addvl x8, sp, #1 | ||
; CHECK-NEXT: str x7, [x8, #72] | ||
; CHECK-NEXT: add x8, sp, #16 | ||
; CHECK-NEXT: addvl x8, x8, #1 | ||
; CHECK-NEXT: str x8, [sp, #8] | ||
; CHECK-NEXT: //APP | ||
; CHECK-NEXT: //NO_APP | ||
; CHECK-NEXT: .seh_startepilogue | ||
; CHECK-NEXT: ldr x30, [sp] // 8-byte Folded Reload | ||
; CHECK-NEXT: .seh_save_reg x30, 0 | ||
; CHECK-NEXT: add sp, sp, #16 | ||
; CHECK-NEXT: .seh_stackalloc 16 | ||
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload | ||
; CHECK-NEXT: .seh_save_zreg z8, 0 | ||
; CHECK-NEXT: add sp, sp, #64 | ||
; CHECK-NEXT: .seh_stackalloc 64 | ||
; CHECK-NEXT: addvl sp, sp, #1 | ||
; CHECK-NEXT: .seh_allocz 1 | ||
; CHECK-NEXT: add sp, sp, #64 | ||
; CHECK-NEXT: .seh_stackalloc 64 | ||
; CHECK-NEXT: .seh_endepilogue | ||
; CHECK-NEXT: ret | ||
; CHECK-NEXT: .seh_endfunclet | ||
; CHECK-NEXT: .seh_endproc | ||
%va_list = alloca ptr | ||
call void @llvm.va_start.p0(ptr %va_list) | ||
call void asm "", "r,~{d8},~{memory}"(ptr %va_list) | ||
ret void | ||
} | ||
|
||
declare void @g10(ptr,ptr) | ||
define void @f10(i64 %n, <vscale x 2 x i64> %x) "frame-pointer"="all" { | ||
; CHECK-LABEL: f10: | ||
|
@@ -1546,40 +1482,33 @@ define tailcc void @f15(double %d, <vscale x 4 x i32> %vs, [9 x i64], i32 %i) { | |
; CHECK-LABEL: f15: | ||
; CHECK: .seh_proc f15 | ||
; CHECK-NEXT: // %bb.0: | ||
; CHECK-NEXT: addvl sp, sp, #-1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This change is due to the explicit There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The LangRef says that the calling convention requires the prototype of all callees to exactly match the prototype of the function definition. So I guess adding |
||
; CHECK-NEXT: .seh_allocz 1 | ||
; CHECK-NEXT: str z8, [sp] // 16-byte Folded Spill | ||
; CHECK-NEXT: .seh_save_zreg z8, 0 | ||
; CHECK-NEXT: str x28, [sp, #-16]! // 8-byte Folded Spill | ||
; CHECK-NEXT: .seh_save_reg_x x28, 16 | ||
; CHECK-NEXT: str x28, [sp, #-32]! // 8-byte Folded Spill | ||
; CHECK-NEXT: .seh_save_reg_x x28, 32 | ||
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill | ||
; CHECK-NEXT: .seh_save_reg x30, 8 | ||
; CHECK-NEXT: sub sp, sp, #16 | ||
; CHECK-NEXT: .seh_stackalloc 16 | ||
; CHECK-NEXT: str d8, [sp, #16] // 8-byte Folded Spill | ||
; CHECK-NEXT: .seh_save_freg d8, 16 | ||
; CHECK-NEXT: addvl sp, sp, #-1 | ||
; CHECK-NEXT: .seh_allocz 1 | ||
; CHECK-NEXT: .seh_endprologue | ||
; CHECK-NEXT: addvl x8, sp, #2 | ||
; CHECK-NEXT: addvl x8, sp, #1 | ||
; CHECK-NEXT: addvl x9, sp, #1 | ||
; CHECK-NEXT: //APP | ||
; CHECK-NEXT: //NO_APP | ||
; CHECK-NEXT: stp d0, d0, [sp, #8] | ||
; CHECK-NEXT: ldr w8, [x8, #104] | ||
; CHECK-NEXT: str w8, [sp, #8] | ||
; CHECK-NEXT: str d0, [x9, #24] | ||
; CHECK-NEXT: addvl x9, sp, #1 | ||
; CHECK-NEXT: str d0, [sp] | ||
; CHECK-NEXT: str w8, [x9, #24] | ||
; CHECK-NEXT: .seh_startepilogue | ||
; CHECK-NEXT: addvl sp, sp, #1 | ||
; CHECK-NEXT: .seh_allocz 1 | ||
; CHECK-NEXT: add sp, sp, #16 | ||
; CHECK-NEXT: .seh_stackalloc 16 | ||
; CHECK-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload | ||
; CHECK-NEXT: .seh_save_freg d8, 16 | ||
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload | ||
; CHECK-NEXT: .seh_save_reg x30, 8 | ||
; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload | ||
; CHECK-NEXT: .seh_save_reg x28, 0 | ||
; CHECK-NEXT: add sp, sp, #16 | ||
; CHECK-NEXT: .seh_stackalloc 16 | ||
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload | ||
; CHECK-NEXT: .seh_save_zreg z8, 0 | ||
; CHECK-NEXT: addvl sp, sp, #1 | ||
; CHECK-NEXT: .seh_allocz 1 | ||
; CHECK-NEXT: ldr x28, [sp], #32 // 8-byte Folded Reload | ||
; CHECK-NEXT: .seh_save_reg_x x28, 32 | ||
; CHECK-NEXT: add sp, sp, #80 | ||
; CHECK-NEXT: .seh_stackalloc 80 | ||
; CHECK-NEXT: .seh_endepilogue | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: whitespace