diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index af4780e11e890..c914ea001cccb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8237,26 +8237,26 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
   }
 
   // varargs
-  // Note that IsWin64 part is required to prevent odd miscompilations on arm64
-  // windows platforms. For more info refer to GH#126780 PR comments.
-  if (isVarArg &&
-      (DAG.getMachineFunction().getFrameInfo().hasVAStart() || IsWin64)) {
-    if (!Subtarget->isTargetDarwin() || IsWin64) {
-      // The AAPCS variadic function ABI is identical to the non-variadic
-      // one. As a result there may be more arguments in registers and we should
-      // save them for future reference.
-      // Win64 variadic functions also pass arguments in registers, but all float
-      // arguments are passed in integer registers.
-      saveVarArgRegisters(CCInfo, DAG, DL, Chain);
-    }
-
-    // This will point to the next argument passed via stack.
-    unsigned VarArgsOffset = CCInfo.getStackSize();
-    // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
-    VarArgsOffset = alignTo(VarArgsOffset, Subtarget->isTargetILP32() ? 4 : 8);
-    FuncInfo->setVarArgsStackOffset(VarArgsOffset);
-    FuncInfo->setVarArgsStackIndex(
-        MFI.CreateFixedObject(4, VarArgsOffset, true));
+  if (isVarArg) {
+    if (DAG.getMachineFunction().getFrameInfo().hasVAStart()) {
+      if (!Subtarget->isTargetDarwin() || IsWin64) {
+        // The AAPCS variadic function ABI is identical to the non-variadic
+        // one. As a result there may be more arguments in registers and we
+        // should save them for future reference.
+        // Win64 variadic functions also pass arguments in registers, but all
+        // float arguments are passed in integer registers.
+        saveVarArgRegisters(CCInfo, DAG, DL, Chain);
+      }
+
+      // This will point to the next argument passed via stack.
+      unsigned VarArgsOffset = CCInfo.getStackSize();
+      // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
+      VarArgsOffset =
+          alignTo(VarArgsOffset, Subtarget->isTargetILP32() ? 4 : 8);
+      FuncInfo->setVarArgsStackOffset(VarArgsOffset);
+      FuncInfo->setVarArgsStackIndex(
+          MFI.CreateFixedObject(4, VarArgsOffset, true));
+    }
 
     if (MFI.hasMustTailInVarArgFunc()) {
       SmallVector<MVT, 2> RegParmTypes;
@@ -9439,7 +9439,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
     }
   }
 
-  if (IsVarArg && Subtarget->isWindowsArm64EC()) {
+  if (IsVarArg && Subtarget->isWindowsArm64EC() &&
+      !(CLI.CB && CLI.CB->isMustTailCall())) {
     SDValue ParamPtr = StackPtr;
     if (IsTailCall) {
       // Create a dummy object at the top of the stack that can be used to get
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll b/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll
index 20ff5fc5bc5e1..f964484c0c2d4 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll
@@ -24,11 +24,15 @@ define void @has_varargs(...) hybrid_patchable nounwind {
 ; CHECK-NEXT:     .p2align 2
 ; CHECK-NEXT: "#has_varargs$hp_target":               // @"#has_varargs$hp_target"
 ; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #32
-; CHECK-NEXT:    stp x0, x1, [x4, #-32]
-; CHECK-NEXT:    stp x2, x3, [x4, #-16]
-; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    stp x0, x1, [x4, #-32]!
+; CHECK-NEXT:    stp x2, x3, [x4, #16]
+; CHECK-NEXT:    str x4, [sp, #8]
+; CHECK-NEXT:    add sp, sp, #48
 ; CHECK-NEXT:    ret
+  %valist = alloca ptr
+  call void @llvm.va_start(ptr %valist)
+  call void @llvm.va_end(ptr %valist)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll b/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
index 844fc52ddade6..5796b6f3216a7 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
@@ -102,40 +102,58 @@ define void @varargs_many_argscalleer() nounwind {
 
 define void @varargs_caller_tail() nounwind {
 ; CHECK-LABEL: varargs_caller_tail:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #48
-; CHECK-NEXT: mov x4, sp
-; CHECK-NEXT: add x8, sp, #16
-; CHECK-NEXT: mov x9, #4617315517961601024 // =0x4014000000000000
-; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
-; CHECK-NEXT: mov w1, #2 // =0x2
-; CHECK-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
-; CHECK-NEXT: mov w3, #4 // =0x4
-; CHECK-NEXT: mov w5, #16 // =0x10
-; CHECK-NEXT: stp xzr, x30, [sp, #24] // 8-byte Folded Spill
-; CHECK-NEXT: stp x9, x8, [sp]
-; CHECK-NEXT: str xzr, [sp, #16]
-; CHECK-NEXT: .weak_anti_dep varargs_callee
-; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF
-; CHECK-NEXT: .weak_anti_dep "#varargs_callee"
-; CHECK-NEXT:.set "#varargs_callee", varargs_callee@WEAKREF
-; CHECK-NEXT: bl "#varargs_callee"
-; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT: add x4, sp, #48
-; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
-; CHECK-NEXT: mov w1, #4 // =0x4
-; CHECK-NEXT: mov w2, #3 // =0x3
-; CHECK-NEXT: mov w3, #2 // =0x2
-; CHECK-NEXT: mov x5, xzr
-; CHECK-NEXT: add sp, sp, #48
-; CHECK-NEXT: .weak_anti_dep varargs_callee
-; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF
-; CHECK-NEXT: .weak_anti_dep "#varargs_callee"
-; CHECK-NEXT:.set "#varargs_callee", varargs_callee@WEAKREF
-; CHECK-NEXT: b "#varargs_callee"
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    mov x4, sp
+; CHECK-NEXT:    add x8, sp, #16
+; CHECK-NEXT:    mov x9, #4617315517961601024 // =0x4014000000000000
+; CHECK-NEXT:    mov x0, #4607182418800017408 // =0x3ff0000000000000
+; CHECK-NEXT:    mov w1, #2 // =0x2
+; CHECK-NEXT:    mov x2, #4613937818241073152 // =0x4008000000000000
+; CHECK-NEXT:    mov w3, #4 // =0x4
+; CHECK-NEXT:    mov w5, #16 // =0x10
+; CHECK-NEXT:    stp xzr, x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x9, x8, [sp]
+; CHECK-NEXT:    str xzr, [sp, #16]
+; CHECK-NEXT:    .weak_anti_dep varargs_callee
+; CHECK-NEXT: .set varargs_callee, "#varargs_callee"@WEAKREF
+; CHECK-NEXT:    .weak_anti_dep "#varargs_callee"
+; CHECK-NEXT: .set "#varargs_callee", varargs_callee@WEAKREF
+; CHECK-NEXT:    bl "#varargs_callee"
+; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    add x4, sp, #48
+; CHECK-NEXT:    mov x0, #4607182418800017408 // =0x3ff0000000000000
+; CHECK-NEXT:    mov w1, #4 // =0x4
+; CHECK-NEXT:    mov w2, #3 // =0x3
+; CHECK-NEXT:    mov w3, #2 // =0x2
+; CHECK-NEXT:    mov x5, xzr
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    .weak_anti_dep varargs_callee
+; CHECK-NEXT: .set varargs_callee, "#varargs_callee"@WEAKREF
+; CHECK-NEXT:    .weak_anti_dep "#varargs_callee"
+; CHECK-NEXT: .set "#varargs_callee", varargs_callee@WEAKREF
+; CHECK-NEXT:    b "#varargs_callee"
   call void (double, ...) @varargs_callee(double 1.0, i32 2, double 3.0, i32 4, double 5.0, <2 x double> )
   tail call void (double, ...) @varargs_callee(double 1.0, i32 4, i32 3, i32 2)
   ret void
 }
 
-declare void @llvm.va_start(ptr)
+; Check we spill/restore x4 and x5, and don't dereference x4.
+define void @varargs_thunk(ptr noundef %0, ...) "thunk" {
+; CHECK-LABEL: varargs_thunk:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr x11, [x0]
+; CHECK-NEXT:    mov x9, x5
+; CHECK-NEXT:    mov x10, x4
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    ldr x11, [x11]
+; CHECK-NEXT:    mov x4, x10
+; CHECK-NEXT:    mov x5, x9
+; CHECK-NEXT:    br x11
+  call void asm "","~{x4},~{x5}"()
+  %vtable = load ptr, ptr %0, align 8
+  %vtablefn = load ptr, ptr %vtable, align 8
+  musttail call void (ptr, ...) %vtablefn(ptr noundef %0, ...)
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/darwinpcs-tail.ll b/llvm/test/CodeGen/AArch64/darwinpcs-tail.ll
index da176894c48a9..5d3c755d0d73d 100644
--- a/llvm/test/CodeGen/AArch64/darwinpcs-tail.ll
+++ b/llvm/test/CodeGen/AArch64/darwinpcs-tail.ll
@@ -8,8 +8,8 @@
 ; CHECK-LABEL: _tailTest:
 ; CHECK: b __ZN1C3addEPKcz
 ; CHECK-LABEL: __ZThn8_N1C1fEiiiiiiiiiz:
-; CHECK: ldr w8, [sp, #4]
-; CHECK: str w8, [sp, #4]
+; CHECK: ldr w9, [sp, #4]
+; CHECK: str w9, [sp, #4]
 ; CHECK: b __ZN1C1fEiiiiiiiiiz
 
 %class.C = type { %class.A.base, [4 x i8], %class.B.base, [4 x i8] }
diff --git a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
index 7ee76c8ad50a2..2d8f312c9694e 100644
--- a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
+++ b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
@@ -37,12 +37,16 @@ attributes #1 = { noinline optnone "thunk" }
 ; CHECK: ldr x9, [x9]
 ; CHECK: mov v0.16b, v16.16b
 ; CHECK: br x9
-; CHECK-EC: mov v7.16b, v0.16b
-; CHECK-EC: ldr x9, [x0]
-; CHECK-EC: ldr x11, [x9]
-; CHECH-EC: add x4, sp, #96
-; CHECK-EC: mov v0.16b, v7.16b
-; CHECK-EC: add x4, sp, #96
-; CHECK-EC: ldr x30, [sp, #48]
-; CHECK-EC: add sp, sp, #96
-; CHECK-EC: br x11
+; CHECK-EC: mov v7.16b, v0.16b
+; CHECK-EC: ldr x9, [x0]
+; CHECK-EC: ldr x11, [x9]
+; CHECK-EC: blr x9
+; CHECK-EC-NEXT: mov v0.16b, v7.16b
+; CHECK-EC-NEXT: ldr q7, [sp]
+; CHECK-EC-NEXT: .seh_startepilogue
+; CHECK-EC-NEXT: ldr x30, [sp, #48]
+; CHECK-EC-NEXT: .seh_save_reg x30, 48
+; CHECK-EC-NEXT: add sp, sp, #96
+; CHECK-EC-NEXT: .seh_stackalloc 96
+; CHECK-EC-NEXT: .seh_endepilogue
+; CHECK-EC-NEXT: br x11
diff --git a/llvm/test/CodeGen/AArch64/win64_vararg2.ll b/llvm/test/CodeGen/AArch64/win64_vararg2.ll
index dff49148fb772..2d3156a3aadac 100644
--- a/llvm/test/CodeGen/AArch64/win64_vararg2.ll
+++ b/llvm/test/CodeGen/AArch64/win64_vararg2.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-pc-win32 | FileCheck %s
-; RUN: llc < %s -global-isel -mtriple=aarch64-pc-win32 | FileCheck %s --check-prefix=GISEL
+; RUN: llc < %s -global-isel -mtriple=aarch64-pc-win32 -global-isel-abort=0 | FileCheck %s --check-prefix=GISEL
 
 ; Function Attrs: mustprogress noinline nounwind optnone uwtable
 define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) {
@@ -14,13 +14,15 @@ define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) {
 ; CHECK-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
 ; CHECK-NEXT:    .seh_save_reg x30, 24
 ; CHECK-NEXT:    .seh_endprologue
+; CHECK-NEXT:    add x8, sp, #40
 ; CHECK-NEXT:    mov w19, w0
 ; CHECK-NEXT:    stp x3, x4, [sp, #40]
 ; CHECK-NEXT:    stp x5, x6, [sp, #56]
 ; CHECK-NEXT:    str x7, [sp, #72]
-; CHECK-NEXT:    str w0, [sp, #12]
-; CHECK-NEXT:    strb w1, [sp, #11]
-; CHECK-NEXT:    strb w2, [sp, #10]
+; CHECK-NEXT:    str x8, [sp, #8]
+; CHECK-NEXT:    str w0, [sp, #4]
+; CHECK-NEXT:    strb w1, [sp, #3]
+; CHECK-NEXT:    strb w2, [sp, #2]
 ; CHECK-NEXT:    bl other
 ; CHECK-NEXT:    cmp w19, w0
 ; CHECK-NEXT:    cset w0, ls
@@ -46,13 +48,15 @@ define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) {
 ; GISEL-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
 ; GISEL-NEXT:    .seh_save_reg x30, 24
 ; GISEL-NEXT:    .seh_endprologue
-; GISEL-NEXT:    stp x3, x4, [sp, #40]
+; GISEL-NEXT:    add x8, sp, #40
 ; GISEL-NEXT:    mov w19, w0
+; GISEL-NEXT:    stp x3, x4, [sp, #40]
 ; GISEL-NEXT:    stp x5, x6, [sp, #56]
 ; GISEL-NEXT:    str x7, [sp, #72]
-; GISEL-NEXT:    str w0, [sp, #12]
-; GISEL-NEXT:    strb w1, [sp, #11]
-; GISEL-NEXT:    strb w2, [sp, #10]
+; GISEL-NEXT:    str x8, [sp, #8]
+; GISEL-NEXT:    str w0, [sp, #4]
+; GISEL-NEXT:    strb w1, [sp, #3]
+; GISEL-NEXT:    strb w2, [sp, #2]
 ; GISEL-NEXT:    bl other
 ; GISEL-NEXT:    cmp w19, w0
 ; GISEL-NEXT:    cset w0, ls
@@ -67,6 +71,8 @@ define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) {
 ; GISEL-NEXT:    ret
 ; GISEL-NEXT:    .seh_endfunclet
 ; GISEL-NEXT:    .seh_endproc
+  %valist = alloca ptr
+  call void @llvm.va_start(ptr %valist)
   %a_alloc = alloca i32, align 4
   %b_alloc = alloca i8, align 1
   %c_alloc = alloca i8, align 1
@@ -76,6 +82,7 @@ define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) {
   %a_load = load i32, ptr %a_alloc, align 4
   %ret = call noundef i32 @other()
   %cmp = icmp ule i32 %a_load, %ret
+  call void @llvm.va_end(ptr %valist)
   ret i1 %cmp
 }
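
A note on the LowerCall change above: the added !(CLI.CB && CLI.CB->isMustTailCall()) guard matters for variadic musttail forwarders, where the callee must see the caller's original x4/x5 varargs pointers rather than a freshly materialized in-stack varargs area. A minimal IR sketch of the forwarding pattern, modeled on the varargs_thunk test above (the names @target and @forwarder are hypothetical, not part of the patch):

  declare void @target(ptr, ...)

  define void @forwarder(ptr %ctx, ...) {
    ; The literal "..." at the call site forwards this function's variable
    ; arguments unchanged; with musttail, Arm64EC has to keep x4/x5 intact
    ; for the callee instead of recomputing them for the tail call.
    musttail call void (ptr, ...) @target(ptr %ctx, ...)
    ret void
  }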