From ca1aa7966ba750de56da6610df3b0acaf35e3f71 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Sat, 15 Mar 2025 18:35:24 +0300 Subject: [PATCH 1/3] [Aarch64] [ISel] Don't save vaargs registers if vaargs are unused --- .../Target/AArch64/AArch64ISelLowering.cpp | 4 +- .../AArch64/GlobalISel/aapcs_vararg_frame.ll | 64 ++++++++++++++++--- llvm/test/CodeGen/AArch64/alloca.ll | 24 ++++--- llvm/test/CodeGen/AArch64/darwinpcs-tail.ll | 4 +- llvm/test/CodeGen/AArch64/vararg-tallcall.ll | 10 ++- 5 files changed, 81 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index a9b4965e32b4c..059789cddc342 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8230,7 +8230,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments( } // varargs - if (isVarArg) { + // Note that IsWin64 part is required to prevent odd miscompilations on arm64 windows + // platforms. For more info refer to GH#126780 PR comments. + if (isVarArg && (DAG.getMachineFunction().getFrameInfo().hasVAStart() || IsWin64)) { if (!Subtarget->isTargetDarwin() || IsWin64) { // The AAPCS variadic function ABI is identical to the non-variadic // one. As a result there may be more arguments in registers and we should diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll b/llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll index 7892e892a2412..6ea520ddcd6cd 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll @@ -2,19 +2,11 @@ ; RUN: llc < %s --global-isel=0 -mtriple=aarch64-linux-gnu -mattr=+fp-armv8 | FileCheck %s ; RUN: llc < %s --global-isel=1 -mtriple=aarch64-linux-gnu -mattr=+fp-armv8 | FileCheck %s --check-prefix=GISEL +%struct.__va_list = type { ptr, ptr, ptr, i32, i32 } + define void @va(i32 %count, half %f, ...) nounwind { ; CHECK-LABEL: va: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #176 -; CHECK-NEXT: stp x2, x3, [sp, #128] -; CHECK-NEXT: str x1, [sp, #120] -; CHECK-NEXT: stp x4, x5, [sp, #144] -; CHECK-NEXT: stp x6, x7, [sp, #160] -; CHECK-NEXT: stp q1, q2, [sp] -; CHECK-NEXT: stp q3, q4, [sp, #32] -; CHECK-NEXT: stp q5, q6, [sp, #64] -; CHECK-NEXT: str q7, [sp, #96] -; CHECK-NEXT: add sp, sp, #176 ; CHECK-NEXT: ret ; ; GISEL-LABEL: va: @@ -33,3 +25,55 @@ define void @va(i32 %count, half %f, ...) nounwind { entry: ret void } + +define void @va_used(i32 %count, half %f, ...) nounwind { +; CHECK-LABEL: va_used: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #208 +; CHECK-NEXT: mov x8, #-56 // =0xffffffffffffffc8 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: add x10, sp, #120 +; CHECK-NEXT: movk x8, #65424, lsl #32 +; CHECK-NEXT: add x9, x9, #112 +; CHECK-NEXT: stp x2, x3, [sp, #128] +; CHECK-NEXT: stp x9, x8, [sp, #192] +; CHECK-NEXT: add x8, x10, #56 +; CHECK-NEXT: add x9, sp, #208 +; CHECK-NEXT: str x1, [sp, #120] +; CHECK-NEXT: stp x4, x5, [sp, #144] +; CHECK-NEXT: stp x6, x7, [sp, #160] +; CHECK-NEXT: stp q1, q2, [sp] +; CHECK-NEXT: stp q3, q4, [sp, #32] +; CHECK-NEXT: stp q5, q6, [sp, #64] +; CHECK-NEXT: str q7, [sp, #96] +; CHECK-NEXT: stp x9, x8, [sp, #176] +; CHECK-NEXT: add sp, sp, #208 +; CHECK-NEXT: ret +; +; GISEL-LABEL: va_used: +; GISEL: // %bb.0: +; GISEL-NEXT: sub sp, sp, #208 +; GISEL-NEXT: mov x8, sp +; GISEL-NEXT: add x9, sp, #208 +; GISEL-NEXT: add x10, sp, #208 +; GISEL-NEXT: stp x9, x10, [x8] +; GISEL-NEXT: add x9, sp, #144 +; GISEL-NEXT: mov w10, #-112 // =0xffffff90 +; GISEL-NEXT: str x9, [x8, #16] +; GISEL-NEXT: mov w9, #-56 // =0xffffffc8 +; GISEL-NEXT: stp x1, x2, [sp, #152] +; GISEL-NEXT: stp x3, x4, [sp, #168] +; GISEL-NEXT: stp x5, x6, [sp, #184] +; GISEL-NEXT: str x7, [sp, #200] +; GISEL-NEXT: stp q1, q2, [sp, #32] +; GISEL-NEXT: stp q3, q4, [sp, #64] +; GISEL-NEXT: stp q5, q6, [sp, #96] +; GISEL-NEXT: str q7, [sp, #128] +; GISEL-NEXT: stp w9, w10, [x8, #24] +; GISEL-NEXT: add sp, sp, #208 +; GISEL-NEXT: ret + %valist = alloca %struct.__va_list + call void @llvm.va_start(ptr %valist) + call void @llvm.va_end(ptr %valist) + ret void +} diff --git a/llvm/test/CodeGen/AArch64/alloca.ll b/llvm/test/CodeGen/AArch64/alloca.ll index ca3a500d79f32..119b84e352c74 100644 --- a/llvm/test/CodeGen/AArch64/alloca.ll +++ b/llvm/test/CodeGen/AArch64/alloca.ll @@ -62,6 +62,8 @@ define i64 @test_alloca_with_local(i64 %n) { ; CHECK: ret } +%struct.__va_list = type { ptr, ptr, ptr, i32, i32 } + define void @test_variadic_alloca(i64 %n, ...) { ; CHECK-LABEL: test_variadic_alloca: @@ -77,31 +79,33 @@ define void @test_variadic_alloca(i64 %n, ...) { ; CHECK: stp x29, x30, [sp, #-16]! ; CHECK: mov x29, sp -; CHECK: sub sp, sp, #192 -; CHECK-DAG: stp q6, q7, [x29, #-96] +; CHECK: sub sp, sp, #224 +; CHECK-DAG: stp q6, q7, [x29, #-128] ; [...] -; CHECK-DAG: stp q0, q1, [x29, #-192] +; CHECK-DAG: stp q2, q3, [x29, #-192] -; CHECK-DAG: stp x5, x6, [x29, #-24] +; CHECK-DAG: stp x5, x6, [x29, #-56] ; [...] -; CHECK-DAG: stp x1, x2, [x29, #-56] +; CHECK-DAG: stp x1, x2, [x29, #-88] ; CHECK-NOFP-ARM64: stp x29, x30, [sp, #-16]! ; CHECK-NOFP-ARM64: mov x29, sp -; CHECK-NOFP-ARM64: sub sp, sp, #64 -; CHECK-NOFP-ARM64-DAG: stp x5, x6, [x29, #-24] +; CHECK-NOFP-ARM64: sub sp, sp, #16 +; CHECK-NOFP-ARM64-DAG: stp x5, x6, [x29, #-56] ; [...] -; CHECK-NOFP-ARM64-DAG: stp x3, x4, [x29, #-40] +; CHECK-NOFP-ARM64-DAG: stp x3, x4, [x29, #-72] ; [...] -; CHECK-NOFP-ARM64-DAG: stp x1, x2, [x29, #-56] +; CHECK-NOFP-ARM64-DAG: stp x1, x2, [x29, #-88] ; [...] ; CHECK-NOFP-ARM64: mov x8, sp - + %valist = alloca %struct.__va_list + call void @llvm.va_start(ptr %valist) %addr = alloca i8, i64 %n call void @use_addr(ptr %addr) ; CHECK: bl use_addr + call void @llvm.va_end(ptr %valist) ret void ; CHECK-NOFP-AARCH64: sub sp, x29, #64 diff --git a/llvm/test/CodeGen/AArch64/darwinpcs-tail.ll b/llvm/test/CodeGen/AArch64/darwinpcs-tail.ll index 5d3c755d0d73d..da176894c48a9 100644 --- a/llvm/test/CodeGen/AArch64/darwinpcs-tail.ll +++ b/llvm/test/CodeGen/AArch64/darwinpcs-tail.ll @@ -8,8 +8,8 @@ ; CHECK-LABEL: _tailTest: ; CHECK: b __ZN1C3addEPKcz ; CHECK-LABEL: __ZThn8_N1C1fEiiiiiiiiiz: -; CHECK: ldr w9, [sp, #4] -; CHECK: str w9, [sp, #4] +; CHECK: ldr w8, [sp, #4] +; CHECK: str w8, [sp, #4] ; CHECK: b __ZN1C1fEiiiiiiiiiz %class.C = type { %class.A.base, [4 x i8], %class.B.base, [4 x i8] } diff --git a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll index 812837639196e..7ee76c8ad50a2 100644 --- a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll +++ b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll @@ -8,18 +8,22 @@ target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128" %class.X = type { i8 } %struct.B = type { ptr } +%struct.__va_list = type { ptr, ptr, ptr, i32, i32 } $"??_9B@@$BA@AA" = comdat any ; Function Attrs: noinline optnone define linkonce_odr void @"??_9B@@$BA@AA"(ptr %this, ...) #1 comdat align 2 { entry: + %valist = alloca %struct.__va_list + call void @llvm.va_start.p0(ptr %valist) %this.addr = alloca ptr, align 8 store ptr %this, ptr %this.addr, align 8 %this1 = load ptr, ptr %this.addr, align 8 call void asm sideeffect "", "~{d0}"() %vtable = load ptr, ptr %this1, align 8 %0 = load ptr, ptr %vtable, align 8 + call void @llvm.va_end.p0(ptr %valist) musttail call void (ptr, ...) %0(ptr %this1, ...) ret void ; No predecessors! @@ -36,7 +40,9 @@ attributes #1 = { noinline optnone "thunk" } ; CHECK-EC: mov v7.16b, v0.16b ; CHECK-EC: ldr x9, [x0] ; CHECK-EC: ldr x11, [x9] +; CHECH-EC: add x4, sp, #96 ; CHECK-EC: mov v0.16b, v7.16b -; CHECK-EC: add x4, sp, #64 -; CHECK-EC: add sp, sp, #64 +; CHECK-EC: add x4, sp, #96 +; CHECK-EC: ldr x30, [sp, #48] +; CHECK-EC: add sp, sp, #96 ; CHECK-EC: br x11 From a6bba68498ff40c2536e468043166d15a7e44360 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Sat, 15 Mar 2025 18:41:12 +0300 Subject: [PATCH 2/3] fix formatting --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 059789cddc342..e12d54c5b13ae 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8232,7 +8232,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments( // varargs // Note that IsWin64 part is required to prevent odd miscompilations on arm64 windows // platforms. For more info refer to GH#126780 PR comments. - if (isVarArg && (DAG.getMachineFunction().getFrameInfo().hasVAStart() || IsWin64)) { + if (isVarArg && + (DAG.getMachineFunction().getFrameInfo().hasVAStart() || IsWin64)) { if (!Subtarget->isTargetDarwin() || IsWin64) { // The AAPCS variadic function ABI is identical to the non-variadic // one. As a result there may be more arguments in registers and we should From 584b0489371e99e98196f86a7909a12a01a66b41 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Wed, 19 Mar 2025 22:30:33 +0300 Subject: [PATCH 3/3] formatting v2 --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 888f0f67162b5..3cd47a8b2e578 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8241,8 +8241,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments( } // varargs - // Note that IsWin64 part is required to prevent odd miscompilations on arm64 windows - // platforms. For more info refer to GH#126780 PR comments. + // Note that IsWin64 part is required to prevent odd miscompilations on arm64 + // windows platforms. For more info refer to GH#126780 PR comments. if (isVarArg && (DAG.getMachineFunction().getFrameInfo().hasVAStart() || IsWin64)) { if (!Subtarget->isTargetDarwin() || IsWin64) {