From 768d8f2a2690204c2471cd8c9d81d48be178617f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 6 Nov 2025 20:20:03 -0800 Subject: [PATCH 1/2] RuntimeLibcalls: Add call entries for sincos sleef and armpl libcalls These are the tested set of libcalls used for codegen of llvm.sincos and are needed to get the legalization to follow standard procedure. --- llvm/include/llvm/IR/RuntimeLibcalls.td | 13 +++++++ llvm/lib/IR/RuntimeLibcalls.cpp | 36 ++++++++++++++++--- .../Util/DeclareRuntimeLibcalls/armpl.ll | 13 +++++-- .../Util/DeclareRuntimeLibcalls/sleef.ll | 14 ++++++-- 4 files changed, 67 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td index 228629a7c86f0..ee80606ed0dbf 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.td +++ b/llvm/include/llvm/IR/RuntimeLibcalls.td @@ -183,6 +183,7 @@ foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"] in { } foreach VecTy = ["V4F32", "V2F64", "NXV4F32", "NXV2F64"] in { + def SINCOS_#VecTy : RuntimeLibcall; def SINCOSPI_#VecTy : RuntimeLibcall; } @@ -1092,6 +1093,11 @@ def __security_check_cookie_arm64ec : RuntimeLibcallImpl SleefLibcalls = { + def _ZGVnN2vl8l8_sincos : RuntimeLibcallImpl; + def _ZGVnN4vl4l4_sincosf : RuntimeLibcallImpl; + def _ZGVsNxvl8l8_sincos : RuntimeLibcallImpl; + def _ZGVsNxvl4l4_sincosf : RuntimeLibcallImpl; + def _ZGVnN4vl4l4_sincospif : RuntimeLibcallImpl; def _ZGVnN2vl8l8_sincospi : RuntimeLibcallImpl; def _ZGVsNxvl4l4_sincospif : RuntimeLibcallImpl; @@ -1103,6 +1109,13 @@ defset list SleefLibcalls = { //===----------------------------------------------------------------------===// defset list ARMPLLibcalls = { + def armpl_vsincosq_f64 + : RuntimeLibcallImpl; // CallingConv::AArch64_VectorCall + def armpl_vsincosq_f32 + : RuntimeLibcallImpl; // CallingConv::AArch64_VectorCall + def armpl_svsincos_f64_x : RuntimeLibcallImpl; + def armpl_svsincos_f32_x : RuntimeLibcallImpl; + def armpl_vsincospiq_f32 : RuntimeLibcallImpl; def armpl_vsincospiq_f64 : RuntimeLibcallImpl; def armpl_svsincospi_f32_x : RuntimeLibcallImpl; diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index 795621701d910..e66b9adb43ac4 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -43,17 +43,26 @@ RuntimeLibcallsInfo::RuntimeLibcallsInfo(const Triple &TT, switch (ClVectorLibrary) { case VectorLibrary::SLEEFGNUABI: for (RTLIB::LibcallImpl Impl : - {RTLIB::impl__ZGVnN4vl4l4_sincospif, RTLIB::impl__ZGVnN2vl8l8_sincospi, + {RTLIB::impl__ZGVnN2vl8l8_sincos, RTLIB::impl__ZGVnN4vl4l4_sincosf, + RTLIB::impl__ZGVsNxvl8l8_sincos, RTLIB::impl__ZGVsNxvl4l4_sincosf, + RTLIB::impl__ZGVnN4vl4l4_sincospif, RTLIB::impl__ZGVnN2vl8l8_sincospi, RTLIB::impl__ZGVsNxvl4l4_sincospif, RTLIB::impl__ZGVsNxvl8l8_sincospi}) setAvailable(Impl); break; case VectorLibrary::ArmPL: for (RTLIB::LibcallImpl Impl : - {RTLIB::impl_armpl_vsincospiq_f32, RTLIB::impl_armpl_vsincospiq_f64, + {RTLIB::impl_armpl_vsincosq_f64, RTLIB::impl_armpl_vsincosq_f32, + RTLIB::impl_armpl_svsincos_f64_x, RTLIB::impl_armpl_svsincos_f32_x, + RTLIB::impl_armpl_vsincospiq_f32, RTLIB::impl_armpl_vsincospiq_f64, RTLIB::impl_armpl_svsincospi_f32_x, RTLIB::impl_armpl_svsincospi_f64_x}) setAvailable(Impl); + + for (RTLIB::LibcallImpl Impl : + {RTLIB::impl_armpl_vsincosq_f64, RTLIB::impl_armpl_vsincosq_f32}) + setLibcallImplCallingConv(Impl, CallingConv::AArch64_VectorCall); + break; default: break; @@ -188,6 +197,14 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const Triple &TT, fcNegNormal)); return {FuncTy, Attrs}; } + case RTLIB::impl__ZGVnN2vl8l8_sincos: + case RTLIB::impl__ZGVnN4vl4l4_sincosf: + case RTLIB::impl__ZGVsNxvl8l8_sincos: + case RTLIB::impl__ZGVsNxvl4l4_sincosf: + case RTLIB::impl_armpl_vsincosq_f64: + case RTLIB::impl_armpl_vsincosq_f32: + case RTLIB::impl_armpl_svsincos_f64_x: + case RTLIB::impl_armpl_svsincos_f32_x: case RTLIB::impl__ZGVnN4vl4l4_sincospif: case RTLIB::impl__ZGVnN2vl8l8_sincospi: case RTLIB::impl__ZGVsNxvl4l4_sincospif: @@ -201,11 +218,20 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const Triple &TT, bool IsF32 = LibcallImpl == RTLIB::impl__ZGVnN4vl4l4_sincospif || LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincospif || LibcallImpl == RTLIB::impl_armpl_vsincospiq_f32 || - LibcallImpl == RTLIB::impl_armpl_svsincospi_f32_x; + LibcallImpl == RTLIB::impl_armpl_svsincospi_f32_x || + LibcallImpl == RTLIB::impl__ZGVnN4vl4l4_sincosf || + LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincosf || + LibcallImpl == RTLIB::impl_armpl_vsincosq_f32 || + LibcallImpl == RTLIB::impl_armpl_svsincos_f32_x; + Type *ScalarTy = IsF32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx); unsigned EC = IsF32 ? 4 : 2; - bool IsScalable = LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincospif || + bool IsScalable = LibcallImpl == RTLIB::impl__ZGVsNxvl8l8_sincos || + LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincosf || + LibcallImpl == RTLIB::impl_armpl_svsincos_f32_x || + LibcallImpl == RTLIB::impl_armpl_svsincos_f64_x || + LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincospif || LibcallImpl == RTLIB::impl__ZGVsNxvl8l8_sincospi || LibcallImpl == RTLIB::impl_armpl_svsincospi_f32_x || LibcallImpl == RTLIB::impl_armpl_svsincospi_f64_x; @@ -245,6 +271,8 @@ bool RuntimeLibcallsInfo::hasVectorMaskArgument(RTLIB::LibcallImpl Impl) { /// FIXME: This should be generated by tablegen and support the argument at an /// arbitrary position switch (Impl) { + case RTLIB::impl_armpl_svsincos_f32_x: + case RTLIB::impl_armpl_svsincos_f64_x: case RTLIB::impl_armpl_svsincospi_f32_x: case RTLIB::impl_armpl_svsincospi_f64_x: return true; diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll index c45f319f80122..1d9cf6a5d77fe 100644 --- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll @@ -1,12 +1,21 @@ ; REQUIRES: aarch64-registered-target ; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=aarch64-unknown-linux -mattr=+neon,+sve -vector-library=ArmPL < %s | FileCheck %s -; CHECK: declare void @armpl_svsincospi_f32_x(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, ) [[ATTRS:#[0-9]+]] +; CHECK: declare void @armpl_svsincos_f32_x(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, ) [[ATTRS:#[0-9]+]] -; CHECK: declare void @armpl_svsincospi_f64_x(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, ) [[ATTRS:#[0-9]+]] +; CHECK: declare void @armpl_svsincos_f64_x(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, ) [[ATTRS]] + +; CHECK: declare void @armpl_svsincospi_f32_x(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, ) [[ATTRS]] + +; CHECK: declare void @armpl_svsincospi_f64_x(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, ) [[ATTRS]] ; CHECK: declare void @armpl_vsincospiq_f32(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] ; CHECK: declare void @armpl_vsincospiq_f64(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] +; CHECK: declare aarch64_vector_pcs void @armpl_vsincosq_f32(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare aarch64_vector_pcs void @armpl_vsincosq_f64(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + + ; CHECK: attributes [[ATTRS]] = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll index 7972e0ca1c487..2c6900761b1c0 100644 --- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll @@ -1,12 +1,20 @@ ; REQUIRES: aarch64-registered-target ; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=aarch64-unknown-linux -mattr=+neon,+sve -vector-library=sleefgnuabi < %s | FileCheck %s -; CHECK: declare void @_ZGVnN2vl8l8_sincospi(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS:#[0-9]+]] +; CHECK: declare void @_ZGVnN2vl8l8_sincos(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS:#[0-9]+]] + +; CHECK: declare void @_ZGVnN2vl8l8_sincospi(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare void @_ZGVnN4vl4l4_sincosf(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] ; CHECK: declare void @_ZGVnN4vl4l4_sincospif(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] -; CHECK: declare void @_ZGVsNxvl4l4_sincospif(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS:#[0-9]+]] +; CHECK: declare void @_ZGVsNxvl4l4_sincosf(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare void @_ZGVsNxvl4l4_sincospif(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare void @_ZGVsNxvl8l8_sincos(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] -; CHECK: declare void @_ZGVsNxvl8l8_sincospi(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS:#[0-9]+]] +; CHECK: declare void @_ZGVsNxvl8l8_sincospi(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] ; CHECK: attributes [[ATTRS]] = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } From c5ecd2441fef44682804b902fce8657730660f5a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 6 Nov 2025 20:29:04 -0800 Subject: [PATCH 2/2] DAG: Use sincos vector libcalls through RuntimeLibcalls Copy new process from sincospi. --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 7 ++++++- .../CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 14 ++++---------- llvm/lib/CodeGen/TargetLoweringBase.cpp | 18 ++++++++++++++++++ 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 1c167af4b0478..a52ad41d0f1b3 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -334,7 +334,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { break; case Intrinsic::sincos: - LC = RTLIB::getSINCOS(ScalarVT); + LC = RTLIB::getSINCOS(VT); + if (LC == RTLIB::UNKNOWN_LIBCALL) + LC = RTLIB::getSINCOS(ScalarVT); + else if (VT.isVector()) + IsVectorCall = true; + break; default: return std::nullopt; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index f5a54497c8a98..78d8ea0676dd7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1268,10 +1268,12 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { return; break; - + case ISD::FSINCOS: case ISD::FSINCOSPI: { EVT VT = Node->getValueType(0); - RTLIB::Libcall LC = RTLIB::getSINCOSPI(VT); + RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS + ? RTLIB::getSINCOS(VT) + : RTLIB::getSINCOSPI(VT); if (LC != RTLIB::UNKNOWN_LIBCALL && DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT)) return; @@ -1280,14 +1282,6 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { // scalarizing. break; } - case ISD::FSINCOS: { - // FIXME: Try to directly match vector case like fsincospi - EVT VT = Node->getValueType(0).getVectorElementType(); - RTLIB::Libcall LC = RTLIB::getSINCOS(VT); - if (DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT)) - return; - break; - } case ISD::FMODF: { EVT VT = Node->getValueType(0).getVectorElementType(); RTLIB::Libcall LC = RTLIB::getMODF(VT); diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 814b4b57a0b9b..b4eb6c357e10e 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -425,6 +425,24 @@ RTLIB::Libcall RTLIB::getCOS(EVT RetVT) { } RTLIB::Libcall RTLIB::getSINCOS(EVT RetVT) { + // TODO: Tablegen should generate this function + if (RetVT.isVector()) { + if (!RetVT.isSimple()) + return RTLIB::UNKNOWN_LIBCALL; + switch (RetVT.getSimpleVT().SimpleTy) { + case MVT::v4f32: + return RTLIB::SINCOS_V4F32; + case MVT::v2f64: + return RTLIB::SINCOS_V2F64; + case MVT::nxv4f32: + return RTLIB::SINCOS_NXV4F32; + case MVT::nxv2f64: + return RTLIB::SINCOS_NXV2F64; + default: + return RTLIB::UNKNOWN_LIBCALL; + } + } + return getFPLibCall(RetVT, SINCOS_F32, SINCOS_F64, SINCOS_F80, SINCOS_F128, SINCOS_PPCF128); }