diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index a52ad41d0f1b3..944e1714e8f98 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -313,33 +313,17 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Type *Ty = getContainedTypes(RetTy).front(); EVT VT = getTLI()->getValueType(DL, Ty); - EVT ScalarVT = VT.getScalarType(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - /// Migration flag. IsVectorCall cases directly know about the vector - /// libcall in RuntimeLibcallsInfo and shouldn't try to use - /// LibInfo->getVectorMappingInfo. - bool IsVectorCall = false; - switch (ICA.getID()) { case Intrinsic::modf: - LC = RTLIB::getMODF(ScalarVT); + LC = RTLIB::getMODF(VT); break; case Intrinsic::sincospi: LC = RTLIB::getSINCOSPI(VT); - if (LC == RTLIB::UNKNOWN_LIBCALL) - LC = RTLIB::getSINCOSPI(ScalarVT); - else if (VT.isVector()) - IsVectorCall = true; - break; case Intrinsic::sincos: LC = RTLIB::getSINCOS(VT); - if (LC == RTLIB::UNKNOWN_LIBCALL) - LC = RTLIB::getSINCOS(ScalarVT); - else if (VT.isVector()) - IsVectorCall = true; - break; default: return std::nullopt; @@ -350,33 +334,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { if (LibcallImpl == RTLIB::Unsupported) return std::nullopt; - StringRef LCName = - RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LibcallImpl); - - // Search for a corresponding vector variant. - // - // FIXME: Should use RuntimeLibcallsInfo, not TargetLibraryInfo to get the - // vector mapping. LLVMContext &Ctx = RetTy->getContext(); - ElementCount VF = getVectorizedTypeVF(RetTy); - VecDesc const *VD = nullptr; - - if (!IsVectorCall) { - for (bool Masked : {false, true}) { - if ((VD = LibInfo->getVectorMappingInfo(LCName, VF, Masked))) - break; - } - if (!VD) - return std::nullopt; - } // Cost the call + mask. auto Cost = thisT()->getCallInstrCost(nullptr, RetTy, ICA.getArgTypes(), CostKind); - if ((VD && VD->isMasked()) || - (IsVectorCall && - RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl))) { + if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl)) { + ElementCount VF = getVectorizedTypeVF(RetTy); auto VecTy = VectorType::get(IntegerType::getInt1Ty(Ctx), VF); Cost += thisT()->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, VecTy, {}, CostKind, 0, nullptr, {}); diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td index ee80606ed0dbf..ce7e836f66446 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.td +++ b/llvm/include/llvm/IR/RuntimeLibcalls.td @@ -183,6 +183,7 @@ foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"] in { } foreach VecTy = ["V4F32", "V2F64", "NXV4F32", "NXV2F64"] in { + def MODF_#VecTy : RuntimeLibcall; def SINCOS_#VecTy : RuntimeLibcall; def SINCOSPI_#VecTy : RuntimeLibcall; } @@ -1093,6 +1094,11 @@ def __security_check_cookie_arm64ec : RuntimeLibcallImpl SleefLibcalls = { + def _ZGVnN2vl8_modf : RuntimeLibcallImpl; + def _ZGVnN4vl4_modff : RuntimeLibcallImpl; + def _ZGVsNxvl8_modf : RuntimeLibcallImpl; + def _ZGVsNxvl4_modff : RuntimeLibcallImpl; + def _ZGVnN2vl8l8_sincos : RuntimeLibcallImpl; def _ZGVnN4vl4l4_sincosf : RuntimeLibcallImpl; def _ZGVsNxvl8l8_sincos : RuntimeLibcallImpl; @@ -1109,6 +1115,11 @@ defset list SleefLibcalls = { //===----------------------------------------------------------------------===// defset list ARMPLLibcalls = { + def armpl_vmodfq_f64 : RuntimeLibcallImpl; // CallingConv::AArch64_VectorCall + def armpl_vmodfq_f32 : RuntimeLibcallImpl; // CallingConv::AArch64_VectorCall + def armpl_svmodf_f64_x : RuntimeLibcallImpl; + def armpl_svmodf_f32_x : RuntimeLibcallImpl; + def armpl_vsincosq_f64 : RuntimeLibcallImpl; // CallingConv::AArch64_VectorCall def armpl_vsincosq_f32 diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 78d8ea0676dd7..a7ae794459331 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1283,9 +1283,10 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl &Results) { break; } case ISD::FMODF: { - EVT VT = Node->getValueType(0).getVectorElementType(); + EVT VT = Node->getValueType(0); RTLIB::Libcall LC = RTLIB::getMODF(VT); - if (DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT, + if (LC != RTLIB::UNKNOWN_LIBCALL && + DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT, /*CallRetResNo=*/0)) return; break; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 36a424f1c8b63..77d9b156e2672 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -475,6 +475,24 @@ RTLIB::Libcall RTLIB::getSINCOS_STRET(EVT RetVT) { } RTLIB::Libcall RTLIB::getMODF(EVT RetVT) { + // TODO: Tablegen should generate this function + if (RetVT.isVector()) { + if (!RetVT.isSimple()) + return RTLIB::UNKNOWN_LIBCALL; + switch (RetVT.getSimpleVT().SimpleTy) { + case MVT::v4f32: + return RTLIB::MODF_V4F32; + case MVT::v2f64: + return RTLIB::MODF_V2F64; + case MVT::nxv4f32: + return RTLIB::MODF_NXV4F32; + case MVT::nxv2f64: + return RTLIB::MODF_NXV2F64; + default: + return RTLIB::UNKNOWN_LIBCALL; + } + } + return getFPLibCall(RetVT, MODF_F32, MODF_F64, MODF_F80, MODF_F128, MODF_PPCF128); } diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index e66b9adb43ac4..ee23b58742b64 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -43,7 +43,9 @@ RuntimeLibcallsInfo::RuntimeLibcallsInfo(const Triple &TT, switch (ClVectorLibrary) { case VectorLibrary::SLEEFGNUABI: for (RTLIB::LibcallImpl Impl : - {RTLIB::impl__ZGVnN2vl8l8_sincos, RTLIB::impl__ZGVnN4vl4l4_sincosf, + {RTLIB::impl__ZGVnN2vl8_modf, RTLIB::impl__ZGVnN4vl4_modff, + RTLIB::impl__ZGVsNxvl8_modf, RTLIB::impl__ZGVsNxvl4_modff, + RTLIB::impl__ZGVnN2vl8l8_sincos, RTLIB::impl__ZGVnN4vl4l4_sincosf, RTLIB::impl__ZGVsNxvl8l8_sincos, RTLIB::impl__ZGVsNxvl4l4_sincosf, RTLIB::impl__ZGVnN4vl4l4_sincospif, RTLIB::impl__ZGVnN2vl8l8_sincospi, RTLIB::impl__ZGVsNxvl4l4_sincospif, @@ -52,7 +54,9 @@ RuntimeLibcallsInfo::RuntimeLibcallsInfo(const Triple &TT, break; case VectorLibrary::ArmPL: for (RTLIB::LibcallImpl Impl : - {RTLIB::impl_armpl_vsincosq_f64, RTLIB::impl_armpl_vsincosq_f32, + {RTLIB::impl_armpl_vmodfq_f64, RTLIB::impl_armpl_vmodfq_f32, + RTLIB::impl_armpl_svmodf_f64_x, RTLIB::impl_armpl_svmodf_f32_x, + RTLIB::impl_armpl_vsincosq_f64, RTLIB::impl_armpl_vsincosq_f32, RTLIB::impl_armpl_svsincos_f64_x, RTLIB::impl_armpl_svsincos_f32_x, RTLIB::impl_armpl_vsincospiq_f32, RTLIB::impl_armpl_vsincospiq_f64, RTLIB::impl_armpl_svsincospi_f32_x, @@ -197,6 +201,52 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const Triple &TT, fcNegNormal)); return {FuncTy, Attrs}; } + case RTLIB::impl__ZGVnN2vl8_modf: + case RTLIB::impl__ZGVnN4vl4_modff: + case RTLIB::impl__ZGVsNxvl8_modf: + case RTLIB::impl__ZGVsNxvl4_modff: + case RTLIB::impl_armpl_vmodfq_f64: + case RTLIB::impl_armpl_vmodfq_f32: + case RTLIB::impl_armpl_svmodf_f64_x: + case RTLIB::impl_armpl_svmodf_f32_x: { + AttrBuilder FuncAttrBuilder(Ctx); + + bool IsF32 = LibcallImpl == RTLIB::impl__ZGVnN4vl4_modff || + LibcallImpl == RTLIB::impl__ZGVsNxvl4_modff || + LibcallImpl == RTLIB::impl_armpl_vmodfq_f32 || + LibcallImpl == RTLIB::impl_armpl_svmodf_f32_x; + + bool IsScalable = LibcallImpl == RTLIB::impl__ZGVsNxvl8_modf || + LibcallImpl == RTLIB::impl__ZGVsNxvl4_modff || + LibcallImpl == RTLIB::impl_armpl_svmodf_f64_x || + LibcallImpl == RTLIB::impl_armpl_svmodf_f32_x; + + Type *ScalarTy = IsF32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx); + unsigned EC = IsF32 ? 4 : 2; + VectorType *VecTy = VectorType::get(ScalarTy, EC, IsScalable); + + for (Attribute::AttrKind Attr : CommonFnAttrs) + FuncAttrBuilder.addAttribute(Attr); + FuncAttrBuilder.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Mod)); + + AttributeList Attrs; + Attrs = Attrs.addFnAttributes(Ctx, FuncAttrBuilder); + + { + AttrBuilder ArgAttrBuilder(Ctx); + for (Attribute::AttrKind AK : CommonPtrArgAttrs) + ArgAttrBuilder.addAttribute(AK); + ArgAttrBuilder.addAlignmentAttr(DL.getABITypeAlign(VecTy)); + Attrs = Attrs.addParamAttributes(Ctx, 1, ArgAttrBuilder); + } + + PointerType *PtrTy = PointerType::get(Ctx, 0); + SmallVector ArgTys = {VecTy, PtrTy}; + if (hasVectorMaskArgument(LibcallImpl)) + ArgTys.push_back(VectorType::get(Type::getInt1Ty(Ctx), EC, IsScalable)); + + return {FunctionType::get(VecTy, ArgTys, false), Attrs}; + } case RTLIB::impl__ZGVnN2vl8l8_sincos: case RTLIB::impl__ZGVnN4vl4l4_sincosf: case RTLIB::impl__ZGVsNxvl8l8_sincos: @@ -271,6 +321,8 @@ bool RuntimeLibcallsInfo::hasVectorMaskArgument(RTLIB::LibcallImpl Impl) { /// FIXME: This should be generated by tablegen and support the argument at an /// arbitrary position switch (Impl) { + case RTLIB::impl_armpl_svmodf_f64_x: + case RTLIB::impl_armpl_svmodf_f32_x: case RTLIB::impl_armpl_svsincos_f32_x: case RTLIB::impl_armpl_svsincos_f64_x: case RTLIB::impl_armpl_svsincospi_f32_x: diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll index 1d9cf6a5d77fe..e79e89c95c14a 100644 --- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll @@ -1,6 +1,10 @@ ; REQUIRES: aarch64-registered-target ; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=aarch64-unknown-linux -mattr=+neon,+sve -vector-library=ArmPL < %s | FileCheck %s +; CHECK: declare @armpl_svmodf_f32_x(, ptr noalias nonnull writeonly align 16, ) [[ATTRS:#[0-9]+]] + +; CHECK: declare @armpl_svmodf_f64_x(, ptr noalias nonnull writeonly align 16, ) [[ATTRS]] + ; CHECK: declare void @armpl_svsincos_f32_x(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, ) [[ATTRS:#[0-9]+]] ; CHECK: declare void @armpl_svsincos_f64_x(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, ) [[ATTRS]] @@ -9,6 +13,10 @@ ; CHECK: declare void @armpl_svsincospi_f64_x(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, ) [[ATTRS]] +; CHECK: declare <4 x float> @armpl_vmodfq_f32(<4 x float>, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare <2 x double> @armpl_vmodfq_f64(<2 x double>, ptr noalias nonnull writeonly align 16) [[ATTRS]] + ; CHECK: declare void @armpl_vsincospiq_f32(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] ; CHECK: declare void @armpl_vsincospiq_f64(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll index 2c6900761b1c0..ef2481111087f 100644 --- a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll @@ -1,18 +1,26 @@ ; REQUIRES: aarch64-registered-target ; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=aarch64-unknown-linux -mattr=+neon,+sve -vector-library=sleefgnuabi < %s | FileCheck %s -; CHECK: declare void @_ZGVnN2vl8l8_sincos(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS:#[0-9]+]] +; CHECK: declare <2 x double> @_ZGVnN2vl8_modf(<2 x double>, ptr noalias nonnull writeonly align 16) [[ATTRS:#[0-9]+]] + +; CHECK: declare void @_ZGVnN2vl8l8_sincos(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] ; CHECK: declare void @_ZGVnN2vl8l8_sincospi(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] +; CHECK: declare <4 x float> @_ZGVnN4vl4_modff(<4 x float>, ptr noalias nonnull writeonly align 16) [[ATTRS]] + ; CHECK: declare void @_ZGVnN4vl4l4_sincosf(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] ; CHECK: declare void @_ZGVnN4vl4l4_sincospif(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] +; CHECK: declare @_ZGVsNxvl4_modff(, ptr noalias nonnull writeonly align 16) [[ATTRS]] + ; CHECK: declare void @_ZGVsNxvl4l4_sincosf(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] ; CHECK: declare void @_ZGVsNxvl4l4_sincospif(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] +; CHECK: declare @_ZGVsNxvl8_modf(, ptr noalias nonnull writeonly align 16) [[ATTRS]] + ; CHECK: declare void @_ZGVsNxvl8l8_sincos(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] ; CHECK: declare void @_ZGVsNxvl8l8_sincospi(, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]]