diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index f58525754d7a5..1c167af4b0478 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -316,12 +316,22 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     EVT ScalarVT = VT.getScalarType();
     RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
 
+    /// Migration flag. IsVectorCall cases directly know about the vector
+    /// libcall in RuntimeLibcallsInfo and shouldn't try to use
+    /// LibInfo->getVectorMappingInfo.
+    bool IsVectorCall = false;
+
     switch (ICA.getID()) {
     case Intrinsic::modf:
       LC = RTLIB::getMODF(ScalarVT);
       break;
     case Intrinsic::sincospi:
-      LC = RTLIB::getSINCOSPI(ScalarVT);
+      LC = RTLIB::getSINCOSPI(VT);
+      if (LC == RTLIB::UNKNOWN_LIBCALL)
+        LC = RTLIB::getSINCOSPI(ScalarVT);
+      else if (VT.isVector())
+        IsVectorCall = true;
+
       break;
     case Intrinsic::sincos:
       LC = RTLIB::getSINCOS(ScalarVT);
@@ -345,17 +355,23 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     LLVMContext &Ctx = RetTy->getContext();
     ElementCount VF = getVectorizedTypeVF(RetTy);
     VecDesc const *VD = nullptr;
-    for (bool Masked : {false, true}) {
-      if ((VD = LibInfo->getVectorMappingInfo(LCName, VF, Masked)))
-        break;
+
+    if (!IsVectorCall) {
+      for (bool Masked : {false, true}) {
+        if ((VD = LibInfo->getVectorMappingInfo(LCName, VF, Masked)))
+          break;
+      }
+      if (!VD)
+        return std::nullopt;
     }
-    if (!VD)
-      return std::nullopt;
 
     // Cost the call + mask.
     auto Cost =
         thisT()->getCallInstrCost(nullptr, RetTy, ICA.getArgTypes(), CostKind);
-    if (VD->isMasked()) {
+
+    if ((VD && VD->isMasked()) ||
+        (IsVectorCall &&
+         RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl))) {
       auto VecTy = VectorType::get(IntegerType::getInt1Ty(Ctx), VF);
       Cost += thisT()->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
                                       VecTy, {}, CostKind, 0, nullptr, {});
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 5b331e4444915..62d2f222110e4 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1725,9 +1725,17 @@ class SelectionDAG {
   /// value.
   LLVM_ABI bool
   expandMultipleResultFPLibCall(RTLIB::Libcall LC, SDNode *Node,
-                                SmallVectorImpl<SDValue> &Results,
+                                SmallVectorImpl<SDValue> &Results, EVT CallType,
                                 std::optional<unsigned> CallRetResNo = {});
 
+  // FIXME: This should be removed, and the form using RTLIB::Libcall should
+  // be preferred. Callers should resolve the exact type libcall to use.
+  LLVM_ABI bool
+  expandMultipleResultFPLibCall(StringRef LibcallName, CallingConv::ID CC,
+                                SDNode *Node, SmallVectorImpl<SDValue> &Results,
+                                std::optional<unsigned> CallRetResNo = {},
+                                bool IsVectorMasked = false);
+
   /// Expand the specified \c ISD::VAARG node as the Legalize pass would.
   LLVM_ABI SDValue expandVAArg(SDNode *Node);
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h
index c822b6530a441..0afe32a4ecc3c 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.h
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.h
@@ -83,16 +83,7 @@ struct RuntimeLibcallsInfo {
       const Triple &TT,
       ExceptionHandling ExceptionModel = ExceptionHandling::None,
       FloatABI::ABIType FloatABI = FloatABI::Default,
-      EABI EABIVersion = EABI::Default, StringRef ABIName = "") {
-    // FIXME: The ExceptionModel parameter is to handle the field in
-    // TargetOptions. This interface fails to distinguish the forced disable
-    // case for targets which support exceptions by default. This should
-    // probably be a module flag and removed from TargetOptions.
-    if (ExceptionModel == ExceptionHandling::None)
-      ExceptionModel = TT.getDefaultExceptionHandling();
-
-    initLibcalls(TT, ExceptionModel, FloatABI, EABIVersion, ABIName);
-  }
+      EABI EABIVersion = EABI::Default, StringRef ABIName = "");
 
   explicit RuntimeLibcallsInfo(const Module &M);
 
@@ -170,6 +161,10 @@ struct RuntimeLibcallsInfo {
   getFunctionTy(LLVMContext &Ctx, const Triple &TT, const DataLayout &DL,
                 RTLIB::LibcallImpl LibcallImpl) const;
 
+  /// Returns true if the function has a vector mask argument, which is assumed
+  /// to be the last argument.
+  static bool hasVectorMaskArgument(RTLIB::LibcallImpl Impl);
+
 private:
   LLVM_ABI static iota_range<RTLIB::LibcallImpl>
   lookupLibcallImplNameImpl(StringRef Name);
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td
index 24c1b035d0dda..a0b52395498c5 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -182,6 +182,10 @@ foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"] in {
   def MODF_#FPTy : RuntimeLibcall;
 }
 
+foreach VecTy = ["V4F32", "V2F64", "NXV4F32", "NXV2F64"] in {
+  def SINCOSPI_#VecTy : RuntimeLibcall;
+}
+
 def FEGETENV : RuntimeLibcall;
 def FESETENV : RuntimeLibcall;
 
@@ -971,10 +975,6 @@ def frexpf : RuntimeLibcallImpl<FREXP_F32>;
 def frexp : RuntimeLibcallImpl<FREXP_F64>;
 defm frexpl : LibmLongDoubleLibCall;
 
-def sincospif : RuntimeLibcallImpl<SINCOSPI_F32>;
-def sincospi : RuntimeLibcallImpl<SINCOSPI_F64>;
-defm sincospil : LibmLongDoubleLibCall;
-
 def modff : RuntimeLibcallImpl<MODF_F32>;
 def modf : RuntimeLibcallImpl<MODF_F64>;
 defm modfl : LibmLongDoubleLibCall;
@@ -1051,6 +1051,15 @@ def sincosf : RuntimeLibcallImpl<SINCOS_F32>;
 def sincos : RuntimeLibcallImpl<SINCOS_F64>;
 defm sincosl : LibmLongDoubleLibCall;
 
+// Exists in sun math library
+def sincospif : RuntimeLibcallImpl<SINCOSPI_F32>;
+def sincospi : RuntimeLibcallImpl<SINCOSPI_F64>;
+defm sincospil : LibmLongDoubleLibCall;
+
+// Exists on macOS
+def __sincospif : RuntimeLibcallImpl<SINCOSPI_F32>;
+def __sincospi : RuntimeLibcallImpl<SINCOSPI_F64>;
+
 def bzero : RuntimeLibcallImpl<BZERO>;
 def __bzero : RuntimeLibcallImpl<BZERO>;
 
@@ -1078,6 +1087,28 @@ def __security_check_cookie : RuntimeLibcallImpl<SECURITY_CHECK_COOKIE>;
 def __security_check_cookie_arm64ec : RuntimeLibcallImpl<SECURITY_CHECK_COOKIE>;
 
+//===----------------------------------------------------------------------===//
+// sleef calls
+//===----------------------------------------------------------------------===//
+
+defset list<RuntimeLibcallImpl> SleefLibcalls = {
+  def _ZGVnN4vl4l4_sincospif : RuntimeLibcallImpl<SINCOSPI_V4F32>;
+  def _ZGVnN2vl8l8_sincospi : RuntimeLibcallImpl<SINCOSPI_V2F64>;
+  def _ZGVsNxvl4l4_sincospif : RuntimeLibcallImpl<SINCOSPI_NXV4F32>;
+  def _ZGVsNxvl8l8_sincospi : RuntimeLibcallImpl<SINCOSPI_NXV2F64>;
+}
+
+//===----------------------------------------------------------------------===//
+// ARMPL calls
+//===----------------------------------------------------------------------===//
+
+defset list<RuntimeLibcallImpl> ARMPLLibcalls = {
+  def armpl_vsincospiq_f32 : RuntimeLibcallImpl<SINCOSPI_V4F32>;
+  def armpl_vsincospiq_f64 : RuntimeLibcallImpl<SINCOSPI_V2F64>;
+  def armpl_svsincospi_f32_x : RuntimeLibcallImpl<SINCOSPI_NXV4F32>;
+  def armpl_svsincospi_f64_x : RuntimeLibcallImpl<SINCOSPI_NXV2F64>;
+}
+
 //===----------------------------------------------------------------------===//
 // F128 libm Runtime Libcalls
 //===----------------------------------------------------------------------===//
@@ -1206,7 +1237,9 @@ defvar DefaultLibcallImpls32 = (add DefaultRuntimeLibcallImpls);
 defvar DefaultLibcallImpls64 =
     (add DefaultRuntimeLibcallImpls, Int128RTLibcalls);
 
-defvar DarwinSinCosStret = LibcallImpls<(add __sincosf_stret, __sincos_stret),
+// TODO: Guessing sincospi was added at the same time as sincos_stret
+defvar DarwinSinCosStret = LibcallImpls<(add __sincosf_stret, __sincos_stret,
+                                             __sincospif, __sincospi),
                                         darwinHasSinCosStret>;
 defvar DarwinExp10 = LibcallImpls<(add __exp10f, __exp10), darwinHasExp10>;
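Aside on the TableGen entries above (a sketch, not part of the patch): each vector SINCOSPI_* libcall is resolved in two steps, first from the value type to the RTLIB enum, then to whichever implementation the configured vector library made available. The helper name below is hypothetical; the APIs it calls are the ones this patch touches.

  // Hypothetical helper: resolve a vector sincospi call, e.g. nxv4f32 ->
  // RTLIB::SINCOSPI_NXV4F32 -> RTLIB::impl__ZGVsNxvl4l4_sincospif (SLEEF) or
  // RTLIB::impl_armpl_svsincospi_f32_x (ArmPL), depending on availability.
  RTLIB::LibcallImpl resolveSincospiImpl(const TargetLowering &TLI, EVT VT) {
    RTLIB::Libcall LC = RTLIB::getSINCOSPI(VT);
    if (LC == RTLIB::UNKNOWN_LIBCALL)
      return RTLIB::Unsupported; // No libcall entry for this type.
    return TLI.getLibcallImpl(LC);
  }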
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 316aacdf6978e..a0baf821698a8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4842,9 +4842,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
     RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
                             ? RTLIB::getSINCOS(VT)
                             : RTLIB::getSINCOSPI(VT);
-    bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results);
-    if (!Expanded)
-      llvm_unreachable("Expected scalar FSINCOS[PI] to expand to libcall!");
+    bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT);
+    if (!Expanded) {
+      DAG.getContext()->emitError(Twine("no libcall available for ") +
+                                  Node->getOperationName(&DAG));
+      SDValue Poison = DAG.getPOISON(VT);
+      Results.push_back(Poison);
+      Results.push_back(Poison);
+    }
+
     break;
   }
   case ISD::FLOG:
@@ -4934,7 +4940,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
     EVT VT = Node->getValueType(0);
     RTLIB::Libcall LC =
        Node->getOpcode() == ISD::FMODF ? RTLIB::getMODF(VT) : RTLIB::getFREXP(VT);
-    bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results,
+    bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT,
                                                       /*CallRetResNo=*/0);
     if (!Expanded)
       llvm_unreachable("Expected scalar FFREXP/FMODF to expand to libcall!");
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 58983cb57d7f6..29c4dac12a81a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1726,7 +1726,8 @@ void DAGTypeLegalizer::ExpandFloatRes_UnaryWithTwoFPResults(
     SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo) {
   assert(!N->isStrictFPOpcode() && "strictfp not implemented");
   SmallVector<SDValue> Results;
-  DAG.expandMultipleResultFPLibCall(LC, N, Results, CallRetResNo);
+  DAG.expandMultipleResultFPLibCall(LC, N, Results, N->getValueType(0),
+                                    CallRetResNo);
   for (auto [ResNo, Res] : enumerate(Results)) {
     SDValue Lo, Hi;
     GetPairElements(Res, Lo, Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 94751be5b7986..f5a54497c8a98 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1268,20 +1268,30 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
       return;
     break;
-  case ISD::FSINCOS:
+  case ISD::FSINCOSPI: {
+    EVT VT = Node->getValueType(0);
+    RTLIB::Libcall LC = RTLIB::getSINCOSPI(VT);
+    if (LC != RTLIB::UNKNOWN_LIBCALL &&
+        DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT))
+      return;
+
+    // TODO: Try to see if there's a narrower call available to use before
+    // scalarizing.
+    break;
+  }
+  case ISD::FSINCOS: {
+    // FIXME: Try to directly match vector case like fsincospi
     EVT VT = Node->getValueType(0).getVectorElementType();
-    RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
-                            ? RTLIB::getSINCOS(VT)
-                            : RTLIB::getSINCOSPI(VT);
-    if (DAG.expandMultipleResultFPLibCall(LC, Node, Results))
+    RTLIB::Libcall LC = RTLIB::getSINCOS(VT);
+    if (DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT))
       return;
     break;
   }
   case ISD::FMODF: {
-    RTLIB::Libcall LC =
-        RTLIB::getMODF(Node->getValueType(0).getVectorElementType());
-    if (DAG.expandMultipleResultFPLibCall(LC, Node, Results,
+    EVT VT = Node->getValueType(0).getVectorElementType();
+    RTLIB::Libcall LC = RTLIB::getMODF(VT);
+    if (DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT,
                                           /*CallRetResNo=*/0))
       return;
     break;
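Aside (sketch, not part of the patch): for masked variants such as armpl_svsincospi_f32_x, the expansion in SelectionDAG.cpp below appends an all-true predicate as the final call operand. Its type is the target's setcc result type for the call's vector type, e.g. <vscale x 4 x i1> for nxv4f32. A minimal sketch of that construction, using the same APIs the patch uses; the helper name is hypothetical:

  // Hypothetical helper mirroring the mask construction in the expansion:
  // an all-true splat in the target's boolean vector type for VT.
  SDValue buildAllTrueMask(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
                           const SDLoc &DL) {
    EVT MaskVT =
        TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    return DAG.getBoolConstant(true, DL, MaskVT, VT);
  }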
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index e70fa6e0349da..b5d502b90c90c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2514,18 +2514,20 @@ static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode,
 
 bool SelectionDAG::expandMultipleResultFPLibCall(
     RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl<SDValue> &Results,
-    std::optional<unsigned> CallRetResNo) {
-  LLVMContext &Ctx = *getContext();
-  EVT VT = Node->getValueType(0);
-  unsigned NumResults = Node->getNumValues();
-
+    EVT CallVT, std::optional<unsigned> CallRetResNo) {
   if (LC == RTLIB::UNKNOWN_LIBCALL)
     return false;
 
-  const char *LCName = TLI->getLibcallName(LC);
-  if (!LCName)
+  EVT VT = Node->getValueType(0);
+
+  RTLIB::LibcallImpl Impl = TLI->getLibcallImpl(LC);
+  if (Impl == RTLIB::Unsupported)
     return false;
 
+  StringRef LCName = TLI->getLibcallImplName(Impl);
+
+  // FIXME: This should not use TargetLibraryInfo. There should be
+  // RTLIB::Libcall entries for each used vector type, and directly matched.
   auto getVecDesc = [&]() -> VecDesc const * {
     for (bool Masked : {false, true}) {
       if (VecDesc const *VD = getLibInfo().getVectorMappingInfo(
@@ -2538,9 +2540,34 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
 
   // For vector types, we must find a vector mapping for the libcall.
   VecDesc const *VD = nullptr;
-  if (VT.isVector() && !(VD = getVecDesc()))
+  if (VT.isVector() && !CallVT.isVector() && !(VD = getVecDesc()))
     return false;
 
+  bool IsMasked = (VD && VD->isMasked()) ||
+                  RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(Impl);
+
+  // This wrapper function exists because getVectorMappingInfo works in terms
+  // of function names instead of RTLIB enums.
+
+  // FIXME: If we used a vector mapping, this assumes the calling convention of
+  // the vector function is the same as the scalar.
+
+  StringRef Name = VD ? VD->getVectorFnName() : LCName;
+
+  return expandMultipleResultFPLibCall(Name,
+                                       TLI->getLibcallImplCallingConv(Impl),
+                                       Node, Results, CallRetResNo, IsMasked);
+}
+
+// FIXME: This belongs in TargetLowering
+bool SelectionDAG::expandMultipleResultFPLibCall(
+    StringRef Name, CallingConv::ID CC, SDNode *Node,
+    SmallVectorImpl<SDValue> &Results, std::optional<unsigned> CallRetResNo,
+    bool IsMasked) {
+  LLVMContext &Ctx = *getContext();
+  EVT VT = Node->getValueType(0);
+  unsigned NumResults = Node->getNumValues();
+
   // Find users of the node that store the results (and share input chains). The
   // destination pointers can be used instead of creating stack allocations.
   SDValue StoresInChain;
@@ -2598,7 +2625,7 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
   SDLoc DL(Node);
 
   // Pass the vector mask (if required).
-  if (VD && VD->isMasked()) {
+  if (IsMasked) {
     EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), Ctx, VT);
     SDValue Mask = getBoolConstant(true, DL, MaskVT, VT);
     Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx));
@@ -2608,11 +2635,11 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
           ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
           : Type::getVoidTy(Ctx);
   SDValue InChain = StoresInChain ? StoresInChain : getEntryNode();
-  SDValue Callee = getExternalSymbol(VD ? VD->getVectorFnName().data() : LCName,
-                                     TLI->getPointerTy(getDataLayout()));
+  SDValue Callee =
+      getExternalSymbol(Name.data(), TLI->getPointerTy(getDataLayout()));
   TargetLowering::CallLoweringInfo CLI(*this);
-  CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
-      TLI->getLibcallCallingConv(LC), RetType, Callee, std::move(Args));
+  CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(CC, RetType, Callee,
+                                                     std::move(Args));
 
   auto [Call, CallChain] = TLI->LowerCallTo(CLI);
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 1cc591c17f9c3..814b4b57a0b9b 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -430,6 +430,24 @@ RTLIB::Libcall RTLIB::getSINCOS(EVT RetVT) {
 }
 
 RTLIB::Libcall RTLIB::getSINCOSPI(EVT RetVT) {
+  // TODO: Tablegen should generate this function
+  if (RetVT.isVector()) {
+    if (!RetVT.isSimple())
+      return RTLIB::UNKNOWN_LIBCALL;
+    switch (RetVT.getSimpleVT().SimpleTy) {
+    case MVT::v4f32:
+      return RTLIB::SINCOSPI_V4F32;
+    case MVT::v2f64:
+      return RTLIB::SINCOSPI_V2F64;
+    case MVT::nxv4f32:
+      return RTLIB::SINCOSPI_NXV4F32;
+    case MVT::nxv2f64:
+      return RTLIB::SINCOSPI_NXV2F64;
+    default:
+      return RTLIB::UNKNOWN_LIBCALL;
+    }
+  }
+
   return getFPLibCall(RetVT, SINCOSPI_F32, SINCOSPI_F64, SINCOSPI_F80,
                       SINCOSPI_F128, SINCOSPI_PPCF128);
 }
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index f4c5c6ff35af6..795621701d910 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -10,6 +10,7 @@
 #include "llvm/ADT/FloatingPointMode.h"
 #include "llvm/ADT/StringTable.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/SystemLibraries.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/xxhash.h"
 #include "llvm/TargetParser/ARMTargetParser.h"
@@ -25,6 +26,40 @@ using namespace RTLIB;
 #define DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME
 #include "llvm/IR/RuntimeLibcalls.inc"
 
+RuntimeLibcallsInfo::RuntimeLibcallsInfo(const Triple &TT,
+                                         ExceptionHandling ExceptionModel,
+                                         FloatABI::ABIType FloatABI,
+                                         EABI EABIVersion, StringRef ABIName) {
+  // FIXME: The ExceptionModel parameter is to handle the field in
+  // TargetOptions. This interface fails to distinguish the forced disable
+  // case for targets which support exceptions by default. This should
+  // probably be a module flag and removed from TargetOptions.
+  if (ExceptionModel == ExceptionHandling::None)
+    ExceptionModel = TT.getDefaultExceptionHandling();
+
+  initLibcalls(TT, ExceptionModel, FloatABI, EABIVersion, ABIName);
+
+  // TODO: Tablegen should generate these sets
+  switch (ClVectorLibrary) {
+  case VectorLibrary::SLEEFGNUABI:
+    for (RTLIB::LibcallImpl Impl :
+         {RTLIB::impl__ZGVnN4vl4l4_sincospif, RTLIB::impl__ZGVnN2vl8l8_sincospi,
+          RTLIB::impl__ZGVsNxvl4l4_sincospif,
+          RTLIB::impl__ZGVsNxvl8l8_sincospi})
+      setAvailable(Impl);
+    break;
+  case VectorLibrary::ArmPL:
+    for (RTLIB::LibcallImpl Impl :
+         {RTLIB::impl_armpl_vsincospiq_f32, RTLIB::impl_armpl_vsincospiq_f64,
+          RTLIB::impl_armpl_svsincospi_f32_x,
+          RTLIB::impl_armpl_svsincospi_f64_x})
+      setAvailable(Impl);
+    break;
+  default:
+    break;
+  }
+}
+
 RuntimeLibcallsInfo::RuntimeLibcallsInfo(const Module &M)
     : RuntimeLibcallsInfo(M.getTargetTriple()) {
   // TODO: Consider module flags
@@ -88,6 +123,8 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const Triple &TT,
   static constexpr Attribute::AttrKind CommonFnAttrs[] = {
       Attribute::NoCallback, Attribute::NoFree, Attribute::NoSync,
       Attribute::NoUnwind, Attribute::WillReturn};
+  static constexpr Attribute::AttrKind CommonPtrArgAttrs[] = {
+      Attribute::NoAlias, Attribute::WriteOnly, Attribute::NonNull};
 
   switch (LibcallImpl) {
   case RTLIB::impl___sincos_stret:
@@ -151,9 +188,67 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const Triple &TT,
                            fcNegNormal));
     return {FuncTy, Attrs};
   }
+  case RTLIB::impl__ZGVnN4vl4l4_sincospif:
+  case RTLIB::impl__ZGVnN2vl8l8_sincospi:
+  case RTLIB::impl__ZGVsNxvl4l4_sincospif:
+  case RTLIB::impl__ZGVsNxvl8l8_sincospi:
+  case RTLIB::impl_armpl_vsincospiq_f32:
+  case RTLIB::impl_armpl_vsincospiq_f64:
+  case RTLIB::impl_armpl_svsincospi_f32_x:
+  case RTLIB::impl_armpl_svsincospi_f64_x: {
+    AttrBuilder FuncAttrBuilder(Ctx);
+
+    bool IsF32 = LibcallImpl == RTLIB::impl__ZGVnN4vl4l4_sincospif ||
+                 LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincospif ||
+                 LibcallImpl == RTLIB::impl_armpl_vsincospiq_f32 ||
+                 LibcallImpl == RTLIB::impl_armpl_svsincospi_f32_x;
+    Type *ScalarTy = IsF32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx);
+    unsigned EC = IsF32 ? 4 : 2;
+
+    bool IsScalable = LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincospif ||
+                      LibcallImpl == RTLIB::impl__ZGVsNxvl8l8_sincospi ||
+                      LibcallImpl == RTLIB::impl_armpl_svsincospi_f32_x ||
+                      LibcallImpl == RTLIB::impl_armpl_svsincospi_f64_x;
+    VectorType *VecTy = VectorType::get(ScalarTy, EC, IsScalable);
+
+    for (Attribute::AttrKind Attr : CommonFnAttrs)
+      FuncAttrBuilder.addAttribute(Attr);
+    FuncAttrBuilder.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Mod));
+
+    AttributeList Attrs;
+    Attrs = Attrs.addFnAttributes(Ctx, FuncAttrBuilder);
+
+    {
+      AttrBuilder ArgAttrBuilder(Ctx);
+      for (Attribute::AttrKind AK : CommonPtrArgAttrs)
+        ArgAttrBuilder.addAttribute(AK);
+      ArgAttrBuilder.addAlignmentAttr(DL.getABITypeAlign(VecTy));
+      Attrs = Attrs.addParamAttributes(Ctx, 1, ArgAttrBuilder);
+      Attrs = Attrs.addParamAttributes(Ctx, 2, ArgAttrBuilder);
+    }
+
+    PointerType *PtrTy = PointerType::get(Ctx, 0);
+    SmallVector<Type *, 4> ArgTys = {VecTy, PtrTy, PtrTy};
+    if (hasVectorMaskArgument(LibcallImpl))
+      ArgTys.push_back(VectorType::get(Type::getInt1Ty(Ctx), EC, IsScalable));
+
+    return {FunctionType::get(Type::getVoidTy(Ctx), ArgTys, false), Attrs};
+  }
   default:
     return {};
   }
 
   return {};
 }
+
+bool RuntimeLibcallsInfo::hasVectorMaskArgument(RTLIB::LibcallImpl Impl) {
+  /// FIXME: This should be generated by tablegen and support the argument at
+  /// an arbitrary position
+  switch (Impl) {
+  case RTLIB::impl_armpl_svsincospi_f32_x:
+  case RTLIB::impl_armpl_svsincospi_f64_x:
+    return true;
+  default:
+    return false;
+  }
+}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 05a854a0bf3fa..5bce539c45341 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -635,6 +635,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FROUNDEVEN, VT, Action);
     setOperationAction(ISD::FTRUNC, VT, Action);
     setOperationAction(ISD::FLDEXP, VT, Action);
+    setOperationAction(ISD::FSINCOSPI, VT, Action);
   };
 
   if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
diff --git a/llvm/test/CodeGen/AArch64/llvm.sincospi.error.ll b/llvm/test/CodeGen/AArch64/llvm.sincospi.error.ll
new file mode 100644
index 0000000000000..d074d9ae24641
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/llvm.sincospi.error.ll
@@ -0,0 +1,13 @@
+; RUN: not llc -mtriple=aarch64-gnu-linux -filetype=null %s 2>&1 | FileCheck %s
+
+; CHECK: error: no libcall available for fsincospi
+define { float, float } @test_sincospi_f32(float %a) {
+  %result = call { float, float } @llvm.sincospi.f32(float %a)
+  ret { float, float } %result
+}
+
+; CHECK: error: no libcall available for fsincospi
+define { double, double } @test_sincospi_f64(double %a) {
+  %result = call { double, double } @llvm.sincospi.f64(double %a)
+  ret { double, double } %result
+}
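Aside (sketch, not part of the patch): the error test above exercises the failure path, where no sincospi libcall exists for the target and the legalizer now emits a diagnostic instead of asserting. The vector success path reduces to the following; the helper name is hypothetical, the APIs are the ones added in this patch.

  // Hypothetical helper: expand a vector FSINCOSPI node directly to a vector
  // libcall. Passing the vector VT as CallType tells the expansion the call
  // itself is vector-typed, so no TargetLibraryInfo vector-mapping lookup is
  // needed.
  bool expandVectorSincospi(SelectionDAG &DAG, SDNode *N,
                            SmallVectorImpl<SDValue> &Results) {
    EVT VT = N->getValueType(0);
    RTLIB::Libcall LC = RTLIB::getSINCOSPI(VT);
    return LC != RTLIB::UNKNOWN_LIBCALL &&
           DAG.expandMultipleResultFPLibCall(LC, N, Results, VT);
  }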
diff --git a/llvm/test/CodeGen/AArch64/llvm.sincospi.ll b/llvm/test/CodeGen/AArch64/llvm.sincospi.ll
index d1d7d92adc05a..b386df077c09d 100644
--- a/llvm/test/CodeGen/AArch64/llvm.sincospi.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.sincospi.ll
@@ -1,268 +1,250 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=aarch64-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=arm64-apple-macosx10.9 < %s | FileCheck %s
 
-define { half, half } @test_sincospi_f16(half %a) {
+define { half, half } @test_sincospi_f16(half %a) #0 {
 ; CHECK-LABEL: test_sincospi_f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
 ; CHECK-NEXT: fcvt s0, h0
 ; CHECK-NEXT: add x0, sp, #12
 ; CHECK-NEXT: add x1, sp, #8
-; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: bl ___sincospif
 ; CHECK-NEXT: ldp s1, s0, [sp, #8]
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; CHECK-NEXT: fcvt h0, s0
 ; CHECK-NEXT: fcvt h1, s1
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
 ; CHECK-NEXT: ret
   %result = call { half, half } @llvm.sincospi.f16(half %a)
   ret { half, half } %result
 }
 
-define half @test_sincospi_f16_only_use_sin(half %a) {
+define half @test_sincospi_f16_only_use_sin(half %a) #0 {
 ; CHECK-LABEL: test_sincospi_f16_only_use_sin:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
 ; CHECK-NEXT: fcvt s0, h0
 ; CHECK-NEXT: add x0, sp, #12
 ; CHECK-NEXT: add x1, sp, #8
-; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: bl ___sincospif
 ; CHECK-NEXT: ldr s0, [sp, #12]
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; CHECK-NEXT: fcvt h0, s0
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
 ; CHECK-NEXT: ret
   %result = call { half, half } @llvm.sincospi.f16(half %a)
   %result.0 = extractvalue { half, half } %result, 0
   ret half %result.0
 }
 
-define half @test_sincospi_f16_only_use_cos(half %a) {
+define half @test_sincospi_f16_only_use_cos(half %a) #0 {
 ; CHECK-LABEL: test_sincospi_f16_only_use_cos:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
 ; CHECK-NEXT: fcvt s0, h0
 ; CHECK-NEXT: add x0, sp, #12
 ; CHECK-NEXT: add x1, sp, #8
-; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: bl ___sincospif
 ; CHECK-NEXT: ldr s0, [sp, #8]
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; CHECK-NEXT: fcvt h0, s0
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
 ; CHECK-NEXT: ret
   %result = call { half, half } @llvm.sincospi.f16(half %a)
   %result.1 = extractvalue { half, half } %result, 1
   ret half %result.1
 }
 
-define { <2 x half>, <2 x half> } @test_sincospi_v2f16(<2 x half> %a) {
+define { <2 x half>, <2 x half> } @test_sincospi_v2f16(<2 x half> %a) #0 {
 ; CHECK-LABEL: test_sincospi_v2f16:
-; CHECK: // %bb.0:
+; CHECK: ; %bb.0:
 ; CHECK-NEXT: sub sp, sp, #64
-; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h1, v0.h[1]
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: add x0, sp, #36
-; CHECK-NEXT: add x1, sp, #32
-; CHECK-NEXT: fcvt s0, h1
-; CHECK-NEXT: bl sincospif
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h1, v0[1]
+; CHECK-NEXT: str q0, [sp] ; 16-byte Folded Spill
 ; CHECK-NEXT: add x0, sp, #28
 ; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: fcvt s0, h1
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #20
+; CHECK-NEXT: add x1, sp, #16
 ; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: bl sincospif
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #36
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: mov h0, v0[2]
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload
 ; CHECK-NEXT: add x0, sp, #44
 ; CHECK-NEXT: add x1, sp, #40
-; CHECK-NEXT: mov h0, v0.h[2]
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: bl sincospif
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: add x0, sp, #60
-; CHECK-NEXT: add x1, sp, #56
-; CHECK-NEXT: mov h0, v0.h[3]
+; CHECK-NEXT: mov h0, v0[3]
 ; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: bl sincospif
-; CHECK-NEXT: ldp s2, s0, [sp, #32]
-; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
-; CHECK-NEXT: ldp s3, s1, [sp, #24]
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldp s2, s0, [sp, #24]
+; CHECK-NEXT: ldp s3, s1, [sp, #16]
+; CHECK-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
 ; CHECK-NEXT: fcvt h4, s0
 ; CHECK-NEXT: fcvt h2, s2
 ; CHECK-NEXT: fcvt h0, s1
 ; CHECK-NEXT: fcvt h1, s3
-; CHECK-NEXT: ldp s5, s3, [sp, #40]
+; CHECK-NEXT: ldp s5, s3, [sp, #32]
 ; CHECK-NEXT: fcvt h3, s3
-; CHECK-NEXT: mov v0.h[1], v4.h[0]
+; CHECK-NEXT: mov.h v0[1], v4[0]
 ; CHECK-NEXT: fcvt h4, s5
-; CHECK-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-NEXT: ldp s5, s2, [sp, #56]
-; CHECK-NEXT: mov v0.h[2], v3.h[0]
+; CHECK-NEXT: mov.h v1[1], v2[0]
+; CHECK-NEXT: ldp s5, s2, [sp, #40]
+; CHECK-NEXT: mov.h v0[2], v3[0]
 ; CHECK-NEXT: fcvt h2, s2
 ; CHECK-NEXT: fcvt h3, s5
-; CHECK-NEXT: mov v1.h[2], v4.h[0]
-; CHECK-NEXT: mov v0.h[3], v2.h[0]
-; CHECK-NEXT: mov v1.h[3], v3.h[0]
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: mov.h v1[2], v4[0]
+; CHECK-NEXT: mov.h v0[3], v2[0]
+; CHECK-NEXT: mov.h v1[3], v3[0]
+; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q1
 ; CHECK-NEXT: add sp, sp, #64
 ; CHECK-NEXT: ret
   %result = call { <2 x half>, <2 x half> } @llvm.sincospi.v2f16(<2 x half> %a)
   ret { <2 x half>, <2 x half> } %result
 }
 
-define { float, float } @test_sincospi_f32(float %a) {
+define { float, float } @test_sincospi_f32(float %a) #0 {
 ; CHECK-LABEL: test_sincospi_f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
 ; CHECK-NEXT: add x0, sp, #12
 ; CHECK-NEXT: add x1, sp, #8
-; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: bl ___sincospif
 ; CHECK-NEXT: ldp s1, s0, [sp, #8]
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
 ; CHECK-NEXT: ret
   %result = call { float, float } @llvm.sincospi.f32(float %a)
   ret { float, float } %result
 }
 
-define { <3 x float>, <3 x float> } @test_sincospi_v3f32(<3 x float> %a) {
+define { <3 x float>, <3 x float> } @test_sincospi_v3f32(<3 x float> %a) #0 {
 ; CHECK-LABEL: test_sincospi_v3f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #80
-; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w21, -24
-; CHECK-NEXT: .cfi_offset w22, -32
-; CHECK-NEXT: .cfi_offset w30, -48
-; CHECK-NEXT: add x0, sp, #20
-; CHECK-NEXT: add x1, sp, #16
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT: bl sincospif
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #96
 ; CHECK-NEXT: add x0, sp, #28
 ; CHECK-NEXT: add x1, sp, #24
-; CHECK-NEXT: add x19, sp, #28
-; CHECK-NEXT: add x20, sp, #24
-; CHECK-NEXT: mov s0, v0.s[1]
-; CHECK-NEXT: bl sincospif
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: stp x22, x21, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+; CHECK-NEXT: str q0, [sp] ; 16-byte Folded Spill
+; CHECK-NEXT: ; kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #36
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: add x19, sp, #36
+; CHECK-NEXT: add x20, sp, #32
+; CHECK-NEXT: mov s0, v0[1]
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload
 ; CHECK-NEXT: add x0, sp, #44
 ; CHECK-NEXT: add x1, sp, #40
 ; CHECK-NEXT: add x21, sp, #44
 ; CHECK-NEXT: add x22, sp, #40
-; CHECK-NEXT: mov s0, v0.s[2]
-; CHECK-NEXT: bl sincospif
-; CHECK-NEXT: ldp s1, s0, [sp, #16]
-; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT: ld1 { v0.s }[1], [x19]
-; CHECK-NEXT: ld1 { v1.s }[1], [x20]
-; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: ld1 { v0.s }[2], [x21]
-; CHECK-NEXT: ld1 { v1.s }[2], [x22]
-; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #80
+; CHECK-NEXT: mov s0, v0[2]
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldp s1, s0, [sp, #24]
+; CHECK-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+; CHECK-NEXT: ld1.s { v0 }[1], [x19]
+; CHECK-NEXT: ld1.s { v1 }[1], [x20]
+; CHECK-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT: ld1.s { v0 }[2], [x21]
+; CHECK-NEXT: ld1.s { v1 }[2], [x22]
+; CHECK-NEXT: ldp x22, x21, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #96
 ; CHECK-NEXT: ret
   %result = call { <3 x float>, <3 x float> } @llvm.sincospi.v3f32(<3 x float> %a)
   ret { <3 x float>, <3 x float> } %result
 }
 
-define { <2 x float>, <2 x float> } @test_sincospi_v2f32(<2 x float> %a) {
+define { <2 x float>, <2 x float> } @test_sincospi_v2f32(<2 x float> %a) #0 {
 ; CHECK-LABEL: test_sincospi_v2f32:
-; CHECK: // %bb.0:
+; CHECK: ; %bb.0:
 ; CHECK-NEXT: sub sp, sp, #64
-; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w30, -32
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: add x0, sp, #44
-; CHECK-NEXT: add x1, sp, #40
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT: bl sincospif
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT: add x0, sp, #28
 ; CHECK-NEXT: add x1, sp, #24
-; CHECK-NEXT: add x19, sp, #28
-; CHECK-NEXT: add x20, sp, #24
-; CHECK-NEXT: mov s0, v0.s[1]
-; CHECK-NEXT: bl sincospif
-; CHECK-NEXT: ldp s1, s0, [sp, #40]
-; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT: ld1 { v0.s }[1], [x19]
-; CHECK-NEXT: ld1 { v1.s }[1], [x20]
-; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: stp x20, x19, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: str q0, [sp] ; 16-byte Folded Spill
+; CHECK-NEXT: ; kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #20
+; CHECK-NEXT: add x1, sp, #16
+; CHECK-NEXT: add x19, sp, #20
+; CHECK-NEXT: add x20, sp, #16
+; CHECK-NEXT: mov s0, v0[1]
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldp s1, s0, [sp, #24]
+; CHECK-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT: ld1.s { v0 }[1], [x19]
+; CHECK-NEXT: ld1.s { v1 }[1], [x20]
+; CHECK-NEXT: ldp x20, x19, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q1
 ; CHECK-NEXT: add sp, sp, #64
 ; CHECK-NEXT: ret
   %result = call { <2 x float>, <2 x float> } @llvm.sincospi.v2f32(<2 x float> %a)
   ret { <2 x float>, <2 x float> } %result
 }
 
-define { double, double } @test_sincospi_f64(double %a) {
+define { double, double } @test_sincospi_f64(double %a) #0 {
 ; CHECK-LABEL: test_sincospi_f64:
-; CHECK: // %bb.0:
+; CHECK: ; %bb.0:
 ; CHECK-NEXT: sub sp, sp, #32
-; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: add x0, sp, #24
-; CHECK-NEXT: add x1, sp, #8
-; CHECK-NEXT: bl sincospi
-; CHECK-NEXT: ldr d0, [sp, #24]
-; CHECK-NEXT: ldr d1, [sp, #8]
-; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #8
+; CHECK-NEXT: mov x1, sp
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: bl ___sincospi
+; CHECK-NEXT: ldp d1, d0, [sp]
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; CHECK-NEXT: add sp, sp, #32
 ; CHECK-NEXT: ret
   %result = call { double, double } @llvm.sincospi.f64(double %a)
   ret { double, double } %result
 }
 
-define { <2 x double>, <2 x double> } @test_sincospi_v2f64(<2 x double> %a) {
+define { <2 x double>, <2 x double> } @test_sincospi_v2f64(<2 x double> %a) #0 {
 ; CHECK-LABEL: test_sincospi_v2f64:
-; CHECK: // %bb.0:
+; CHECK: ; %bb.0:
 ; CHECK-NEXT: sub sp, sp, #80
-; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w30, -32
-; CHECK-NEXT: add x0, sp, #56
-; CHECK-NEXT: add x1, sp, #40
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: bl sincospi
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: add x0, sp, #32
-; CHECK-NEXT: add x1, sp, #24
-; CHECK-NEXT: add x19, sp, #32
-; CHECK-NEXT: add x20, sp, #24
-; CHECK-NEXT: mov d0, v0.d[1]
-; CHECK-NEXT: bl sincospi
-; CHECK-NEXT: ldr d0, [sp, #56]
-; CHECK-NEXT: ldr d1, [sp, #40]
-; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
-; CHECK-NEXT: ld1 { v0.d }[1], [x19]
-; CHECK-NEXT: ld1 { v1.d }[1], [x20]
-; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #40
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: stp x20, x19, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT: str q0, [sp] ; 16-byte Folded Spill
+; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: bl ___sincospi
+; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #24
+; CHECK-NEXT: add x1, sp, #16
+; CHECK-NEXT: add x19, sp, #24
+; CHECK-NEXT: add x20, sp, #16
+; CHECK-NEXT: mov d0, v0[1]
+; CHECK-NEXT: bl ___sincospi
+; CHECK-NEXT: ldp d1, d0, [sp, #32]
+; CHECK-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT: ld1.d { v0 }[1], [x19]
+; CHECK-NEXT: ld1.d { v1 }[1], [x20]
+; CHECK-NEXT: ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
 ; CHECK-NEXT: add sp, sp, #80
 ; CHECK-NEXT: ret
   %result = call { <2 x double>, <2 x double> } @llvm.sincospi.v2f64(<2 x double> %a)
   ret { <2 x double>, <2 x double> } %result
 }
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/ARM/llvm.sincospi.ll b/llvm/test/CodeGen/ARM/llvm.sincospi.ll
new file mode 100644
index 0000000000000..91bf0aaf1806a
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/llvm.sincospi.ll
@@ -0,0 +1,249 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=thumbv7-apple-ios7.0.0 < %s | FileCheck %s
+
+define { half, half } @test_sincospi_f16(half %a) #0 {
+; CHECK-LABEL: test_sincospi_f16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: bl ___extendhfsf2
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr r0, [sp, #4]
+; CHECK-NEXT: bl ___truncsfhf2
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: ldr r0, [sp]
+; CHECK-NEXT: bl ___truncsfhf2
+; CHECK-NEXT: mov r1, r0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: pop {r4, pc}
+  %result = call { half, half } @llvm.sincospi.f16(half %a)
+  ret { half, half } %result
+}
+
+define half @test_sincospi_f16_only_use_sin(half %a) #0 {
+; CHECK-LABEL: test_sincospi_f16_only_use_sin:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: str lr, [sp, #-4]!
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: bl ___extendhfsf2
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr r0, [sp, #4]
+; CHECK-NEXT: bl ___truncsfhf2
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: ldr lr, [sp], #4
+; CHECK-NEXT: bx lr
+  %result = call { half, half } @llvm.sincospi.f16(half %a)
+  %result.0 = extractvalue { half, half } %result, 0
+  ret half %result.0
+}
+
+define half @test_sincospi_f16_only_use_cos(half %a) #0 {
+; CHECK-LABEL: test_sincospi_f16_only_use_cos:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: str lr, [sp, #-4]!
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: bl ___extendhfsf2
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr r0, [sp]
+; CHECK-NEXT: bl ___truncsfhf2
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: ldr lr, [sp], #4
+; CHECK-NEXT: bx lr
+  %result = call { half, half } @llvm.sincospi.f16(half %a)
+  %result.1 = extractvalue { half, half } %result, 1
+  ret half %result.1
+}
+
+define { <2 x half>, <2 x half> } @test_sincospi_v2f16(<2 x half> %a) #0 {
+; CHECK-LABEL: test_sincospi_v2f16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: sub sp, #24
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl ___extendhfsf2
+; CHECK-NEXT: add r1, sp, #12
+; CHECK-NEXT: add r2, sp, #8
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: bl ___extendhfsf2
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr r0, [sp, #12]
+; CHECK-NEXT: bl ___truncsfhf2
+; CHECK-NEXT: ldr r1, [sp, #4]
+; CHECK-NEXT: strh.w r0, [sp, #22]
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl ___truncsfhf2
+; CHECK-NEXT: strh.w r0, [sp, #20]
+; CHECK-NEXT: add r0, sp, #20
+; CHECK-NEXT: vld1.32 {d8[0]}, [r0:32]
+; CHECK-NEXT: ldr r0, [sp, #8]
+; CHECK-NEXT: bl ___truncsfhf2
+; CHECK-NEXT: ldr r1, [sp]
+; CHECK-NEXT: strh.w r0, [sp, #18]
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl ___truncsfhf2
+; CHECK-NEXT: strh.w r0, [sp, #16]
+; CHECK-NEXT: add r0, sp, #16
+; CHECK-NEXT: vmovl.u16 q9, d8
+; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32]
+; CHECK-NEXT: vmovl.u16 q8, d16
+; CHECK-NEXT: vmov.32 r0, d18[0]
+; CHECK-NEXT: vmov.32 r1, d18[1]
+; CHECK-NEXT: vmov.32 r2, d16[0]
+; CHECK-NEXT: vmov.32 r3, d16[1]
+; CHECK-NEXT: add sp, #24
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: pop {r4, pc}
+  %result = call { <2 x half>, <2 x half> } @llvm.sincospi.v2f16(<2 x half> %a)
+  ret { <2 x half>, <2 x half> } %result
+}
+
+define { float, float } @test_sincospi_f32(float %a) #0 {
+; CHECK-LABEL: test_sincospi_f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: str lr, [sp, #-4]!
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldrd r1, r0, [sp], #8
+; CHECK-NEXT: ldr lr, [sp], #4
+; CHECK-NEXT: bx lr
+  %result = call { float, float } @llvm.sincospi.f32(float %a)
+  ret { float, float } %result
+}
+
+define { <2 x float>, <2 x float> } @test_sincospi_v2f32(<2 x float> %a) #0 {
+; CHECK-LABEL: test_sincospi_v2f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: str lr, [sp, #-4]!
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: vmov r0, s17
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: add r1, sp, #12
+; CHECK-NEXT: add r2, sp, #8
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: vldr s1, [sp, #4]
+; CHECK-NEXT: vldr s3, [sp]
+; CHECK-NEXT: vldr s0, [sp, #12]
+; CHECK-NEXT: vldr s2, [sp, #8]
+; CHECK-NEXT: vmov r0, r1, d0
+; CHECK-NEXT: vmov r2, r3, d1
+; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: ldr lr, [sp], #4
+; CHECK-NEXT: bx lr
+  %result = call { <2 x float>, <2 x float> } @llvm.sincospi.v2f32(<2 x float> %a)
+  ret { <2 x float>, <2 x float> } %result
+}
+
+define { <3 x float>, <3 x float> } @test_sincospi_v3f32(<3 x float> %a) #0 {
+; CHECK-LABEL: test_sincospi_v3f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: mov r6, r2
+; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: add r1, sp, #12
+; CHECK-NEXT: add r2, sp, #8
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r5, r3
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr r0, [sp, #36]
+; CHECK-NEXT: vmov d0, r7, r6
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: add.w r2, r4, #16
+; CHECK-NEXT: vmov d1, r5, r0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vldr s1, [sp, #8]
+; CHECK-NEXT: vldr s3, [sp, #12]
+; CHECK-NEXT: vldr s2, [sp, #4]
+; CHECK-NEXT: vldr s0, [sp]
+; CHECK-NEXT: vst1.32 {d1}, [r1:64]!
+; CHECK-NEXT: vst1.32 {d0}, [r2:64]!
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+  %result = call { <3 x float>, <3 x float> } @llvm.sincospi.v3f32(<3 x float> %a)
+  ret { <3 x float>, <3 x float> } %result
+}
+
+define { double, double } @test_sincospi_f64(double %a) #0 {
+; CHECK-LABEL: test_sincospi_f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, r7, lr}
+; CHECK-NEXT: add r7, sp, #4
+; CHECK-NEXT: sub sp, #20
+; CHECK-NEXT: mov r4, sp
+; CHECK-NEXT: bfc r4, #0, #3
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: add r2, sp, #8
+; CHECK-NEXT: mov r3, sp
+; CHECK-NEXT: bl ___sincospi
+; CHECK-NEXT: subs r4, r7, #4
+; CHECK-NEXT: ldrd r0, r1, [sp, #8]
+; CHECK-NEXT: ldrd r2, r3, [sp]
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: pop {r4, r7, pc}
+  %result = call { double, double } @llvm.sincospi.f64(double %a)
+  ret { double, double } %result
+}
+
+define { <2 x double>, <2 x double> } @test_sincospi_v2f64(<2 x double> %a) #0 {
+; CHECK-LABEL: test_sincospi_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: add r7, sp, #16
+; CHECK-NEXT: sub sp, #32
+; CHECK-NEXT: mov r4, sp
+; CHECK-NEXT: bfc r4, #0, #3
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: mov r6, r1
+; CHECK-NEXT: ldr r1, [r7, #8]
+; CHECK-NEXT: mov r5, r3
+; CHECK-NEXT: mov r8, r2
+; CHECK-NEXT: add r2, sp, #24
+; CHECK-NEXT: add r3, sp, #16
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: bl ___sincospi
+; CHECK-NEXT: add r2, sp, #8
+; CHECK-NEXT: mov r3, sp
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: bl ___sincospi
+; CHECK-NEXT: vldr d19, [sp, #24]
+; CHECK-NEXT: vldr d18, [sp, #8]
+; CHECK-NEXT: vldr d17, [sp, #16]
+; CHECK-NEXT: vldr d16, [sp]
+; CHECK-NEXT: vst1.32 {d18, d19}, [r4]!
+; CHECK-NEXT: vst1.32 {d16, d17}, [r4]
+; CHECK-NEXT: sub.w r4, r7, #16
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+  %result = call { <2 x double>, <2 x double> } @llvm.sincospi.v2f64(<2 x double> %a)
+  ret { <2 x double>, <2 x double> } %result
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/PowerPC/llvm.sincos.ll b/llvm/test/CodeGen/PowerPC/llvm.sincos.ll
index aaf81ff814488..5b4e91c449522 100644
--- a/llvm/test/CodeGen/PowerPC/llvm.sincos.ll
+++ b/llvm/test/CodeGen/PowerPC/llvm.sincos.ll
@@ -26,30 +26,6 @@ define { ppc_fp128, ppc_fp128 } @test_sincos_ppcf128(ppc_fp128 %a) {
   ret { ppc_fp128, ppc_fp128 } %result
 }
 
-define { ppc_fp128, ppc_fp128 } @test_sincospi_ppcf128(ppc_fp128 %a) {
-; CHECK-LABEL: test_sincospi_ppcf128:
-; CHECK: # %bb.0:
-; CHECK-NEXT: mflr r0
-; CHECK-NEXT: stdu r1, -64(r1)
-; CHECK-NEXT: std r0, 80(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: addi r5, r1, 48
-; CHECK-NEXT: addi r6, r1, 32
-; CHECK-NEXT: bl sincospil
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfd f1, 48(r1)
-; CHECK-NEXT: lfd f2, 56(r1)
-; CHECK-NEXT: lfd f3, 32(r1)
-; CHECK-NEXT: lfd f4, 40(r1)
-; CHECK-NEXT: addi r1, r1, 64
-; CHECK-NEXT: ld r0, 16(r1)
-; CHECK-NEXT: mtlr r0
-; CHECK-NEXT: blr
-  %result = call { ppc_fp128, ppc_fp128 } @llvm.sincospi.ppcf128(ppc_fp128 %a)
-  ret { ppc_fp128, ppc_fp128 } %result
-}
-
 ; FIXME: This could be made a tail call with the default expansion of llvm.sincos.
@@ -73,29 +49,6 @@ define void @test_sincos_ppcf128_void_tail_call(ppc_fp128 %a, ptr noalias %out_s
   ret void
 }
 
-; FIXME: This could be made a tail call with the default expansion of llvm.sincospi.
-define void @test_sincospi_ppcf128_void_tail_call(ppc_fp128 %a, ptr noalias %out_sin, ptr noalias %out_cos) {
-; CHECK-LABEL: test_sincospi_ppcf128_void_tail_call:
-; CHECK: # %bb.0:
-; CHECK-NEXT: mflr r0
-; CHECK-NEXT: stdu r1, -32(r1)
-; CHECK-NEXT: std r0, 48(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: bl sincospil
-; CHECK-NEXT: nop
-; CHECK-NEXT: addi r1, r1, 32
-; CHECK-NEXT: ld r0, 16(r1)
-; CHECK-NEXT: mtlr r0
-; CHECK-NEXT: blr
-  %result = tail call { ppc_fp128, ppc_fp128 } @llvm.sincospi.ppcf128(ppc_fp128 %a)
-  %result.0 = extractvalue { ppc_fp128, ppc_fp128 } %result, 0
-  %result.1 = extractvalue { ppc_fp128, ppc_fp128 } %result, 1
-  store ppc_fp128 %result.0, ptr %out_sin, align 16
-  store ppc_fp128 %result.1, ptr %out_cos, align 16
-  ret void
-}
-
 ; NOTE: This would need a struct-return library call for llvm.sincos to become a tail call.
 define { ppc_fp128, ppc_fp128 } @test_sincos_ppcf128_tail_call(ppc_fp128 %a) {
 ; CHECK-LABEL: test_sincos_ppcf128_tail_call:
@@ -120,28 +73,3 @@ define { ppc_fp128, ppc_fp128 } @test_sincos_ppcf128_tail_call(ppc_fp128 %a) {
   %result = tail call { ppc_fp128, ppc_fp128 } @llvm.sincos.ppcf128(ppc_fp128 %a)
   ret { ppc_fp128, ppc_fp128 } %result
 }
-
-; NOTE: This would need a struct-return library call for llvm.sincospi to become a tail call.
-define { ppc_fp128, ppc_fp128 } @test_sincospi_ppcf128_tail_call(ppc_fp128 %a) {
-; CHECK-LABEL: test_sincospi_ppcf128_tail_call:
-; CHECK: # %bb.0:
-; CHECK-NEXT: mflr r0
-; CHECK-NEXT: stdu r1, -64(r1)
-; CHECK-NEXT: std r0, 80(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: addi r5, r1, 48
-; CHECK-NEXT: addi r6, r1, 32
-; CHECK-NEXT: bl sincospil
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfd f1, 48(r1)
-; CHECK-NEXT: lfd f2, 56(r1)
-; CHECK-NEXT: lfd f3, 32(r1)
-; CHECK-NEXT: lfd f4, 40(r1)
-; CHECK-NEXT: addi r1, r1, 64
-; CHECK-NEXT: ld r0, 16(r1)
-; CHECK-NEXT: mtlr r0
-; CHECK-NEXT: blr
-  %result = tail call { ppc_fp128, ppc_fp128 } @llvm.sincospi.ppcf128(ppc_fp128 %a)
-  ret { ppc_fp128, ppc_fp128 } %result
-}
diff --git a/llvm/test/CodeGen/PowerPC/llvm.sincospi.ll b/llvm/test/CodeGen/PowerPC/llvm.sincospi.ll
new file mode 100644
index 0000000000000..75e7559386f16
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/llvm.sincospi.ll
@@ -0,0 +1,21 @@
+; RUN: not llc -mtriple=powerpc64le-gnu-linux -filetype=null %s 2>&1 | FileCheck %s
+
+; CHECK: error: no libcall available for fsincospi
+define { half, half } @test_sincospi_f16(half %a) #0 {
+  %result = call { half, half } @llvm.sincospi.f16(half %a)
+  ret { half, half } %result
+}
+
+; CHECK: error: no libcall available for fsincospi
+define { float, float } @test_sincospi_f32(float %a) #0 {
+  %result = call { float, float } @llvm.sincospi.f32(float %a)
+  ret { float, float } %result
+}
+
+; CHECK: error: no libcall available for fsincospi
+define { double, double } @test_sincospi_f64(double %a) #0 {
+  %result = call { double, double } @llvm.sincospi.f64(double %a)
+  ret { double, double } %result
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/PowerPC/llvm.sincospi.ppcfp128.ll b/llvm/test/CodeGen/PowerPC/llvm.sincospi.ppcfp128.ll
new file mode 100644
index 0000000000000..bc656bb785e9e
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/llvm.sincospi.ppcfp128.ll
@@ -0,0 +1,25 @@
+; XFAIL: *
+; FIXME: asserts
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-gnu-linux -filetype=null \
+; RUN:   -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names %s
+
+define { ppc_fp128, ppc_fp128 } @test_sincospi_ppcf128(ppc_fp128 %a) {
+  %result = call { ppc_fp128, ppc_fp128 } @llvm.sincospi.ppcf128(ppc_fp128 %a)
+  ret { ppc_fp128, ppc_fp128 } %result
+}
+
+; FIXME: This could be made a tail call with the default expansion of llvm.sincospi.
+define void @test_sincospi_ppcf128_void_tail_call(ppc_fp128 %a, ptr noalias %out_sin, ptr noalias %out_cos) {
+  %result = tail call { ppc_fp128, ppc_fp128 } @llvm.sincospi.ppcf128(ppc_fp128 %a)
+  %result.0 = extractvalue { ppc_fp128, ppc_fp128 } %result, 0
+  %result.1 = extractvalue { ppc_fp128, ppc_fp128 } %result, 1
+  store ppc_fp128 %result.0, ptr %out_sin, align 16
+  store ppc_fp128 %result.1, ptr %out_cos, align 16
+  ret void
+}
+
+; NOTE: This would need a struct-return library call for llvm.sincospi to become a tail call.
+define { ppc_fp128, ppc_fp128 } @test_sincospi_ppcf128_tail_call(ppc_fp128 %a) {
+  %result = tail call { ppc_fp128, ppc_fp128 } @llvm.sincospi.ppcf128(ppc_fp128 %a)
+  ret { ppc_fp128, ppc_fp128 } %result
+}
diff --git a/llvm/test/CodeGen/X86/llvm.sincospi.ll b/llvm/test/CodeGen/X86/llvm.sincospi.ll
new file mode 100644
index 0000000000000..5546c66deba30
--- /dev/null
+++ b/llvm/test/CodeGen/X86/llvm.sincospi.ll
@@ -0,0 +1,233 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=x86_64-apple-macosx10.9 < %s | FileCheck %s
+
+define { half, half } @test_sincospi_f16(half %a) #0 {
+; CHECK-LABEL: test_sincospi_f16:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: pextrw $0, %xmm0, %eax
+; CHECK-NEXT: movzwl %ax, %edi
+; CHECK-NEXT: callq ___extendhfsf2
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: callq ___truncsfhf2
+; CHECK-NEXT: ## kill: def $ax killed $ax def $eax
+; CHECK-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: callq ___truncsfhf2
+; CHECK-NEXT: ## kill: def $ax killed $ax def $eax
+; CHECK-NEXT: pinsrw $0, %eax, %xmm1
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: retq
+  %result = call { half, half } @llvm.sincospi.f16(half %a)
+  ret { half, half } %result
+}
+
+define half @test_sincospi_f16_only_use_sin(half %a) #0 {
+; CHECK-LABEL: test_sincospi_f16_only_use_sin:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: pextrw $0, %xmm0, %eax
+; CHECK-NEXT: movzwl %ax, %edi
+; CHECK-NEXT: callq ___extendhfsf2
+; CHECK-NEXT: movq %rsp, %rdi
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: callq ___truncsfhf2
+; CHECK-NEXT: ## kill: def $ax killed $ax def $eax
+; CHECK-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+  %result = call { half, half } @llvm.sincospi.f16(half %a)
+  %result.0 = extractvalue { half, half } %result, 0
+  ret half %result.0
+}
+
+define half @test_sincospi_f16_only_use_cos(half %a) #0 {
+; CHECK-LABEL: test_sincospi_f16_only_use_cos:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: pextrw $0, %xmm0, %eax
+; CHECK-NEXT: movzwl %ax, %edi
+; CHECK-NEXT: callq ___extendhfsf2
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: movq %rsp, %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: callq ___truncsfhf2
+; CHECK-NEXT: ## kill: def $ax killed $ax def $eax
+; CHECK-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+  %result = call { half, half } @llvm.sincospi.f16(half %a)
+  %result.1 = extractvalue { half, half } %result, 1
+  ret half %result.1
+}
+
+define { <2 x half>, <2 x half> } @test_sincospi_v2f16(<2 x half> %a) #0 {
+; CHECK-LABEL: test_sincospi_v2f16:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: subq $64, %rsp
+; CHECK-NEXT: pextrw $0, %xmm0, %ebx
+; CHECK-NEXT: psrld $16, %xmm0
+; CHECK-NEXT: pextrw $0, %xmm0, %eax
+; CHECK-NEXT: movzwl %ax, %edi
+; CHECK-NEXT: callq ___extendhfsf2
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: movq %rsp, %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movzwl %bx, %edi
+; CHECK-NEXT: callq ___extendhfsf2
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: callq ___truncsfhf2
+; CHECK-NEXT: ## kill: def $ax killed $ax def $eax
+; CHECK-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: callq ___truncsfhf2
+; CHECK-NEXT: ## kill: def $ax killed $ax def $eax
+; CHECK-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: callq ___truncsfhf2
+; CHECK-NEXT: ## kill: def $ax killed $ax def $eax
+; CHECK-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: callq ___truncsfhf2
+; CHECK-NEXT: ## kill: def $ax killed $ax def $eax
+; CHECK-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
+; CHECK-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
+; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload
+; CHECK-NEXT: ## xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+; CHECK-NEXT: addq $64, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: retq
+  %result = call { <2 x half>, <2 x half> } @llvm.sincospi.v2f16(<2 x half> %a)
+  ret { <2 x half>, <2 x half> } %result
+}
+
+define { float, float } @test_sincospi_f32(float %a) #0 {
+; CHECK-LABEL: test_sincospi_f32:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: movq %rsp, %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+  %result = call { float, float } @llvm.sincospi.f32(float %a)
+  ret { float, float } %result
+}
+
+define { <2 x float>, <2 x float> } @test_sincospi_v2f32(<2 x float> %a) #0 {
+; CHECK-LABEL: test_sincospi_v2f32:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: movq %rsp, %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: retq
+  %result = call { <2 x float>, <2 x float> } @llvm.sincospi.v2f32(<2 x float> %a)
+  ret { <2 x float>, <2 x float> } %result
+}
+
+define { <3 x float>, <3 x float> } @test_sincospi_v3f32(<3 x float> %a) #0 {
+; CHECK-LABEL: test_sincospi_v3f32:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-NEXT: addq $56, %rsp
+; CHECK-NEXT: retq
+  %result = call { <3 x float>, <3 x float> } @llvm.sincospi.v3f32(<3 x float> %a)
+  ret { <3 x float>, <3 x float> } %result
+}
+
+define { double, double } @test_sincospi_f64(double %a) #0 {
+; CHECK-LABEL: test_sincospi_f64:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: callq ___sincospi
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: retq
+  %result = call { double, double } @llvm.sincospi.f64(double %a)
+  ret { double, double } %result
+}
+
+define { <2 x double>, <2 x double> } @test_sincospi_v2f64(<2 x double> %a) #0 {
+; CHECK-LABEL: test_sincospi_v2f64:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: callq ___sincospi
___sincospi +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; CHECK-NEXT: movq %rsp, %rsi +; CHECK-NEXT: callq ___sincospi +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] +; CHECK-NEXT: addq $56, %rsp +; CHECK-NEXT: retq + %result = call { <2 x double>, <2 x double> } @llvm.sincospi.v2f64(<2 x double> %a) + ret { <2 x double>, <2 x double> } %result +} + +attributes #0 = { nounwind } diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll new file mode 100644 index 0000000000000..c45f319f80122 --- /dev/null +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll @@ -0,0 +1,12 @@ +; REQUIRES: aarch64-registered-target +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=aarch64-unknown-linux -mattr=+neon,+sve -vector-library=ArmPL < %s | FileCheck %s + +; CHECK: declare void @armpl_svsincospi_f32_x(<vscale x 4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, <vscale x 4 x i1>) [[ATTRS:#[0-9]+]] + +; CHECK: declare void @armpl_svsincospi_f64_x(<vscale x 2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, <vscale x 2 x i1>) [[ATTRS:#[0-9]+]] + +; CHECK: declare void @armpl_vsincospiq_f32(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare void @armpl_vsincospiq_f64(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: attributes [[ATTRS]] = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll new file mode 100644 index 0000000000000..7972e0ca1c487 --- /dev/null +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll @@ -0,0 +1,12 @@ +; REQUIRES: aarch64-registered-target +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=aarch64-unknown-linux -mattr=+neon,+sve -vector-library=sleefgnuabi < %s | FileCheck %s + +; CHECK: declare void @_ZGVnN2vl8l8_sincospi(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS:#[0-9]+]] + +; CHECK: declare void @_ZGVnN4vl4l4_sincospif(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare void @_ZGVsNxvl4l4_sincospif(<vscale x 4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS:#[0-9]+]] + +; CHECK: declare void @_ZGVsNxvl8l8_sincospi(<vscale x 2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS:#[0-9]+]] + +; CHECK: attributes [[ATTRS]] = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }