diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index f58525754d7a5..1c167af4b0478 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -316,12 +316,22 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     EVT ScalarVT = VT.getScalarType();
     RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
 
+    /// Migration flag. IsVectorCall cases directly know about the vector
+    /// libcall in RuntimeLibcallsInfo and shouldn't try to use
+    /// LibInfo->getVectorMappingInfo.
+    bool IsVectorCall = false;
+
     switch (ICA.getID()) {
     case Intrinsic::modf:
       LC = RTLIB::getMODF(ScalarVT);
       break;
     case Intrinsic::sincospi:
-      LC = RTLIB::getSINCOSPI(ScalarVT);
+      LC = RTLIB::getSINCOSPI(VT);
+      if (LC == RTLIB::UNKNOWN_LIBCALL)
+        LC = RTLIB::getSINCOSPI(ScalarVT);
+      else if (VT.isVector())
+        IsVectorCall = true;
+
       break;
     case Intrinsic::sincos:
       LC = RTLIB::getSINCOS(ScalarVT);
@@ -345,17 +355,23 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     LLVMContext &Ctx = RetTy->getContext();
     ElementCount VF = getVectorizedTypeVF(RetTy);
     VecDesc const *VD = nullptr;
-    for (bool Masked : {false, true}) {
-      if ((VD = LibInfo->getVectorMappingInfo(LCName, VF, Masked)))
-        break;
+
+    if (!IsVectorCall) {
+      for (bool Masked : {false, true}) {
+        if ((VD = LibInfo->getVectorMappingInfo(LCName, VF, Masked)))
+          break;
+      }
+      if (!VD)
+        return std::nullopt;
     }
-    if (!VD)
-      return std::nullopt;
 
     // Cost the call + mask.
     auto Cost =
         thisT()->getCallInstrCost(nullptr, RetTy, ICA.getArgTypes(), CostKind);
-    if (VD->isMasked()) {
+
+    if ((VD && VD->isMasked()) ||
+        (IsVectorCall &&
+         RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl))) {
       auto VecTy = VectorType::get(IntegerType::getInt1Ty(Ctx), VF);
       Cost += thisT()->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
                                       VecTy, {}, CostKind, 0, nullptr, {});
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 5b331e4444915..62d2f222110e4 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1725,9 +1725,17 @@ class SelectionDAG {
   /// value.
   LLVM_ABI bool
   expandMultipleResultFPLibCall(RTLIB::Libcall LC, SDNode *Node,
-                                SmallVectorImpl<SDValue> &Results,
+                                SmallVectorImpl<SDValue> &Results, EVT CallType,
                                 std::optional<unsigned> CallRetResNo = {});
 
+  // FIXME: This should be removed, and the form using RTLIB::Libcall should
+  // be preferred. Callers should resolve the exact type libcall to use.
+  LLVM_ABI bool
+  expandMultipleResultFPLibCall(StringRef LibcallName, CallingConv::ID CC,
+                                SDNode *Node, SmallVectorImpl<SDValue> &Results,
+                                std::optional<unsigned> CallRetResNo = {},
+                                bool IsVectorMasked = false);
+
   /// Expand the specified \c ISD::VAARG node as the Legalize pass would.
   LLVM_ABI SDValue expandVAArg(SDNode *Node);
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h
index c822b6530a441..0afe32a4ecc3c 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.h
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.h
@@ -83,16 +83,7 @@ struct RuntimeLibcallsInfo {
       const Triple &TT,
       ExceptionHandling ExceptionModel = ExceptionHandling::None,
       FloatABI::ABIType FloatABI = FloatABI::Default,
-      EABI EABIVersion = EABI::Default, StringRef ABIName = "") {
-    // FIXME: The ExceptionModel parameter is to handle the field in
-    // TargetOptions. This interface fails to distinguish the forced disable
-    // case for targets which support exceptions by default. This should
-    // probably be a module flag and removed from TargetOptions.
-    if (ExceptionModel == ExceptionHandling::None)
-      ExceptionModel = TT.getDefaultExceptionHandling();
-
-    initLibcalls(TT, ExceptionModel, FloatABI, EABIVersion, ABIName);
-  }
+      EABI EABIVersion = EABI::Default, StringRef ABIName = "");
 
   explicit RuntimeLibcallsInfo(const Module &M);
 
@@ -170,6 +161,10 @@ struct RuntimeLibcallsInfo {
   getFunctionTy(LLVMContext &Ctx, const Triple &TT, const DataLayout &DL,
                 RTLIB::LibcallImpl LibcallImpl) const;
 
+  /// Returns true if the function has a vector mask argument, which is assumed
+  /// to be the last argument.
+  static bool hasVectorMaskArgument(RTLIB::LibcallImpl Impl);
+
 private:
   LLVM_ABI static iota_range<RTLIB::LibcallImpl>
   lookupLibcallImplNameImpl(StringRef Name);
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td
index 24c1b035d0dda..a0b52395498c5 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -182,6 +182,10 @@ foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"] in {
   def MODF_#FPTy : RuntimeLibcall;
 }
 
+foreach VecTy = ["V4F32", "V2F64", "NXV4F32", "NXV2F64"] in {
+  def SINCOSPI_#VecTy : RuntimeLibcall;
+}
+
 def FEGETENV : RuntimeLibcall;
 def FESETENV : RuntimeLibcall;
 
@@ -971,10 +975,6 @@ def frexpf : RuntimeLibcallImpl<FREXP_F32>;
 def frexp : RuntimeLibcallImpl<FREXP_F64>;
 defm frexpl : LibmLongDoubleLibCall;
 
-def sincospif : RuntimeLibcallImpl<SINCOSPI_F32>;
-def sincospi : RuntimeLibcallImpl<SINCOSPI_F64>;
-defm sincospil : LibmLongDoubleLibCall;
-
 def modff : RuntimeLibcallImpl<MODF_F32>;
 def modf : RuntimeLibcallImpl<MODF_F64>;
 defm modfl : LibmLongDoubleLibCall;
@@ -1051,6 +1051,15 @@ def sincosf : RuntimeLibcallImpl<SINCOS_F32>;
 def sincos : RuntimeLibcallImpl<SINCOS_F64>;
 defm sincosl : LibmLongDoubleLibCall;
 
+// Exists in sun math library
+def sincospif : RuntimeLibcallImpl<SINCOSPI_F32>;
+def sincospi : RuntimeLibcallImpl<SINCOSPI_F64>;
+defm sincospil : LibmLongDoubleLibCall;
+
+// Exists on macOS
+def __sincospif : RuntimeLibcallImpl<SINCOSPI_F32>;
+def __sincospi : RuntimeLibcallImpl<SINCOSPI_F64>;
+
 def bzero : RuntimeLibcallImpl<BZERO>;
 def __bzero : RuntimeLibcallImpl<BZERO>;
 
@@ -1078,6 +1087,28 @@ def __security_check_cookie : RuntimeLibcallImpl<SECURITY_CHECK_COOKIE>;
 def __security_check_cookie_arm64ec : RuntimeLibcallImpl<SECURITY_CHECK_COOKIE>;
 
+//===----------------------------------------------------------------------===//
+// sleef calls
+//===----------------------------------------------------------------------===//
+
+defset list<RuntimeLibcallImpl> SleefLibcalls = {
+  def _ZGVnN4vl4l4_sincospif : RuntimeLibcallImpl<SINCOSPI_V4F32>;
+  def _ZGVnN2vl8l8_sincospi : RuntimeLibcallImpl<SINCOSPI_V2F64>;
+  def _ZGVsNxvl4l4_sincospif : RuntimeLibcallImpl<SINCOSPI_NXV4F32>;
+  def _ZGVsNxvl8l8_sincospi : RuntimeLibcallImpl<SINCOSPI_NXV2F64>;
+}
+
+//===----------------------------------------------------------------------===//
+// ARMPL calls
+//===----------------------------------------------------------------------===//
+
+defset list<RuntimeLibcallImpl> ARMPLLibcalls = {
+  def armpl_vsincospiq_f32 : RuntimeLibcallImpl<SINCOSPI_V4F32>;
+  def armpl_vsincospiq_f64 : RuntimeLibcallImpl<SINCOSPI_V2F64>;
+  def armpl_svsincospi_f32_x : RuntimeLibcallImpl<SINCOSPI_NXV4F32>;
+  def armpl_svsincospi_f64_x : RuntimeLibcallImpl<SINCOSPI_NXV2F64>;
+}
+
 //===----------------------------------------------------------------------===//
 // F128 libm Runtime Libcalls
 //===----------------------------------------------------------------------===//
@@ -1206,7 +1237,9 @@ defvar DefaultLibcallImpls32 = (add DefaultRuntimeLibcallImpls);
 defvar DefaultLibcallImpls64 =
     (add DefaultRuntimeLibcallImpls, Int128RTLibcalls);
 
-defvar DarwinSinCosStret = LibcallImpls<(add __sincosf_stret, __sincos_stret),
+// TODO: Guessing sincospi was added at the same time as sincos_stret
+defvar DarwinSinCosStret = LibcallImpls<(add __sincosf_stret, __sincos_stret,
+                                             __sincospif, __sincospi),
                                         darwinHasSinCosStret>;
 defvar DarwinExp10 = LibcallImpls<(add __exp10f, __exp10), darwinHasExp10>;
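Aside on the TableGen entries above (a sketch, not part of the patch): each vector SINCOSPI_* libcall is resolved in two steps, first from the value type to the RTLIB enum, then to whichever implementation the configured vector library made available. The helper name below is hypothetical; the APIs it calls are the ones this patch touches.

  // Hypothetical helper: resolve a vector sincospi call, e.g. nxv4f32 ->
  // RTLIB::SINCOSPI_NXV4F32 -> RTLIB::impl__ZGVsNxvl4l4_sincospif (SLEEF) or
  // RTLIB::impl_armpl_svsincospi_f32_x (ArmPL), depending on availability.
  RTLIB::LibcallImpl resolveSincospiImpl(const TargetLowering &TLI, EVT VT) {
    RTLIB::Libcall LC = RTLIB::getSINCOSPI(VT);
    if (LC == RTLIB::UNKNOWN_LIBCALL)
      return RTLIB::Unsupported; // No libcall entry for this type.
    return TLI.getLibcallImpl(LC);
  }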
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 316aacdf6978e..a0baf821698a8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4842,9 +4842,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
     RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
                             ? RTLIB::getSINCOS(VT)
                             : RTLIB::getSINCOSPI(VT);
-    bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results);
-    if (!Expanded)
-      llvm_unreachable("Expected scalar FSINCOS[PI] to expand to libcall!");
+    bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT);
+    if (!Expanded) {
+      DAG.getContext()->emitError(Twine("no libcall available for ") +
+                                  Node->getOperationName(&DAG));
+      SDValue Poison = DAG.getPOISON(VT);
+      Results.push_back(Poison);
+      Results.push_back(Poison);
+    }
+
     break;
   }
   case ISD::FLOG:
@@ -4934,7 +4940,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
     EVT VT = Node->getValueType(0);
     RTLIB::Libcall LC =
        Node->getOpcode() == ISD::FMODF ? RTLIB::getMODF(VT) : RTLIB::getFREXP(VT);
-    bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results,
+    bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT,
                                                       /*CallRetResNo=*/0);
     if (!Expanded)
       llvm_unreachable("Expected scalar FFREXP/FMODF to expand to libcall!");
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 58983cb57d7f6..29c4dac12a81a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1726,7 +1726,8 @@ void DAGTypeLegalizer::ExpandFloatRes_UnaryWithTwoFPResults(
     SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo) {
   assert(!N->isStrictFPOpcode() && "strictfp not implemented");
   SmallVector<SDValue> Results;
-  DAG.expandMultipleResultFPLibCall(LC, N, Results, CallRetResNo);
+  DAG.expandMultipleResultFPLibCall(LC, N, Results, N->getValueType(0),
+                                    CallRetResNo);
   for (auto [ResNo, Res] : enumerate(Results)) {
     SDValue Lo, Hi;
     GetPairElements(Res, Lo, Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 94751be5b7986..f5a54497c8a98 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1268,20 +1268,30 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
       return;
     break;
-  case ISD::FSINCOS:
+  case ISD::FSINCOSPI: {
+    EVT VT = Node->getValueType(0);
+    RTLIB::Libcall LC = RTLIB::getSINCOSPI(VT);
+    if (LC != RTLIB::UNKNOWN_LIBCALL &&
+        DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT))
+      return;
+
+    // TODO: Try to see if there's a narrower call available to use before
+    // scalarizing.
+    break;
+  }
+  case ISD::FSINCOS: {
+    // FIXME: Try to directly match vector case like fsincospi
     EVT VT = Node->getValueType(0).getVectorElementType();
-    RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
-                            ? RTLIB::getSINCOS(VT)
-                            : RTLIB::getSINCOSPI(VT);
-    if (DAG.expandMultipleResultFPLibCall(LC, Node, Results))
+    RTLIB::Libcall LC = RTLIB::getSINCOS(VT);
+    if (DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT))
       return;
     break;
   }
   case ISD::FMODF: {
-    RTLIB::Libcall LC =
-        RTLIB::getMODF(Node->getValueType(0).getVectorElementType());
-    if (DAG.expandMultipleResultFPLibCall(LC, Node, Results,
+    EVT VT = Node->getValueType(0).getVectorElementType();
+    RTLIB::Libcall LC = RTLIB::getMODF(VT);
+    if (DAG.expandMultipleResultFPLibCall(LC, Node, Results, VT,
                                           /*CallRetResNo=*/0))
       return;
     break;
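Aside (sketch, not part of the patch): for masked variants such as armpl_svsincospi_f32_x, the expansion in SelectionDAG.cpp below appends an all-true predicate as the final call operand. Its type is the target's setcc result type for the call's vector type, e.g. <vscale x 4 x i1> for nxv4f32. A minimal sketch of that construction, using the same APIs the patch uses; the helper name is hypothetical:

  // Hypothetical helper mirroring the mask construction in the expansion:
  // an all-true splat in the target's boolean vector type for VT.
  SDValue buildAllTrueMask(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
                           const SDLoc &DL) {
    EVT MaskVT =
        TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    return DAG.getBoolConstant(true, DL, MaskVT, VT);
  }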
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index e70fa6e0349da..b5d502b90c90c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2514,18 +2514,20 @@ static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode,
 
 bool SelectionDAG::expandMultipleResultFPLibCall(
     RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl<SDValue> &Results,
-    std::optional<unsigned> CallRetResNo) {
-  LLVMContext &Ctx = *getContext();
-  EVT VT = Node->getValueType(0);
-  unsigned NumResults = Node->getNumValues();
-
+    EVT CallVT, std::optional<unsigned> CallRetResNo) {
   if (LC == RTLIB::UNKNOWN_LIBCALL)
     return false;
 
-  const char *LCName = TLI->getLibcallName(LC);
-  if (!LCName)
+  EVT VT = Node->getValueType(0);
+
+  RTLIB::LibcallImpl Impl = TLI->getLibcallImpl(LC);
+  if (Impl == RTLIB::Unsupported)
     return false;
 
+  StringRef LCName = TLI->getLibcallImplName(Impl);
+
+  // FIXME: This should not use TargetLibraryInfo. There should be
+  // RTLIB::Libcall entries for each used vector type, and directly matched.
   auto getVecDesc = [&]() -> VecDesc const * {
     for (bool Masked : {false, true}) {
       if (VecDesc const *VD = getLibInfo().getVectorMappingInfo(
@@ -2538,9 +2540,34 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
 
   // For vector types, we must find a vector mapping for the libcall.
   VecDesc const *VD = nullptr;
-  if (VT.isVector() && !(VD = getVecDesc()))
+  if (VT.isVector() && !CallVT.isVector() && !(VD = getVecDesc()))
     return false;
 
+  bool IsMasked = (VD && VD->isMasked()) ||
+                  RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(Impl);
+
+  // This wrapper function exists because getVectorMappingInfo works in terms
+  // of function names instead of RTLIB enums.
+
+  // FIXME: If we used a vector mapping, this assumes the calling convention of
+  // the vector function is the same as the scalar.
+
+  StringRef Name = VD ? VD->getVectorFnName() : LCName;
+
+  return expandMultipleResultFPLibCall(Name,
+                                       TLI->getLibcallImplCallingConv(Impl),
+                                       Node, Results, CallRetResNo, IsMasked);
+}
+
+// FIXME: This belongs in TargetLowering
+bool SelectionDAG::expandMultipleResultFPLibCall(
+    StringRef Name, CallingConv::ID CC, SDNode *Node,
+    SmallVectorImpl<SDValue> &Results, std::optional<unsigned> CallRetResNo,
+    bool IsMasked) {
+  LLVMContext &Ctx = *getContext();
+  EVT VT = Node->getValueType(0);
+  unsigned NumResults = Node->getNumValues();
+
   // Find users of the node that store the results (and share input chains). The
   // destination pointers can be used instead of creating stack allocations.
   SDValue StoresInChain;
@@ -2598,7 +2625,7 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
   SDLoc DL(Node);
 
   // Pass the vector mask (if required).
-  if (VD && VD->isMasked()) {
+  if (IsMasked) {
     EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), Ctx, VT);
     SDValue Mask = getBoolConstant(true, DL, MaskVT, VT);
     Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx));
@@ -2608,11 +2635,11 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
           ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
           : Type::getVoidTy(Ctx);
   SDValue InChain = StoresInChain ? StoresInChain : getEntryNode();
-  SDValue Callee = getExternalSymbol(VD ? VD->getVectorFnName().data() : LCName,
-                                     TLI->getPointerTy(getDataLayout()));
+  SDValue Callee =
+      getExternalSymbol(Name.data(), TLI->getPointerTy(getDataLayout()));
   TargetLowering::CallLoweringInfo CLI(*this);
-  CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
-      TLI->getLibcallCallingConv(LC), RetType, Callee, std::move(Args));
+  CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(CC, RetType, Callee,
+                                                     std::move(Args));
 
   auto [Call, CallChain] = TLI->LowerCallTo(CLI);
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 1cc591c17f9c3..814b4b57a0b9b 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -430,6 +430,24 @@ RTLIB::Libcall RTLIB::getSINCOS(EVT RetVT) {
 }
 
 RTLIB::Libcall RTLIB::getSINCOSPI(EVT RetVT) {
+  // TODO: Tablegen should generate this function
+  if (RetVT.isVector()) {
+    if (!RetVT.isSimple())
+      return RTLIB::UNKNOWN_LIBCALL;
+    switch (RetVT.getSimpleVT().SimpleTy) {
+    case MVT::v4f32:
+      return RTLIB::SINCOSPI_V4F32;
+    case MVT::v2f64:
+      return RTLIB::SINCOSPI_V2F64;
+    case MVT::nxv4f32:
+      return RTLIB::SINCOSPI_NXV4F32;
+    case MVT::nxv2f64:
+      return RTLIB::SINCOSPI_NXV2F64;
+    default:
+      return RTLIB::UNKNOWN_LIBCALL;
+    }
+  }
+
   return getFPLibCall(RetVT, SINCOSPI_F32, SINCOSPI_F64, SINCOSPI_F80,
                       SINCOSPI_F128, SINCOSPI_PPCF128);
 }
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index f4c5c6ff35af6..795621701d910 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -10,6 +10,7 @@
 #include "llvm/ADT/FloatingPointMode.h"
 #include "llvm/ADT/StringTable.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/SystemLibraries.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/xxhash.h"
 #include "llvm/TargetParser/ARMTargetParser.h"
@@ -25,6 +26,40 @@ using namespace RTLIB;
 #define DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME
 #include "llvm/IR/RuntimeLibcalls.inc"
 
+RuntimeLibcallsInfo::RuntimeLibcallsInfo(const Triple &TT,
+                                         ExceptionHandling ExceptionModel,
+                                         FloatABI::ABIType FloatABI,
+                                         EABI EABIVersion, StringRef ABIName) {
+  // FIXME: The ExceptionModel parameter is to handle the field in
+  // TargetOptions. This interface fails to distinguish the forced disable
+  // case for targets which support exceptions by default. This should
+  // probably be a module flag and removed from TargetOptions.
+  if (ExceptionModel == ExceptionHandling::None)
+    ExceptionModel = TT.getDefaultExceptionHandling();
+
+  initLibcalls(TT, ExceptionModel, FloatABI, EABIVersion, ABIName);
+
+  // TODO: Tablegen should generate these sets
+  switch (ClVectorLibrary) {
+  case VectorLibrary::SLEEFGNUABI:
+    for (RTLIB::LibcallImpl Impl :
+         {RTLIB::impl__ZGVnN4vl4l4_sincospif, RTLIB::impl__ZGVnN2vl8l8_sincospi,
+          RTLIB::impl__ZGVsNxvl4l4_sincospif,
+          RTLIB::impl__ZGVsNxvl8l8_sincospi})
+      setAvailable(Impl);
+    break;
+  case VectorLibrary::ArmPL:
+    for (RTLIB::LibcallImpl Impl :
+         {RTLIB::impl_armpl_vsincospiq_f32, RTLIB::impl_armpl_vsincospiq_f64,
+          RTLIB::impl_armpl_svsincospi_f32_x,
+          RTLIB::impl_armpl_svsincospi_f64_x})
+      setAvailable(Impl);
+    break;
+  default:
+    break;
+  }
+}
+
 RuntimeLibcallsInfo::RuntimeLibcallsInfo(const Module &M)
     : RuntimeLibcallsInfo(M.getTargetTriple()) {
   // TODO: Consider module flags
@@ -88,6 +123,8 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const Triple &TT,
   static constexpr Attribute::AttrKind CommonFnAttrs[] = {
       Attribute::NoCallback, Attribute::NoFree, Attribute::NoSync,
       Attribute::NoUnwind, Attribute::WillReturn};
+  static constexpr Attribute::AttrKind CommonPtrArgAttrs[] = {
+      Attribute::NoAlias, Attribute::WriteOnly, Attribute::NonNull};
 
   switch (LibcallImpl) {
   case RTLIB::impl___sincos_stret:
@@ -151,9 +188,67 @@ RuntimeLibcallsInfo::getFunctionTy(LLVMContext &Ctx, const Triple &TT,
                            fcNegNormal));
     return {FuncTy, Attrs};
   }
+  case RTLIB::impl__ZGVnN4vl4l4_sincospif:
+  case RTLIB::impl__ZGVnN2vl8l8_sincospi:
+  case RTLIB::impl__ZGVsNxvl4l4_sincospif:
+  case RTLIB::impl__ZGVsNxvl8l8_sincospi:
+  case RTLIB::impl_armpl_vsincospiq_f32:
+  case RTLIB::impl_armpl_vsincospiq_f64:
+  case RTLIB::impl_armpl_svsincospi_f32_x:
+  case RTLIB::impl_armpl_svsincospi_f64_x: {
+    AttrBuilder FuncAttrBuilder(Ctx);
+
+    bool IsF32 = LibcallImpl == RTLIB::impl__ZGVnN4vl4l4_sincospif ||
+                 LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincospif ||
+                 LibcallImpl == RTLIB::impl_armpl_vsincospiq_f32 ||
+                 LibcallImpl == RTLIB::impl_armpl_svsincospi_f32_x;
+    Type *ScalarTy = IsF32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx);
+    unsigned EC = IsF32 ? 4 : 2;
+
+    bool IsScalable = LibcallImpl == RTLIB::impl__ZGVsNxvl4l4_sincospif ||
+                      LibcallImpl == RTLIB::impl__ZGVsNxvl8l8_sincospi ||
+                      LibcallImpl == RTLIB::impl_armpl_svsincospi_f32_x ||
+                      LibcallImpl == RTLIB::impl_armpl_svsincospi_f64_x;
+    VectorType *VecTy = VectorType::get(ScalarTy, EC, IsScalable);
+
+    for (Attribute::AttrKind Attr : CommonFnAttrs)
+      FuncAttrBuilder.addAttribute(Attr);
+    FuncAttrBuilder.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Mod));
+
+    AttributeList Attrs;
+    Attrs = Attrs.addFnAttributes(Ctx, FuncAttrBuilder);
+
+    {
+      AttrBuilder ArgAttrBuilder(Ctx);
+      for (Attribute::AttrKind AK : CommonPtrArgAttrs)
+        ArgAttrBuilder.addAttribute(AK);
+      ArgAttrBuilder.addAlignmentAttr(DL.getABITypeAlign(VecTy));
+      Attrs = Attrs.addParamAttributes(Ctx, 1, ArgAttrBuilder);
+      Attrs = Attrs.addParamAttributes(Ctx, 2, ArgAttrBuilder);
+    }
+
+    PointerType *PtrTy = PointerType::get(Ctx, 0);
+    SmallVector<Type *, 4> ArgTys = {VecTy, PtrTy, PtrTy};
+    if (hasVectorMaskArgument(LibcallImpl))
+      ArgTys.push_back(VectorType::get(Type::getInt1Ty(Ctx), EC, IsScalable));
+
+    return {FunctionType::get(Type::getVoidTy(Ctx), ArgTys, false), Attrs};
+  }
   default:
     return {};
   }
 
   return {};
 }
+
+bool RuntimeLibcallsInfo::hasVectorMaskArgument(RTLIB::LibcallImpl Impl) {
+  /// FIXME: This should be generated by tablegen and support the argument at
+  /// an arbitrary position
+  switch (Impl) {
+  case RTLIB::impl_armpl_svsincospi_f32_x:
+  case RTLIB::impl_armpl_svsincospi_f64_x:
+    return true;
+  default:
+    return false;
+  }
+}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 05a854a0bf3fa..5bce539c45341 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -635,6 +635,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FROUNDEVEN, VT, Action);
     setOperationAction(ISD::FTRUNC, VT, Action);
     setOperationAction(ISD::FLDEXP, VT, Action);
+    setOperationAction(ISD::FSINCOSPI, VT, Action);
   };
 
   if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
diff --git a/llvm/test/CodeGen/AArch64/llvm.sincospi.error.ll b/llvm/test/CodeGen/AArch64/llvm.sincospi.error.ll
new file mode 100644
index 0000000000000..d074d9ae24641
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/llvm.sincospi.error.ll
@@ -0,0 +1,13 @@
+; RUN: not llc -mtriple=aarch64-gnu-linux -filetype=null %s 2>&1 | FileCheck %s
+
+; CHECK: error: no libcall available for fsincospi
+define { float, float } @test_sincospi_f32(float %a) {
+  %result = call { float, float } @llvm.sincospi.f32(float %a)
+  ret { float, float } %result
+}
+
+; CHECK: error: no libcall available for fsincospi
+define { double, double } @test_sincospi_f64(double %a) {
+  %result = call { double, double } @llvm.sincospi.f64(double %a)
+  ret { double, double } %result
+}
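Aside (sketch, not part of the patch): the error test above exercises the failure path, where no sincospi libcall exists for the target and the legalizer now emits a diagnostic instead of asserting. The vector success path reduces to the following; the helper name is hypothetical, the APIs are the ones added in this patch.

  // Hypothetical helper: expand a vector FSINCOSPI node directly to a vector
  // libcall. Passing the vector VT as CallType tells the expansion the call
  // itself is vector-typed, so no TargetLibraryInfo vector-mapping lookup is
  // needed.
  bool expandVectorSincospi(SelectionDAG &DAG, SDNode *N,
                            SmallVectorImpl<SDValue> &Results) {
    EVT VT = N->getValueType(0);
    RTLIB::Libcall LC = RTLIB::getSINCOSPI(VT);
    return LC != RTLIB::UNKNOWN_LIBCALL &&
           DAG.expandMultipleResultFPLibCall(LC, N, Results, VT);
  }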
diff --git a/llvm/test/CodeGen/AArch64/llvm.sincospi.ll b/llvm/test/CodeGen/AArch64/llvm.sincospi.ll
index d1d7d92adc05a..b386df077c09d 100644
--- a/llvm/test/CodeGen/AArch64/llvm.sincospi.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.sincospi.ll
@@ -1,268 +1,250 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=aarch64-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=arm64-apple-macosx10.9 < %s | FileCheck %s
 
-define { half, half } @test_sincospi_f16(half %a) {
+define { half, half } @test_sincospi_f16(half %a) #0 {
 ; CHECK-LABEL: test_sincospi_f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
 ; CHECK-NEXT: fcvt s0, h0
 ; CHECK-NEXT: add x0, sp, #12
 ; CHECK-NEXT: add x1, sp, #8
-; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: bl ___sincospif
 ; CHECK-NEXT: ldp s1, s0, [sp, #8]
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; CHECK-NEXT: fcvt h0, s0
 ; CHECK-NEXT: fcvt h1, s1
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
 ; CHECK-NEXT: ret
   %result = call { half, half } @llvm.sincospi.f16(half %a)
   ret { half, half } %result
 }
 
-define half @test_sincospi_f16_only_use_sin(half %a) {
+define half @test_sincospi_f16_only_use_sin(half %a) #0 {
 ; CHECK-LABEL: test_sincospi_f16_only_use_sin:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
 ; CHECK-NEXT: fcvt s0, h0
 ; CHECK-NEXT: add x0, sp, #12
 ; CHECK-NEXT: add x1, sp, #8
-; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: bl ___sincospif
 ; CHECK-NEXT: ldr s0, [sp, #12]
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; CHECK-NEXT: fcvt h0, s0
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
 ; CHECK-NEXT: ret
   %result = call { half, half } @llvm.sincospi.f16(half %a)
   %result.0 = extractvalue { half, half } %result, 0
   ret half %result.0
 }
 
-define half @test_sincospi_f16_only_use_cos(half %a) {
+define half @test_sincospi_f16_only_use_cos(half %a) #0 {
 ; CHECK-LABEL: test_sincospi_f16_only_use_cos:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
 ; CHECK-NEXT: fcvt s0, h0
 ; CHECK-NEXT: add x0, sp, #12
 ; CHECK-NEXT: add x1, sp, #8
-; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: bl ___sincospif
 ; CHECK-NEXT: ldr s0, [sp, #8]
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; CHECK-NEXT: fcvt h0, s0
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
 ; CHECK-NEXT: ret
   %result = call { half, half } @llvm.sincospi.f16(half %a)
   %result.1 = extractvalue { half, half } %result, 1
   ret half %result.1
 }
 
-define { <2 x half>, <2 x half> } @test_sincospi_v2f16(<2 x half> %a) {
+define { <2 x half>, <2 x half> } @test_sincospi_v2f16(<2 x half> %a) #0 {
 ; CHECK-LABEL: test_sincospi_v2f16:
-; CHECK: // %bb.0:
+; CHECK: ; %bb.0:
 ; CHECK-NEXT: sub sp, sp, #64
-; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h1, v0.h[1]
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: add x0, sp, #36
-; CHECK-NEXT: add x1, sp, #32
-; CHECK-NEXT: fcvt s0, h1
-; CHECK-NEXT: bl sincospif
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h1, v0[1]
+; CHECK-NEXT: str q0, [sp] ; 16-byte Folded Spill
 ; CHECK-NEXT: add x0, sp, #28
 ; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: fcvt s0, h1
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #20
+; CHECK-NEXT: add x1, sp, #16
 ; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: bl sincospif
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #36
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: mov h0, v0[2]
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload
 ; CHECK-NEXT: add x0, sp, #44
 ; CHECK-NEXT: add x1, sp, #40
-; CHECK-NEXT: mov h0, v0.h[2]
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: bl sincospif
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: add x0, sp, #60
-; CHECK-NEXT: add x1, sp, #56
-; CHECK-NEXT: mov h0, v0.h[3]
+; CHECK-NEXT: mov h0, v0[3]
 ; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: bl sincospif
-; CHECK-NEXT: ldp s2, s0, [sp, #32]
-; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
-; CHECK-NEXT: ldp s3, s1, [sp, #24]
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldp s2, s0, [sp, #24]
+; CHECK-NEXT: ldp s3, s1, [sp, #16]
+; CHECK-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
 ; CHECK-NEXT: fcvt h4, s0
 ; CHECK-NEXT: fcvt h2, s2
 ; CHECK-NEXT: fcvt h0, s1
 ; CHECK-NEXT: fcvt h1, s3
-; CHECK-NEXT: ldp s5, s3, [sp, #40]
+; CHECK-NEXT: ldp s5, s3, [sp, #32]
 ; CHECK-NEXT: fcvt h3, s3
-; CHECK-NEXT: mov v0.h[1], v4.h[0]
+; CHECK-NEXT: mov.h v0[1], v4[0]
 ; CHECK-NEXT: fcvt h4, s5
-; CHECK-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-NEXT: ldp s5, s2, [sp, #56]
-; CHECK-NEXT: mov v0.h[2], v3.h[0]
+; CHECK-NEXT: mov.h v1[1], v2[0]
+; CHECK-NEXT: ldp s5, s2, [sp, #40]
+; CHECK-NEXT: mov.h v0[2], v3[0]
 ; CHECK-NEXT: fcvt h2, s2
 ; CHECK-NEXT: fcvt h3, s5
-; CHECK-NEXT: mov v1.h[2], v4.h[0]
-; CHECK-NEXT: mov v0.h[3], v2.h[0]
-; CHECK-NEXT: mov v1.h[3], v3.h[0]
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: mov.h v1[2], v4[0]
+; CHECK-NEXT: mov.h v0[3], v2[0]
+; CHECK-NEXT: mov.h v1[3], v3[0]
+; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q1
 ; CHECK-NEXT: add sp, sp, #64
 ; CHECK-NEXT: ret
   %result = call { <2 x half>, <2 x half> } @llvm.sincospi.v2f16(<2 x half> %a)
   ret { <2 x half>, <2 x half> } %result
 }
 
-define { float, float } @test_sincospi_f32(float %a) {
+define { float, float } @test_sincospi_f32(float %a) #0 {
 ; CHECK-LABEL: test_sincospi_f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
 ; CHECK-NEXT: add x0, sp, #12
 ; CHECK-NEXT: add x1, sp, #8
-; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: bl ___sincospif
 ; CHECK-NEXT: ldp s1, s0, [sp, #8]
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
 ; CHECK-NEXT: ret
   %result = call { float, float } @llvm.sincospi.f32(float %a)
   ret { float, float } %result
 }
 
-define { <3 x float>, <3 x float> } @test_sincospi_v3f32(<3 x float> %a) {
+define { <3 x float>, <3 x float> } @test_sincospi_v3f32(<3 x float> %a) #0 {
 ; CHECK-LABEL: test_sincospi_v3f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #80
-; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w21, -24
-; CHECK-NEXT: .cfi_offset w22, -32
-; CHECK-NEXT: .cfi_offset w30, -48
-; CHECK-NEXT: add x0, sp, #20
-; CHECK-NEXT: add x1, sp, #16
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT: bl sincospif
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #96
 ; CHECK-NEXT: add x0, sp, #28
 ; CHECK-NEXT: add x1, sp, #24
-; CHECK-NEXT: add x19, sp, #28
-; CHECK-NEXT: add x20, sp, #24
-; CHECK-NEXT: mov s0, v0.s[1]
-; CHECK-NEXT: bl sincospif
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: stp x22, x21, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+; CHECK-NEXT: str q0, [sp] ; 16-byte Folded Spill
+; CHECK-NEXT: ; kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #36
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: add x19, sp, #36
+; CHECK-NEXT: add x20, sp, #32
+; CHECK-NEXT: mov s0, v0[1]
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload
 ; CHECK-NEXT: add x0, sp, #44
 ; CHECK-NEXT: add x1, sp, #40
 ; CHECK-NEXT: add x21, sp, #44
 ; CHECK-NEXT: add x22, sp, #40
-; CHECK-NEXT: mov s0, v0.s[2]
-; CHECK-NEXT: bl sincospif
-; CHECK-NEXT: ldp s1, s0, [sp, #16]
-; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT: ld1 { v0.s }[1], [x19]
-; CHECK-NEXT: ld1 { v1.s }[1], [x20]
-; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: ld1 { v0.s }[2], [x21]
-; CHECK-NEXT: ld1 { v1.s }[2], [x22]
-; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #80
+; CHECK-NEXT: mov s0, v0[2]
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldp s1, s0, [sp, #24]
+; CHECK-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+; CHECK-NEXT: ld1.s { v0 }[1], [x19]
+; CHECK-NEXT: ld1.s { v1 }[1], [x20]
+; CHECK-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT: ld1.s { v0 }[2], [x21]
+; CHECK-NEXT: ld1.s { v1 }[2], [x22]
+; CHECK-NEXT: ldp x22, x21, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #96
 ; CHECK-NEXT: ret
   %result = call { <3 x float>, <3 x float> } @llvm.sincospi.v3f32(<3 x float> %a)
   ret { <3 x float>, <3 x float> } %result
 }
 
-define { <2 x float>, <2 x float> } @test_sincospi_v2f32(<2 x float> %a) {
+define { <2 x float>, <2 x float> } @test_sincospi_v2f32(<2 x float> %a) #0 {
 ; CHECK-LABEL: test_sincospi_v2f32:
-; CHECK: // %bb.0:
+; CHECK: ; %bb.0:
 ; CHECK-NEXT: sub sp, sp, #64
-; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w30, -32
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: add x0, sp, #44
-; CHECK-NEXT: add x1, sp, #40
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT: bl sincospif
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT: add x0, sp, #28
 ; CHECK-NEXT: add x1, sp, #24
-; CHECK-NEXT: add x19, sp, #28
-; CHECK-NEXT: add x20, sp, #24
-; CHECK-NEXT: mov s0, v0.s[1]
-; CHECK-NEXT: bl sincospif
-; CHECK-NEXT: ldp s1, s0, [sp, #40]
-; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT: ld1 { v0.s }[1], [x19]
-; CHECK-NEXT: ld1 { v1.s }[1], [x20]
-; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: stp x20, x19, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: str q0, [sp] ; 16-byte Folded Spill
+; CHECK-NEXT: ; kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #20
+; CHECK-NEXT: add x1, sp, #16
+; CHECK-NEXT: add x19, sp, #20
+; CHECK-NEXT: add x20, sp, #16
+; CHECK-NEXT: mov s0, v0[1]
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldp s1, s0, [sp, #24]
+; CHECK-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT: ld1.s { v0 }[1], [x19]
+; CHECK-NEXT: ld1.s { v1 }[1], [x20]
+; CHECK-NEXT: ldp x20, x19, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q1
 ; CHECK-NEXT: add sp, sp, #64
 ; CHECK-NEXT: ret
   %result = call { <2 x float>, <2 x float> } @llvm.sincospi.v2f32(<2 x float> %a)
   ret { <2 x float>, <2 x float> } %result
 }
 
-define { double, double } @test_sincospi_f64(double %a) {
+define { double, double } @test_sincospi_f64(double %a) #0 {
 ; CHECK-LABEL: test_sincospi_f64:
-; CHECK: // %bb.0:
+; CHECK: ; %bb.0:
 ; CHECK-NEXT: sub sp, sp, #32
-; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: add x0, sp, #24
-; CHECK-NEXT: add x1, sp, #8
-; CHECK-NEXT: bl sincospi
-; CHECK-NEXT: ldr d0, [sp, #24]
-; CHECK-NEXT: ldr d1, [sp, #8]
-; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #8
+; CHECK-NEXT: mov x1, sp
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: bl ___sincospi
+; CHECK-NEXT: ldp d1, d0, [sp]
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; CHECK-NEXT: add sp, sp, #32
 ; CHECK-NEXT: ret
   %result = call { double, double } @llvm.sincospi.f64(double %a)
   ret { double, double } %result
 }
 
-define { <2 x double>, <2 x double> } @test_sincospi_v2f64(<2 x double> %a) {
+define { <2 x double>, <2 x double> } @test_sincospi_v2f64(<2 x double> %a) #0 {
 ; CHECK-LABEL: test_sincospi_v2f64:
-; CHECK: // %bb.0:
+; CHECK: ; %bb.0:
 ; CHECK-NEXT: sub sp, sp, #80
-; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 80
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w30, -32
-; CHECK-NEXT: add x0, sp, #56
-; CHECK-NEXT: add x1, sp, #40
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: bl sincospi
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: add x0, sp, #32
-; CHECK-NEXT: add x1, sp, #24
-; CHECK-NEXT: add x19, sp, #32
-; CHECK-NEXT: add x20, sp, #24
-; CHECK-NEXT: mov d0, v0.d[1]
-; CHECK-NEXT: bl sincospi
-; CHECK-NEXT: ldr d0, [sp, #56]
-; CHECK-NEXT: ldr d1, [sp, #40]
-; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
-; CHECK-NEXT: ld1 { v0.d }[1], [x19]
-; CHECK-NEXT: ld1 { v1.d }[1], [x20]
-; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #40
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: stp x20, x19, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT: str q0, [sp] ; 16-byte Folded Spill
+; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: bl ___sincospi
+; CHECK-NEXT: ldr q0, [sp] ; 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #24
+; CHECK-NEXT: add x1, sp, #16
+; CHECK-NEXT: add x19, sp, #24
+; CHECK-NEXT: add x20, sp, #16
+; CHECK-NEXT: mov d0, v0[1]
+; CHECK-NEXT: bl ___sincospi
+; CHECK-NEXT: ldp d1, d0, [sp, #32]
+; CHECK-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT: ld1.d { v0 }[1], [x19]
+; CHECK-NEXT: ld1.d { v1 }[1], [x20]
+; CHECK-NEXT: ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
 ; CHECK-NEXT: add sp, sp, #80
 ; CHECK-NEXT: ret
   %result = call { <2 x double>, <2 x double> } @llvm.sincospi.v2f64(<2 x double> %a)
   ret { <2 x double>, <2 x double> } %result
 }
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/ARM/llvm.sincospi.ll b/llvm/test/CodeGen/ARM/llvm.sincospi.ll
new file mode 100644
index 0000000000000..91bf0aaf1806a
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/llvm.sincospi.ll
@@ -0,0 +1,249 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=thumbv7-apple-ios7.0.0 < %s | FileCheck %s
+
+define { half, half } @test_sincospi_f16(half %a) #0 {
+; CHECK-LABEL: test_sincospi_f16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: bl ___extendhfsf2
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr r0, [sp, #4]
+; CHECK-NEXT: bl ___truncsfhf2
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: ldr r0, [sp]
+; CHECK-NEXT: bl ___truncsfhf2
+; CHECK-NEXT: mov r1, r0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: pop {r4, pc}
+  %result = call { half, half } @llvm.sincospi.f16(half %a)
+  ret { half, half } %result
+}
+
+define half @test_sincospi_f16_only_use_sin(half %a) #0 {
+; CHECK-LABEL: test_sincospi_f16_only_use_sin:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: str lr, [sp, #-4]!
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: bl ___extendhfsf2
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr r0, [sp, #4]
+; CHECK-NEXT: bl ___truncsfhf2
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: ldr lr, [sp], #4
+; CHECK-NEXT: bx lr
+  %result = call { half, half } @llvm.sincospi.f16(half %a)
+  %result.0 = extractvalue { half, half } %result, 0
+  ret half %result.0
+}
+
+define half @test_sincospi_f16_only_use_cos(half %a) #0 {
+; CHECK-LABEL: test_sincospi_f16_only_use_cos:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: str lr, [sp, #-4]!
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: bl ___extendhfsf2
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr r0, [sp]
+; CHECK-NEXT: bl ___truncsfhf2
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: ldr lr, [sp], #4
+; CHECK-NEXT: bx lr
+  %result = call { half, half } @llvm.sincospi.f16(half %a)
+  %result.1 = extractvalue { half, half } %result, 1
+  ret half %result.1
+}
+
+define { <2 x half>, <2 x half> } @test_sincospi_v2f16(<2 x half> %a) #0 {
+; CHECK-LABEL: test_sincospi_v2f16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: sub sp, #24
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl ___extendhfsf2
+; CHECK-NEXT: add r1, sp, #12
+; CHECK-NEXT: add r2, sp, #8
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: bl ___extendhfsf2
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr r0, [sp, #12]
+; CHECK-NEXT: bl ___truncsfhf2
+; CHECK-NEXT: ldr r1, [sp, #4]
+; CHECK-NEXT: strh.w r0, [sp, #22]
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl ___truncsfhf2
+; CHECK-NEXT: strh.w r0, [sp, #20]
+; CHECK-NEXT: add r0, sp, #20
+; CHECK-NEXT: vld1.32 {d8[0]}, [r0:32]
+; CHECK-NEXT: ldr r0, [sp, #8]
+; CHECK-NEXT: bl ___truncsfhf2
+; CHECK-NEXT: ldr r1, [sp]
+; CHECK-NEXT: strh.w r0, [sp, #18]
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl ___truncsfhf2
+; CHECK-NEXT: strh.w r0, [sp, #16]
+; CHECK-NEXT: add r0, sp, #16
+; CHECK-NEXT: vmovl.u16 q9, d8
+; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32]
+; CHECK-NEXT: vmovl.u16 q8, d16
+; CHECK-NEXT: vmov.32 r0, d18[0]
+; CHECK-NEXT: vmov.32 r1, d18[1]
+; CHECK-NEXT: vmov.32 r2, d16[0]
+; CHECK-NEXT: vmov.32 r3, d16[1]
+; CHECK-NEXT: add sp, #24
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: pop {r4, pc}
+  %result = call { <2 x half>, <2 x half> } @llvm.sincospi.v2f16(<2 x half> %a)
+  ret { <2 x half>, <2 x half> } %result
+}
+
+define { float, float } @test_sincospi_f32(float %a) #0 {
+; CHECK-LABEL: test_sincospi_f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: str lr, [sp, #-4]!
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldrd r1, r0, [sp], #8
+; CHECK-NEXT: ldr lr, [sp], #4
+; CHECK-NEXT: bx lr
+  %result = call { float, float } @llvm.sincospi.f32(float %a)
+  ret { float, float } %result
+}
+
+define { <2 x float>, <2 x float> } @test_sincospi_v2f32(<2 x float> %a) #0 {
+; CHECK-LABEL: test_sincospi_v2f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: str lr, [sp, #-4]!
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: vmov r0, s17
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: add r1, sp, #12
+; CHECK-NEXT: add r2, sp, #8
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: vldr s1, [sp, #4]
+; CHECK-NEXT: vldr s3, [sp]
+; CHECK-NEXT: vldr s0, [sp, #12]
+; CHECK-NEXT: vldr s2, [sp, #8]
+; CHECK-NEXT: vmov r0, r1, d0
+; CHECK-NEXT: vmov r2, r3, d1
+; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: ldr lr, [sp], #4
+; CHECK-NEXT: bx lr
+  %result = call { <2 x float>, <2 x float> } @llvm.sincospi.v2f32(<2 x float> %a)
+  ret { <2 x float>, <2 x float> } %result
+}
+
+define { <3 x float>, <3 x float> } @test_sincospi_v3f32(<3 x float> %a) #0 {
+; CHECK-LABEL: test_sincospi_v3f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: mov r6, r2
+; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: add r1, sp, #12
+; CHECK-NEXT: add r2, sp, #8
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r5, r3
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: ldr r0, [sp, #36]
+; CHECK-NEXT: vmov d0, r7, r6
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: add.w r2, r4, #16
+; CHECK-NEXT: vmov d1, r5, r0
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vldr s1, [sp, #8]
+; CHECK-NEXT: vldr s3, [sp, #12]
+; CHECK-NEXT: vldr s2, [sp, #4]
+; CHECK-NEXT: vldr s0, [sp]
+; CHECK-NEXT: vst1.32 {d1}, [r1:64]!
+; CHECK-NEXT: vst1.32 {d0}, [r2:64]!
+; CHECK-NEXT: bl ___sincospif
+; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+  %result = call { <3 x float>, <3 x float> } @llvm.sincospi.v3f32(<3 x float> %a)
+  ret { <3 x float>, <3 x float> } %result
+}
+
+define { double, double } @test_sincospi_f64(double %a) #0 {
+; CHECK-LABEL: test_sincospi_f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, r7, lr}
+; CHECK-NEXT: add r7, sp, #4
+; CHECK-NEXT: sub sp, #20
+; CHECK-NEXT: mov r4, sp
+; CHECK-NEXT: bfc r4, #0, #3
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: add r2, sp, #8
+; CHECK-NEXT: mov r3, sp
+; CHECK-NEXT: bl ___sincospi
+; CHECK-NEXT: subs r4, r7, #4
+; CHECK-NEXT: ldrd r0, r1, [sp, #8]
+; CHECK-NEXT: ldrd r2, r3, [sp]
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: pop {r4, r7, pc}
+  %result = call { double, double } @llvm.sincospi.f64(double %a)
+  ret { double, double } %result
+}
+
+define { <2 x double>, <2 x double> } @test_sincospi_v2f64(<2 x double> %a) #0 {
+; CHECK-LABEL: test_sincospi_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: add r7, sp, #16
+; CHECK-NEXT: sub sp, #32
+; CHECK-NEXT: mov r4, sp
+; CHECK-NEXT: bfc r4, #0, #3
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: mov r6, r1
+; CHECK-NEXT: ldr r1, [r7, #8]
+; CHECK-NEXT: mov r5, r3
+; CHECK-NEXT: mov r8, r2
+; CHECK-NEXT: add r2, sp, #24
+; CHECK-NEXT: add r3, sp, #16
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: bl ___sincospi
+; CHECK-NEXT: add r2, sp, #8
+; CHECK-NEXT: mov r3, sp
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: bl ___sincospi
+; CHECK-NEXT: vldr d19, [sp, #24]
+; CHECK-NEXT: vldr d18, [sp, #8]
+; CHECK-NEXT: vldr d17, [sp, #16]
+; CHECK-NEXT: vldr d16, [sp]
+; CHECK-NEXT: vst1.32 {d18, d19}, [r4]!
+; CHECK-NEXT: vst1.32 {d16, d17}, [r4]
+; CHECK-NEXT: sub.w r4, r7, #16
+; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+  %result = call { <2 x double>, <2 x double> } @llvm.sincospi.v2f64(<2 x double> %a)
+  ret { <2 x double>, <2 x double> } %result
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/PowerPC/llvm.sincos.ll b/llvm/test/CodeGen/PowerPC/llvm.sincos.ll
index aaf81ff814488..5b4e91c449522 100644
--- a/llvm/test/CodeGen/PowerPC/llvm.sincos.ll
+++ b/llvm/test/CodeGen/PowerPC/llvm.sincos.ll
@@ -26,30 +26,6 @@ define { ppc_fp128, ppc_fp128 } @test_sincos_ppcf128(ppc_fp128 %a) {
   ret { ppc_fp128, ppc_fp128 } %result
 }
 
-define { ppc_fp128, ppc_fp128 } @test_sincospi_ppcf128(ppc_fp128 %a) {
-; CHECK-LABEL: test_sincospi_ppcf128:
-; CHECK: # %bb.0:
-; CHECK-NEXT: mflr r0
-; CHECK-NEXT: stdu r1, -64(r1)
-; CHECK-NEXT: std r0, 80(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: addi r5, r1, 48
-; CHECK-NEXT: addi r6, r1, 32
-; CHECK-NEXT: bl sincospil
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfd f1, 48(r1)
-; CHECK-NEXT: lfd f2, 56(r1)
-; CHECK-NEXT: lfd f3, 32(r1)
-; CHECK-NEXT: lfd f4, 40(r1)
-; CHECK-NEXT: addi r1, r1, 64
-; CHECK-NEXT: ld r0, 16(r1)
-; CHECK-NEXT: mtlr r0
-; CHECK-NEXT: blr
-  %result = call { ppc_fp128, ppc_fp128 } @llvm.sincospi.ppcf128(ppc_fp128 %a)
-  ret { ppc_fp128, ppc_fp128 } %result
-}
-
 ; FIXME: This could be made a tail call with the default expansion of llvm.sincos.
@@ -73,29 +49,6 @@ define void @test_sincos_ppcf128_void_tail_call(ppc_fp128 %a, ptr noalias %out_s
   ret void
 }
 
-; FIXME: This could be made a tail call with the default expansion of llvm.sincospi.
-define void @test_sincospi_ppcf128_void_tail_call(ppc_fp128 %a, ptr noalias %out_sin, ptr noalias %out_cos) {
-; CHECK-LABEL: test_sincospi_ppcf128_void_tail_call:
-; CHECK: # %bb.0:
-; CHECK-NEXT: mflr r0
-; CHECK-NEXT: stdu r1, -32(r1)
-; CHECK-NEXT: std r0, 48(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: bl sincospil
-; CHECK-NEXT: nop
-; CHECK-NEXT: addi r1, r1, 32
-; CHECK-NEXT: ld r0, 16(r1)
-; CHECK-NEXT: mtlr r0
-; CHECK-NEXT: blr
-  %result = tail call { ppc_fp128, ppc_fp128 } @llvm.sincospi.ppcf128(ppc_fp128 %a)
-  %result.0 = extractvalue { ppc_fp128, ppc_fp128 } %result, 0
-  %result.1 = extractvalue { ppc_fp128, ppc_fp128 } %result, 1
-  store ppc_fp128 %result.0, ptr %out_sin, align 16
-  store ppc_fp128 %result.1, ptr %out_cos, align 16
-  ret void
-}
-
 ; NOTE: This would need a struct-return library call for llvm.sincos to become a tail call.
 define { ppc_fp128, ppc_fp128 } @test_sincos_ppcf128_tail_call(ppc_fp128 %a) {
 ; CHECK-LABEL: test_sincos_ppcf128_tail_call:
@@ -120,28 +73,3 @@ define { ppc_fp128, ppc_fp128 } @test_sincos_ppcf128_tail_call(ppc_fp128 %a) {
   %result = tail call { ppc_fp128, ppc_fp128 } @llvm.sincos.ppcf128(ppc_fp128 %a)
   ret { ppc_fp128, ppc_fp128 } %result
 }
-
-; NOTE: This would need a struct-return library call for llvm.sincospi to become a tail call.
-define { ppc_fp128, ppc_fp128 } @test_sincospi_ppcf128_tail_call(ppc_fp128 %a) {
-; CHECK-LABEL: test_sincospi_ppcf128_tail_call:
-; CHECK: # %bb.0:
-; CHECK-NEXT: mflr r0
-; CHECK-NEXT: stdu r1, -64(r1)
-; CHECK-NEXT: std r0, 80(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: addi r5, r1, 48
-; CHECK-NEXT: addi r6, r1, 32
-; CHECK-NEXT: bl sincospil
-; CHECK-NEXT: nop
-; CHECK-NEXT: lfd f1, 48(r1)
-; CHECK-NEXT: lfd f2, 56(r1)
-; CHECK-NEXT: lfd f3, 32(r1)
-; CHECK-NEXT: lfd f4, 40(r1)
-; CHECK-NEXT: addi r1, r1, 64
-; CHECK-NEXT: ld r0, 16(r1)
-; CHECK-NEXT: mtlr r0
-; CHECK-NEXT: blr
-  %result = tail call { ppc_fp128, ppc_fp128 } @llvm.sincospi.ppcf128(ppc_fp128 %a)
-  ret { ppc_fp128, ppc_fp128 } %result
-}
diff --git a/llvm/test/CodeGen/PowerPC/llvm.sincospi.ll b/llvm/test/CodeGen/PowerPC/llvm.sincospi.ll
new file mode 100644
index 0000000000000..75e7559386f16
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/llvm.sincospi.ll
@@ -0,0 +1,21 @@
+; RUN: not llc -mtriple=powerpc64le-gnu-linux -filetype=null %s 2>&1 | FileCheck %s
+
+; CHECK: error: no libcall available for fsincospi
+define { half, half } @test_sincospi_f16(half %a) #0 {
+  %result = call { half, half } @llvm.sincospi.f16(half %a)
+  ret { half, half } %result
+}
+
+; CHECK: error: no libcall available for fsincospi
+define { float, float } @test_sincospi_f32(float %a) #0 {
+  %result = call { float, float } @llvm.sincospi.f32(float %a)
+  ret { float, float } %result
+}
+
+; CHECK: error: no libcall available for fsincospi
+define { double, double } @test_sincospi_f64(double %a) #0 {
+  %result = call { double, double } @llvm.sincospi.f64(double %a)
+  ret { double, double } %result
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/PowerPC/llvm.sincospi.ppcfp128.ll b/llvm/test/CodeGen/PowerPC/llvm.sincospi.ppcfp128.ll
new file mode 100644
index 0000000000000..bc656bb785e9e
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/llvm.sincospi.ppcfp128.ll
@@ -0,0 +1,25 @@
+; XFAIL: *
+; FIXME: asserts
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-gnu-linux -filetype=null \
+; RUN:   -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names %s
+
+define { ppc_fp128, ppc_fp128 } @test_sincospi_ppcf128(ppc_fp128 %a) {
+  %result = call { ppc_fp128, ppc_fp128 } @llvm.sincospi.ppcf128(ppc_fp128 %a)
+  ret { ppc_fp128, ppc_fp128 } %result
+}
+
+; FIXME: This could be made a tail call with the default expansion of llvm.sincospi.
+define void @test_sincospi_ppcf128_void_tail_call(ppc_fp128 %a, ptr noalias %out_sin, ptr noalias %out_cos) {
+  %result = tail call { ppc_fp128, ppc_fp128 } @llvm.sincospi.ppcf128(ppc_fp128 %a)
+  %result.0 = extractvalue { ppc_fp128, ppc_fp128 } %result, 0
+  %result.1 = extractvalue { ppc_fp128, ppc_fp128 } %result, 1
+  store ppc_fp128 %result.0, ptr %out_sin, align 16
+  store ppc_fp128 %result.1, ptr %out_cos, align 16
+  ret void
+}
+
+; NOTE: This would need a struct-return library call for llvm.sincospi to become a tail call.
+define { ppc_fp128, ppc_fp128 } @test_sincospi_ppcf128_tail_call(ppc_fp128 %a) {
+  %result = tail call { ppc_fp128, ppc_fp128 } @llvm.sincospi.ppcf128(ppc_fp128 %a)
+  ret { ppc_fp128, ppc_fp128 } %result
+}
diff --git a/llvm/test/CodeGen/X86/llvm.sincospi.ll b/llvm/test/CodeGen/X86/llvm.sincospi.ll
new file mode 100644
index 0000000000000..5546c66deba30
--- /dev/null
+++ b/llvm/test/CodeGen/X86/llvm.sincospi.ll
@@ -0,0 +1,233 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=x86_64-apple-macosx10.9 < %s | FileCheck %s
+
+define { half, half } @test_sincospi_f16(half %a) #0 {
+; CHECK-LABEL: test_sincospi_f16:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: pextrw $0, %xmm0, %eax
+; CHECK-NEXT: movzwl %ax, %edi
+; CHECK-NEXT: callq ___extendhfsf2
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: callq ___truncsfhf2
+; CHECK-NEXT: ## kill: def $ax killed $ax def $eax
+; CHECK-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: callq ___truncsfhf2
+; CHECK-NEXT: ## kill: def $ax killed $ax def $eax
+; CHECK-NEXT: pinsrw $0, %eax, %xmm1
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: retq
+  %result = call { half, half } @llvm.sincospi.f16(half %a)
+  ret { half, half } %result
+}
+
+define half @test_sincospi_f16_only_use_sin(half %a) #0 {
+; CHECK-LABEL: test_sincospi_f16_only_use_sin:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: pextrw $0, %xmm0, %eax
+; CHECK-NEXT: movzwl %ax, %edi
+; CHECK-NEXT: callq ___extendhfsf2
+; CHECK-NEXT: movq %rsp, %rdi
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: callq ___truncsfhf2
+; CHECK-NEXT: ## kill: def $ax killed $ax def $eax
+; CHECK-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+  %result = call { half, half } @llvm.sincospi.f16(half %a)
+  %result.0 = extractvalue { half, half } %result, 0
+  ret half %result.0
+}
+
+define half @test_sincospi_f16_only_use_cos(half %a) #0 {
+; CHECK-LABEL: test_sincospi_f16_only_use_cos:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: pextrw $0, %xmm0, %eax
+; CHECK-NEXT: movzwl %ax, %edi
+; CHECK-NEXT: callq ___extendhfsf2
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: movq %rsp, %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: callq ___truncsfhf2
+; CHECK-NEXT: ## kill: def $ax killed $ax def $eax
+; CHECK-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+  %result = call { half, half } @llvm.sincospi.f16(half %a)
+  %result.1 = extractvalue { half, half } %result, 1
+  ret half %result.1
+}
+
+define { <2 x half>, <2 x half> } @test_sincospi_v2f16(<2 x half> %a) #0 {
+; CHECK-LABEL: test_sincospi_v2f16:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: subq $64, %rsp
+; CHECK-NEXT: pextrw $0, %xmm0, %ebx
+; CHECK-NEXT: psrld $16, %xmm0
+; CHECK-NEXT: pextrw $0, %xmm0, %eax
+; CHECK-NEXT: movzwl %ax, %edi
+; CHECK-NEXT: callq ___extendhfsf2
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: movq %rsp, %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movzwl %bx, %edi
+; CHECK-NEXT: callq ___extendhfsf2
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: callq ___truncsfhf2
+; CHECK-NEXT: ## kill: def $ax killed $ax def $eax
+; CHECK-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: callq ___truncsfhf2
+; CHECK-NEXT: ## kill: def $ax killed $ax def $eax
+; CHECK-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: callq ___truncsfhf2
+; CHECK-NEXT: ## kill: def $ax killed $ax def $eax
+; CHECK-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: callq ___truncsfhf2
+; CHECK-NEXT: ## kill: def $ax killed $ax def $eax
+; CHECK-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
+; CHECK-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
+; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Folded Reload
+; CHECK-NEXT: ## xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+; CHECK-NEXT: addq $64, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: retq
+  %result = call { <2 x half>, <2 x half> } @llvm.sincospi.v2f16(<2 x half> %a)
+  ret { <2 x half>, <2 x half> } %result
+}
+
+define { float, float } @test_sincospi_f32(float %a) #0 {
+; CHECK-LABEL: test_sincospi_f32:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: movq %rsp, %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+  %result = call { float, float } @llvm.sincospi.f32(float %a)
+  ret { float, float } %result
+}
+
+define { <2 x float>, <2 x float> } @test_sincospi_v2f32(<2 x float> %a) #0 {
+; CHECK-LABEL: test_sincospi_v2f32:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: movq %rsp, %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: retq
+  %result = call { <2 x float>, <2 x float> } @llvm.sincospi.v2f32(<2 x float> %a)
+  ret { <2 x float>, <2 x float> } %result
+}
+
+define { <3 x float>, <3 x float> } @test_sincospi_v3f32(<3 x float> %a) #0 {
+; CHECK-LABEL: test_sincospi_v3f32:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: callq ___sincospif
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-NEXT: addq $56, %rsp
+; CHECK-NEXT: retq
+  %result = call { <3 x float>, <3 x float> } @llvm.sincospi.v3f32(<3 x float> %a)
+  ret { <3 x float>, <3 x float> } %result
+}
+
+define { double, double } @test_sincospi_f64(double %a) #0 {
+; CHECK-LABEL: test_sincospi_f64:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: callq ___sincospi
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: retq
+  %result = call { double, double } @llvm.sincospi.f64(double %a)
+  ret { double, double } %result
+}
+
+define { <2 x double>, <2 x double> } @test_sincospi_v2f64(<2 x double> %a) #0 {
+; CHECK-LABEL: test_sincospi_v2f64:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: callq ___sincospi
___sincospi +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; CHECK-NEXT: movq %rsp, %rsi +; CHECK-NEXT: callq ___sincospi +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] +; CHECK-NEXT: addq $56, %rsp +; CHECK-NEXT: retq + %result = call { <2 x double>, <2 x double> } @llvm.sincospi.v2f64(<2 x double> %a) + ret { <2 x double>, <2 x double> } %result +} + +attributes #0 = { nounwind } diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll new file mode 100644 index 0000000000000..c45f319f80122 --- /dev/null +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll @@ -0,0 +1,12 @@ +; REQUIRES: aarch64-registered-target +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=aarch64-unknown-linux -mattr=+neon,+sve -vector-library=ArmPL < %s | FileCheck %s + +; CHECK: declare void @armpl_svsincospi_f32_x(<vscale x 4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, <vscale x 4 x i1>) [[ATTRS:#[0-9]+]] + +; CHECK: declare void @armpl_svsincospi_f64_x(<vscale x 2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, <vscale x 2 x i1>) [[ATTRS:#[0-9]+]] + +; CHECK: declare void @armpl_vsincospiq_f32(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare void @armpl_vsincospiq_f64(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: attributes [[ATTRS]] = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll new file mode 100644 index 0000000000000..7972e0ca1c487 --- /dev/null +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll @@ -0,0 +1,12 @@ +; REQUIRES: aarch64-registered-target +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=aarch64-unknown-linux -mattr=+neon,+sve -vector-library=sleefgnuabi < %s | FileCheck %s + +; CHECK: declare void @_ZGVnN2vl8l8_sincospi(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS:#[0-9]+]] + +; CHECK: declare void @_ZGVnN4vl4l4_sincospif(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare void @_ZGVsNxvl4l4_sincospif(<vscale x 4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS:#[0-9]+]] + +; CHECK: declare void @_ZGVsNxvl8l8_sincospi(<vscale x 2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS:#[0-9]+]] + +; CHECK: attributes [[ATTRS]] = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }