diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 5f0ae5ce8614c..8f78ac8ac4133 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -16122,6 +16122,52 @@ of the argument.
 When specified with the fast-math-flag 'afn', the result may be approximated
 using a less accurate calculation.
 
+'``llvm.sincospi.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.sincospi`` on any
+floating-point or vector of floating-point type. Not all targets support
+all types however.
+
+::
+
+      declare { float, float }          @llvm.sincospi.f32(float  %Val)
+      declare { double, double }        @llvm.sincospi.f64(double %Val)
+      declare { x86_fp80, x86_fp80 }    @llvm.sincospi.f80(x86_fp80  %Val)
+      declare { fp128, fp128 }          @llvm.sincospi.f128(fp128 %Val)
+      declare { ppc_fp128, ppc_fp128 }  @llvm.sincospi.ppcf128(ppc_fp128  %Val)
+      declare { <4 x float>, <4 x float> } @llvm.sincospi.v4f32(<4 x float>  %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.sincospi.*``' intrinsics returns the sine and cosine of pi*operand.
+
+Arguments:
+""""""""""
+
+The argument is a :ref:`floating-point <t_floating>` value or
+:ref:`vector <t_vector>` of floating-point values. Returns two values matching
+the argument type in a struct.
+
+Semantics:
+""""""""""
+
+This is equivalent to the ``llvm.sincos.*`` intrinsic where the argument has been
+multiplied by pi, however, it computes the result more accurately especially
+for large input values.
+
+.. note::
+
+  Currently, the default lowering of this intrinsic relies on the ``sincospi[f|l]``
+  functions being available in the target's runtime (e.g. libc).
+
+When specified with the fast-math-flag 'afn', the result may be approximated
+using a less accurate calculation.
+
 '``llvm.modf.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 8468992ed4b7a..339b83637fa8f 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2101,6 +2101,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     case Intrinsic::sincos:
       ISD = ISD::FSINCOS;
       break;
+    case Intrinsic::sincospi:
+      ISD = ISD::FSINCOSPI;
+      break;
     case Intrinsic::modf:
       ISD = ISD::FMODF;
       break;
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 046d9befd0e91..68ed812222dfd 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1058,6 +1058,10 @@ enum NodeType {
   /// FSINCOS - Compute both fsin and fcos as a single operation.
   FSINCOS,
 
+  /// FSINCOSPI - Compute both the sine and cosine times pi more accurately
+  /// than FSINCOS(pi*x), especially for large x.
+  FSINCOSPI,
+
   /// FMODF - Decomposes the operand into integral and fractional parts, each
   /// having the same type and sign as the operand.
   FMODF,
diff --git a/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h b/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h
index 59313520e0d83..34d783ae3f513 100644
--- a/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h
+++ b/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h
@@ -66,6 +66,10 @@ Libcall getFREXP(EVT RetVT);
 /// UNKNOWN_LIBCALL if there is none.
 Libcall getFSINCOS(EVT RetVT);
 
+/// getSINCOSPI - Return the SINCOSPI_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+Libcall getSINCOSPI(EVT RetVT);
+
 /// getMODF - Return the MODF_* value for the given types, or
 /// UNKNOWN_LIBCALL if there is none.
 Libcall getMODF(EVT RetVT);
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 9671c81ab3d32..14ecae41ff08f 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1075,6 +1075,8 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
   def int_roundeven    : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
   def int_sincos : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>],
                              [llvm_anyfloat_ty]>;
+  def int_sincospi : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>],
+                             [llvm_anyfloat_ty]>;
   def int_modf : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>],
                              [llvm_anyfloat_ty]>;
 
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index dc69b1ae19769..a7963543c4350 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -354,6 +354,11 @@ HANDLE_LIBCALL(FREXP_F64, "frexp")
 HANDLE_LIBCALL(FREXP_F80, "frexpl")
 HANDLE_LIBCALL(FREXP_F128, "frexpl")
 HANDLE_LIBCALL(FREXP_PPCF128, "frexpl")
+HANDLE_LIBCALL(SINCOSPI_F32, "sincospif")
+HANDLE_LIBCALL(SINCOSPI_F64, "sincospi")
+HANDLE_LIBCALL(SINCOSPI_F80, "sincospil")
+HANDLE_LIBCALL(SINCOSPI_F128, "sincospil")
+HANDLE_LIBCALL(SINCOSPI_PPCF128, "sincospil")
 HANDLE_LIBCALL(MODF_F32, "modff")
 HANDLE_LIBCALL(MODF_F64, "modf")
 HANDLE_LIBCALL(MODF_F80, "modfl")
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f61928a66eb3c..66d7f57b93fb7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4520,11 +4520,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
     ExpandFPLibCall(Node, RTLIB::TANH_F32, RTLIB::TANH_F64, RTLIB::TANH_F80,
                     RTLIB::TANH_F128, RTLIB::TANH_PPCF128, Results);
     break;
-  case ISD::FSINCOS: {
-    RTLIB::Libcall LC = RTLIB::getFSINCOS(Node->getValueType(0));
+  case ISD::FSINCOS:
+  case ISD::FSINCOSPI: {
+    EVT VT = Node->getValueType(0);
+    RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
+                            ? RTLIB::getFSINCOS(VT)
+                            : RTLIB::getSINCOSPI(VT);
     bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results);
     if (!Expanded)
-      llvm_unreachable("Expected scalar FSINCOS to expand to libcall!");
+      llvm_unreachable("Expected scalar FSINCOS[PI] to expand to libcall!");
     break;
   }
   case ISD::FLOG:
@@ -5507,7 +5511,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
     break;
   }
   case ISD::FMODF:
-  case ISD::FSINCOS: {
+  case ISD::FSINCOS:
+  case ISD::FSINCOSPI: {
     Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
     Tmp2 = DAG.getNode(Node->getOpcode(), dl, DAG.getVTList(NVT, NVT), Tmp1,
                        Node->getFlags());
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 2a4eed1ed527a..6dcc2464f61f2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -2768,6 +2768,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
 
     case ISD::FMODF:
     case ISD::FSINCOS:
+    case ISD::FSINCOSPI:
       R = PromoteFloatRes_UnaryWithTwoFPResults(N);
       break;
     case ISD::FP_ROUND:   R = PromoteFloatRes_FP_ROUND(N); break;
@@ -3230,6 +3231,7 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
 
   case ISD::FMODF:
   case ISD::FSINCOS:
+  case ISD::FSINCOSPI:
     R = SoftPromoteHalfRes_UnaryWithTwoFPResults(N);
     break;
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 416da1bb7bfcf..111b08aeab185 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -456,6 +456,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case ISD::FFREXP:
   case ISD::FMODF:
   case ISD::FSINCOS:
+  case ISD::FSINCOSPI:
   case ISD::SADDSAT:
   case ISD::UADDSAT:
   case ISD::SSUBSAT:
@@ -1217,9 +1218,12 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
       return;
 
     break;
-  case ISD::FSINCOS: {
-    RTLIB::Libcall LC =
-        RTLIB::getFSINCOS(Node->getValueType(0).getVectorElementType());
+  case ISD::FSINCOS:
+  case ISD::FSINCOSPI: {
+    EVT VT = Node->getValueType(0).getVectorElementType();
+    RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
+                            ? RTLIB::getFSINCOS(VT)
+                            : RTLIB::getSINCOSPI(VT);
     if (DAG.expandMultipleResultFPLibCall(LC, Node, Results))
       return;
     break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index da793a34879b8..915ee2d110332 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -136,6 +136,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::FMODF:
   case ISD::FFREXP:
   case ISD::FSINCOS:
+  case ISD::FSINCOSPI:
     R = ScalarizeVecRes_UnaryOpWithTwoResults(N, ResNo);
     break;
   case ISD::ADD:
@@ -1265,6 +1266,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::FMODF:
   case ISD::FFREXP:
   case ISD::FSINCOS:
+  case ISD::FSINCOSPI:
     SplitVecRes_UnaryOpWithTwoResults(N, ResNo, Lo, Hi);
     break;
 
@@ -4815,7 +4817,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
     break;
   case ISD::FMODF:
   case ISD::FFREXP:
-  case ISD::FSINCOS: {
+  case ISD::FSINCOS:
+  case ISD::FSINCOSPI: {
     if (!unrollExpandedOp())
       Res = WidenVecRes_UnaryOpWithTwoResults(N, ResNo);
     break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 4a9ac8580e4e2..7a1187ef303dc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6979,6 +6979,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     return;
   case Intrinsic::modf:
   case Intrinsic::sincos:
+  case Intrinsic::sincospi:
   case Intrinsic::frexp: {
     unsigned Opcode;
     switch (Intrinsic) {
@@ -6987,6 +6988,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     case Intrinsic::sincos:
       Opcode = ISD::FSINCOS;
       break;
+    case Intrinsic::sincospi:
+      Opcode = ISD::FSINCOSPI;
+      break;
     case Intrinsic::modf:
       Opcode = ISD::FMODF;
       break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 7b1a2d640a2bd..5d3e404350cda 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -219,6 +219,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::FCOS:                       return "fcos";
   case ISD::STRICT_FCOS:                return "strict_fcos";
   case ISD::FSINCOS:                    return "fsincos";
+  case ISD::FSINCOSPI:                  return "fsincospi";
   case ISD::FMODF:                      return "fmodf";
   case ISD::FTAN:                       return "ftan";
   case ISD::STRICT_FTAN:                return "strict_ftan";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 1f39ec205c517..d9a19dfceb6d3 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -407,6 +407,11 @@ RTLIB::Libcall RTLIB::getFSINCOS(EVT RetVT) {
                       SINCOS_PPCF128);
 }
 
+RTLIB::Libcall RTLIB::getSINCOSPI(EVT RetVT) {
+  return getFPLibCall(RetVT, SINCOSPI_F32, SINCOSPI_F64, SINCOSPI_F80,
+                      SINCOSPI_F128, SINCOSPI_PPCF128);
+}
+
 RTLIB::Libcall RTLIB::getMODF(EVT RetVT) {
   return getFPLibCall(RetVT, MODF_F32, MODF_F64, MODF_F80, MODF_F128,
                       MODF_PPCF128);
@@ -781,7 +786,7 @@ void TargetLoweringBase::initActions() {
 
     // These library functions default to expand.
     setOperationAction({ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP,
-                        ISD::FSINCOS, ISD::FMODF},
+                        ISD::FSINCOS, ISD::FSINCOSPI, ISD::FMODF},
                        VT, Expand);
 
     // These operations default to expand for vector types.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7f6da4fa38f90..503aee55c270f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -735,19 +735,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Promote);
   }
 
-  for (auto Op : {ISD::FREM,         ISD::FPOW,          ISD::FPOWI,
-                  ISD::FCOS,         ISD::FSIN,          ISD::FSINCOS,
-                  ISD::FMODF,        ISD::FACOS,         ISD::FASIN,
-                  ISD::FATAN,        ISD::FATAN2,        ISD::FCOSH,
-                  ISD::FSINH,        ISD::FTANH,         ISD::FTAN,
-                  ISD::FEXP,         ISD::FEXP2,         ISD::FEXP10,
-                  ISD::FLOG,         ISD::FLOG2,         ISD::FLOG10,
-                  ISD::STRICT_FREM,  ISD::STRICT_FPOW,   ISD::STRICT_FPOWI,
-                  ISD::STRICT_FCOS,  ISD::STRICT_FSIN,   ISD::STRICT_FACOS,
-                  ISD::STRICT_FASIN, ISD::STRICT_FATAN,  ISD::STRICT_FATAN2,
-                  ISD::STRICT_FCOSH, ISD::STRICT_FSINH,  ISD::STRICT_FTANH,
-                  ISD::STRICT_FEXP,  ISD::STRICT_FEXP2,  ISD::STRICT_FLOG,
-                  ISD::STRICT_FLOG2, ISD::STRICT_FLOG10, ISD::STRICT_FTAN}) {
+  for (auto Op : {ISD::FREM,          ISD::FPOW,         ISD::FPOWI,
+                  ISD::FCOS,          ISD::FSIN,         ISD::FSINCOS,
+                  ISD::FSINCOSPI,     ISD::FMODF,        ISD::FACOS,
+                  ISD::FASIN,         ISD::FATAN,        ISD::FATAN2,
+                  ISD::FCOSH,         ISD::FSINH,        ISD::FTANH,
+                  ISD::FTAN,          ISD::FEXP,         ISD::FEXP2,
+                  ISD::FEXP10,        ISD::FLOG,         ISD::FLOG2,
+                  ISD::FLOG10,        ISD::STRICT_FREM,  ISD::STRICT_FPOW,
+                  ISD::STRICT_FPOWI,  ISD::STRICT_FCOS,  ISD::STRICT_FSIN,
+                  ISD::STRICT_FACOS,  ISD::STRICT_FASIN, ISD::STRICT_FATAN,
+                  ISD::STRICT_FATAN2, ISD::STRICT_FCOSH, ISD::STRICT_FSINH,
+                  ISD::STRICT_FTANH,  ISD::STRICT_FEXP,  ISD::STRICT_FEXP2,
+                  ISD::STRICT_FLOG,   ISD::STRICT_FLOG2, ISD::STRICT_FLOG10,
+                  ISD::STRICT_FTAN}) {
     setOperationAction(Op, MVT::f16, Promote);
     setOperationAction(Op, MVT::v4f16, Expand);
     setOperationAction(Op, MVT::v8f16, Expand);
@@ -1208,7 +1209,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
           ISD::FSIN,              ISD::FCOS,           ISD::FTAN,
           ISD::FASIN,             ISD::FACOS,          ISD::FATAN,
           ISD::FSINH,             ISD::FCOSH,          ISD::FTANH,
-          ISD::FPOW,              ISD::FLOG,           ISD::FLOG2,          
+          ISD::FPOW,              ISD::FLOG,           ISD::FLOG2,
           ISD::FLOG10,            ISD::FEXP,           ISD::FEXP2,
           ISD::FEXP10,            ISD::FRINT,          ISD::FROUND,
           ISD::FROUNDEVEN,        ISD::FTRUNC,         ISD::FMINNUM,
@@ -1217,7 +1218,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
           ISD::STRICT_FADD,       ISD::STRICT_FSUB,    ISD::STRICT_FMUL,
           ISD::STRICT_FDIV,       ISD::STRICT_FMA,     ISD::STRICT_FCEIL,
           ISD::STRICT_FFLOOR,     ISD::STRICT_FSQRT,   ISD::STRICT_FRINT,
-          ISD::STRICT_FNEARBYINT, ISD::STRICT_FROUND,  ISD::STRICT_FTRUNC,  
+          ISD::STRICT_FNEARBYINT, ISD::STRICT_FROUND,  ISD::STRICT_FTRUNC,
           ISD::STRICT_FROUNDEVEN, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM,
           ISD::STRICT_FMINIMUM,   ISD::STRICT_FMAXIMUM})
       setOperationAction(Op, MVT::v1f64, Expand);
diff --git a/llvm/test/CodeGen/AArch64/llvm.sincospi.ll b/llvm/test/CodeGen/AArch64/llvm.sincospi.ll
new file mode 100644
index 0000000000000..d1d7d92adc05a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/llvm.sincospi.ll
@@ -0,0 +1,268 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s
+
+define { half, half } @test_sincospi_f16(half %a) {
+; CHECK-LABEL: test_sincospi_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    add x0, sp, #12
+; CHECK-NEXT:    add x1, sp, #8
+; CHECK-NEXT:    bl sincospif
+; CHECK-NEXT:    ldp s1, s0, [sp, #8]
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    fcvt h1, s1
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %result = call { half, half } @llvm.sincospi.f16(half %a)
+  ret { half, half } %result
+}
+
+define half @test_sincospi_f16_only_use_sin(half %a) {
+; CHECK-LABEL: test_sincospi_f16_only_use_sin:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    add x0, sp, #12
+; CHECK-NEXT:    add x1, sp, #8
+; CHECK-NEXT:    bl sincospif
+; CHECK-NEXT:    ldr s0, [sp, #12]
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %result = call { half, half } @llvm.sincospi.f16(half %a)
+  %result.0 = extractvalue { half, half } %result, 0
+  ret half %result.0
+}
+
+define half @test_sincospi_f16_only_use_cos(half %a) {
+; CHECK-LABEL: test_sincospi_f16_only_use_cos:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    add x0, sp, #12
+; CHECK-NEXT:    add x1, sp, #8
+; CHECK-NEXT:    bl sincospif
+; CHECK-NEXT:    ldr s0, [sp, #8]
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %result = call { half, half } @llvm.sincospi.f16(half %a)
+  %result.1 = extractvalue { half, half } %result, 1
+  ret half %result.1
+}
+
+define { <2 x half>, <2 x half> } @test_sincospi_v2f16(<2 x half> %a) {
+; CHECK-LABEL: test_sincospi_v2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #64
+; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov h1, v0.h[1]
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    add x0, sp, #36
+; CHECK-NEXT:    add x1, sp, #32
+; CHECK-NEXT:    fcvt s0, h1
+; CHECK-NEXT:    bl sincospif
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #28
+; CHECK-NEXT:    add x1, sp, #24
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    bl sincospif
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #44
+; CHECK-NEXT:    add x1, sp, #40
+; CHECK-NEXT:    mov h0, v0.h[2]
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    bl sincospif
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #60
+; CHECK-NEXT:    add x1, sp, #56
+; CHECK-NEXT:    mov h0, v0.h[3]
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    bl sincospif
+; CHECK-NEXT:    ldp s2, s0, [sp, #32]
+; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp s3, s1, [sp, #24]
+; CHECK-NEXT:    fcvt h4, s0
+; CHECK-NEXT:    fcvt h2, s2
+; CHECK-NEXT:    fcvt h0, s1
+; CHECK-NEXT:    fcvt h1, s3
+; CHECK-NEXT:    ldp s5, s3, [sp, #40]
+; CHECK-NEXT:    fcvt h3, s3
+; CHECK-NEXT:    mov v0.h[1], v4.h[0]
+; CHECK-NEXT:    fcvt h4, s5
+; CHECK-NEXT:    mov v1.h[1], v2.h[0]
+; CHECK-NEXT:    ldp s5, s2, [sp, #56]
+; CHECK-NEXT:    mov v0.h[2], v3.h[0]
+; CHECK-NEXT:    fcvt h2, s2
+; CHECK-NEXT:    fcvt h3, s5
+; CHECK-NEXT:    mov v1.h[2], v4.h[0]
+; CHECK-NEXT:    mov v0.h[3], v2.h[0]
+; CHECK-NEXT:    mov v1.h[3], v3.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT:    add sp, sp, #64
+; CHECK-NEXT:    ret
+  %result = call { <2 x half>, <2 x half> } @llvm.sincospi.v2f16(<2 x half> %a)
+  ret { <2 x half>, <2 x half> } %result
+}
+
+define { float, float } @test_sincospi_f32(float %a) {
+; CHECK-LABEL: test_sincospi_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    add x0, sp, #12
+; CHECK-NEXT:    add x1, sp, #8
+; CHECK-NEXT:    bl sincospif
+; CHECK-NEXT:    ldp s1, s0, [sp, #8]
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %result = call { float, float } @llvm.sincospi.f32(float %a)
+  ret { float, float } %result
+}
+
+define { <3 x float>, <3 x float> } @test_sincospi_v3f32(<3 x float> %a) {
+; CHECK-LABEL: test_sincospi_v3f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #80
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w21, -24
+; CHECK-NEXT:    .cfi_offset w22, -32
+; CHECK-NEXT:    .cfi_offset w30, -48
+; CHECK-NEXT:    add x0, sp, #20
+; CHECK-NEXT:    add x1, sp, #16
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    bl sincospif
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #28
+; CHECK-NEXT:    add x1, sp, #24
+; CHECK-NEXT:    add x19, sp, #28
+; CHECK-NEXT:    add x20, sp, #24
+; CHECK-NEXT:    mov s0, v0.s[1]
+; CHECK-NEXT:    bl sincospif
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #44
+; CHECK-NEXT:    add x1, sp, #40
+; CHECK-NEXT:    add x21, sp, #44
+; CHECK-NEXT:    add x22, sp, #40
+; CHECK-NEXT:    mov s0, v0.s[2]
+; CHECK-NEXT:    bl sincospif
+; CHECK-NEXT:    ldp s1, s0, [sp, #16]
+; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    ld1 { v0.s }[1], [x19]
+; CHECK-NEXT:    ld1 { v1.s }[1], [x20]
+; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ld1 { v0.s }[2], [x21]
+; CHECK-NEXT:    ld1 { v1.s }[2], [x22]
+; CHECK-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #80
+; CHECK-NEXT:    ret
+  %result = call { <3 x float>, <3 x float> } @llvm.sincospi.v3f32(<3 x float> %a)
+  ret { <3 x float>, <3 x float> } %result
+}
+
+define { <2 x float>, <2 x float> } @test_sincospi_v2f32(<2 x float> %a) {
+; CHECK-LABEL: test_sincospi_v2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #64
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    add x0, sp, #44
+; CHECK-NEXT:    add x1, sp, #40
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    bl sincospif
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #28
+; CHECK-NEXT:    add x1, sp, #24
+; CHECK-NEXT:    add x19, sp, #28
+; CHECK-NEXT:    add x20, sp, #24
+; CHECK-NEXT:    mov s0, v0.s[1]
+; CHECK-NEXT:    bl sincospif
+; CHECK-NEXT:    ldp s1, s0, [sp, #40]
+; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    ld1 { v0.s }[1], [x19]
+; CHECK-NEXT:    ld1 { v1.s }[1], [x20]
+; CHECK-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT:    add sp, sp, #64
+; CHECK-NEXT:    ret
+  %result = call { <2 x float>, <2 x float> } @llvm.sincospi.v2f32(<2 x float> %a)
+  ret { <2 x float>, <2 x float> } %result
+}
+
+define { double, double } @test_sincospi_f64(double %a) {
+; CHECK-LABEL: test_sincospi_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    add x0, sp, #24
+; CHECK-NEXT:    add x1, sp, #8
+; CHECK-NEXT:    bl sincospi
+; CHECK-NEXT:    ldr d0, [sp, #24]
+; CHECK-NEXT:    ldr d1, [sp, #8]
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    ret
+  %result = call { double, double } @llvm.sincospi.f64(double %a)
+  ret { double, double } %result
+}
+
+define { <2 x double>, <2 x double> } @test_sincospi_v2f64(<2 x double> %a) {
+; CHECK-LABEL: test_sincospi_v2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #80
+; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    add x0, sp, #56
+; CHECK-NEXT:    add x1, sp, #40
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    bl sincospi
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #32
+; CHECK-NEXT:    add x1, sp, #24
+; CHECK-NEXT:    add x19, sp, #32
+; CHECK-NEXT:    add x20, sp, #24
+; CHECK-NEXT:    mov d0, v0.d[1]
+; CHECK-NEXT:    bl sincospi
+; CHECK-NEXT:    ldr d0, [sp, #56]
+; CHECK-NEXT:    ldr d1, [sp, #40]
+; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT:    ld1 { v0.d }[1], [x19]
+; CHECK-NEXT:    ld1 { v1.d }[1], [x20]
+; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #80
+; CHECK-NEXT:    ret
+  %result = call { <2 x double>, <2 x double> } @llvm.sincospi.v2f64(<2 x double> %a)
+  ret { <2 x double>, <2 x double> } %result
+}
diff --git a/llvm/test/CodeGen/AArch64/veclib-llvm.sincospi.ll b/llvm/test/CodeGen/AArch64/veclib-llvm.sincospi.ll
new file mode 100644
index 0000000000000..fad865d20f7df
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/veclib-llvm.sincospi.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -vector-library=sleefgnuabi < %s | FileCheck %s -check-prefix=SLEEF
+; RUN: llc -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -vector-library=ArmPL < %s | FileCheck %s -check-prefix=ARMPL
+
+define void @test_sincospi_v4f32(<4 x float> %x, ptr noalias %out_sin, ptr noalias %out_cos) {
+; SLEEF-LABEL: test_sincospi_v4f32:
+; SLEEF:       // %bb.0:
+; SLEEF-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; SLEEF-NEXT:    .cfi_def_cfa_offset 16
+; SLEEF-NEXT:    .cfi_offset w30, -16
+; SLEEF-NEXT:    bl _ZGVnN4vl4l4_sincospif
+; SLEEF-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; SLEEF-NEXT:    ret
+;
+; ARMPL-LABEL: test_sincospi_v4f32:
+; ARMPL:       // %bb.0:
+; ARMPL-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; ARMPL-NEXT:    .cfi_def_cfa_offset 16
+; ARMPL-NEXT:    .cfi_offset w30, -16
+; ARMPL-NEXT:    bl armpl_vsincospiq_f32
+; ARMPL-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; ARMPL-NEXT:    ret
+  %result = call { <4 x float>, <4 x float> } @llvm.sincospi.v4f32(<4 x float> %x)
+  %result.0 = extractvalue { <4 x float>, <4 x float> } %result, 0
+  %result.1 = extractvalue { <4 x float>, <4 x float> } %result, 1
+  store <4 x float> %result.0, ptr %out_sin, align 4
+  store <4 x float> %result.1, ptr %out_cos, align 4
+  ret void
+}
+
+define void @test_sincospi_v2f64(<2 x double> %x, ptr noalias %out_sin, ptr noalias %out_cos) {
+; SLEEF-LABEL: test_sincospi_v2f64:
+; SLEEF:       // %bb.0:
+; SLEEF-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; SLEEF-NEXT:    .cfi_def_cfa_offset 16
+; SLEEF-NEXT:    .cfi_offset w30, -16
+; SLEEF-NEXT:    bl _ZGVnN2vl8l8_sincospi
+; SLEEF-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; SLEEF-NEXT:    ret
+;
+; ARMPL-LABEL: test_sincospi_v2f64:
+; ARMPL:       // %bb.0:
+; ARMPL-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; ARMPL-NEXT:    .cfi_def_cfa_offset 16
+; ARMPL-NEXT:    .cfi_offset w30, -16
+; ARMPL-NEXT:    bl armpl_vsincospiq_f64
+; ARMPL-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; ARMPL-NEXT:    ret
+  %result = call { <2 x double>, <2 x double> } @llvm.sincospi.v2f64(<2 x double> %x)
+  %result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0
+  %result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1
+  store <2 x double> %result.0, ptr %out_sin, align 8
+  store <2 x double> %result.1, ptr %out_cos, align 8
+  ret void
+}
+
+define void @test_sincospi_nxv4f32(<vscale x 4 x float> %x, ptr noalias %out_sin, ptr noalias %out_cos) {
+; SLEEF-LABEL: test_sincospi_nxv4f32:
+; SLEEF:       // %bb.0:
+; SLEEF-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; SLEEF-NEXT:    .cfi_def_cfa_offset 16
+; SLEEF-NEXT:    .cfi_offset w30, -16
+; SLEEF-NEXT:    bl _ZGVsNxvl4l4_sincospif
+; SLEEF-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; SLEEF-NEXT:    ret
+;
+; ARMPL-LABEL: test_sincospi_nxv4f32:
+; ARMPL:       // %bb.0:
+; ARMPL-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; ARMPL-NEXT:    .cfi_def_cfa_offset 16
+; ARMPL-NEXT:    .cfi_offset w30, -16
+; ARMPL-NEXT:    ptrue p0.s
+; ARMPL-NEXT:    bl armpl_svsincospi_f32_x
+; ARMPL-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; ARMPL-NEXT:    ret
+  %result = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincospi.nxv4f32(<vscale x 4 x float> %x)
+  %result.0 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %result, 0
+  %result.1 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %result, 1
+  store <vscale x 4 x float> %result.0, ptr %out_sin, align 4
+  store <vscale x 4 x float> %result.1, ptr %out_cos, align 4
+  ret void
+}
+
+define void @test_sincospi_nxv2f64(<vscale x 2 x double> %x, ptr noalias %out_sin, ptr noalias %out_cos) {
+; SLEEF-LABEL: test_sincospi_nxv2f64:
+; SLEEF:       // %bb.0:
+; SLEEF-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; SLEEF-NEXT:    .cfi_def_cfa_offset 16
+; SLEEF-NEXT:    .cfi_offset w30, -16
+; SLEEF-NEXT:    bl _ZGVsNxvl8l8_sincospi
+; SLEEF-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; SLEEF-NEXT:    ret
+;
+; ARMPL-LABEL: test_sincospi_nxv2f64:
+; ARMPL:       // %bb.0:
+; ARMPL-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; ARMPL-NEXT:    .cfi_def_cfa_offset 16
+; ARMPL-NEXT:    .cfi_offset w30, -16
+; ARMPL-NEXT:    ptrue p0.d
+; ARMPL-NEXT:    bl armpl_svsincospi_f64_x
+; ARMPL-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; ARMPL-NEXT:    ret
+  %result = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincospi.nxv2f64(<vscale x 2 x double> %x)
+  %result.0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %result, 0
+  %result.1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %result, 1
+  store <vscale x 2 x double> %result.0, ptr %out_sin, align 8
+  store <vscale x 2 x double> %result.1, ptr %out_cos, align 8
+  ret void
+}