diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index c8866f15745c2..28e88adfdeac2 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -47,6 +47,7 @@
 #include "clang/CodeGen/BackendUtil.h"
 #include "clang/CodeGen/ConstantInitBuilder.h"
 #include "clang/Frontend/FrontendDiagnostic.h"
+#include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
@@ -394,6 +395,17 @@ static void checkDataLayoutConsistency(const TargetInfo &Target,
     Check("__ibm128", llvm::Type::getPPC_FP128Ty(Context), Target.Ibm128Align);
 
   Check("void*", llvm::PointerType::getUnqual(Context), Target.PointerAlign);
+
+  if (Triple.f128LibmShouldUseLongDouble() &&
+      &Target.getLongDoubleFormat() != &llvm::APFloat::IEEEquad()) {
+    const char *SemName =
+        llvm::APFloatBase::SemanticsName(Target.getLongDoubleFormat());
+    llvm::errs() << "For target `" << Triple.str()
+                 << "` LLVM wants to use `long double` symbols for `_Float128` "
+                    "libm call lowering, but clang specifies `long double` as `"
+                 << SemName << "`\n";
+    abort();
+  }
 #endif
 }
 
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index e1589544787cf..db8043d75c26e 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -256,6 +256,7 @@ struct APFloatBase {
 
   LLVM_ABI static const llvm::fltSemantics &EnumToSemantics(Semantics S);
   LLVM_ABI static Semantics SemanticsToEnum(const llvm::fltSemantics &Sem);
+  LLVM_ABI static const char *SemanticsName(const llvm::fltSemantics &Sem);
 
   LLVM_ABI static const fltSemantics &IEEEhalf() LLVM_READNONE;
   LLVM_ABI static const fltSemantics &BFloat() LLVM_READNONE;
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index a248eb7444b20..ad6944fe7aa90 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3564,7 +3564,8 @@ class LLVM_ABI TargetLoweringBase {
     return Libcalls.getLibcallImpl(Call);
   }
 
-  /// Get the libcall routine name for the specified libcall.
+  /// Get the libcall routine name for the specified libcall if implemented,
+  /// otherwise NULL.
   const char *getLibcallName(RTLIB::Libcall Call) const {
     return Libcalls.getLibcallName(Call);
   }
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td
index ff343f30f0325..36ec27f870da5 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -22,6 +22,8 @@ def isOSWindows : RuntimeLibcallPredicate<"TT.isOSWindows()">;
 def darwinHasSinCosStret : RuntimeLibcallPredicate<"darwinHasSinCosStret(TT)">;
 def darwinHasExp10 : RuntimeLibcallPredicate<"darwinHasExp10(TT)">;
 def hasSinCos : RuntimeLibcallPredicate<"hasSinCos(TT)">;
+def f128LibmShouldUseLongDouble
+    : RuntimeLibcallPredicate<"f128LibmShouldUseLongDouble(TT)">;
 
 //--------------------------------------------------------------------
 // Declare all kinds of used libcalls
@@ -363,17 +365,37 @@ def MIPS16_RET_DF : RuntimeLibcall;
 def MIPS16_RET_SC : RuntimeLibcall;
 def MIPS16_RET_SF : RuntimeLibcall;
 
-multiclass LibmLongDoubleLibCall<string libcall_basename = !toupper(NAME),
-                                 string rtbasename = NAME> {
+// Create libcall impls for `long double` and `_Float128`. See also `_ld128`
+// impls defined at `LibmF128AsLongDoubleLibcalls`.
+
+// Produce libcall impls for all float types. If provided, `rtbasename` should
+// contain an `X` that is replaced with the type suffix (`f`, empty, `f128` or
+// `l`); if not provided, the suffix is appended to the def name.
+multiclass LibmLibcallImpls<string libcall_basename = !toupper(NAME),
+                            string rtbasename = !strconcat(NAME, "X")> {
+  def NAME#"f"
+      : RuntimeLibcallImpl<!cast<RuntimeLibcall>(libcall_basename#"_F32"),
+                           !subst("X", "f", rtbasename)>;
+  def NAME#""
+      : RuntimeLibcallImpl<!cast<RuntimeLibcall>(libcall_basename#"_F64"),
+                           !subst("X", "", rtbasename)>;
   def NAME#"_f128"
       : RuntimeLibcallImpl<!cast<RuntimeLibcall>(libcall_basename#"_F128"),
-                           !strconcat(rtbasename, "l")>;
+                           !subst("X", "f128", rtbasename)>;
   def NAME#"_ppcf128"
       : RuntimeLibcallImpl<!cast<RuntimeLibcall>(libcall_basename#"_PPCF128"),
-                           !strconcat(rtbasename, "l")>;
+                           !subst("X", "l", rtbasename)>;
   def NAME#"_f80"
       : RuntimeLibcallImpl<!cast<RuntimeLibcall>(libcall_basename#"_F80"),
-                           !strconcat(rtbasename, "l")>;
+                           !subst("X", "l", rtbasename)>;
+}
+
+multiclass LibmF128AsLongDoubleImpls<string libcall_basename = !toupper(NAME),
+                                     string rtbasename =
+                                         !strconcat(NAME, "X")> {
+  def NAME#"_ld128"
+      : RuntimeLibcallImpl<!cast<RuntimeLibcall>(libcall_basename#"_F128"),
+                           !subst("X", "l", rtbasename)>;
 }
 
 // AArch64 calls
@@ -695,217 +717,55 @@ def __clear_cache : RuntimeLibcallImpl<CLEAR_CACHE>;
 // libm
 //--------------------------------------------------------------------
 
-def fmodf : RuntimeLibcallImpl<REM_F32>;
-def fmod : RuntimeLibcallImpl<REM_F64>;
-def fmodl_f128 : RuntimeLibcallImpl<REM_F128, "fmodl">;
-def fmodl_f80 : RuntimeLibcallImpl<REM_F80, "fmodl">;
-def fmodl_ppc128 : RuntimeLibcallImpl<REM_PPCF128, "fmodl">;
-
-def fmaf : RuntimeLibcallImpl<FMA_F32>;
-def fma : RuntimeLibcallImpl<FMA_F64>;
-defm fma : LibmLongDoubleLibCall;
-
-def sqrtf : RuntimeLibcallImpl<SQRT_F32>;
-def sqrt : RuntimeLibcallImpl<SQRT_F64>;
-defm sqrt : LibmLongDoubleLibCall;
-
-def cbrtf : RuntimeLibcallImpl<CBRT_F32>;
-def cbrt : RuntimeLibcallImpl<CBRT_F64>;
-defm cbrt : LibmLongDoubleLibCall;
-
-def logf : RuntimeLibcallImpl<LOG_F32>;
-def log : RuntimeLibcallImpl<LOG_F64>;
-defm log : LibmLongDoubleLibCall;
-
-def __logf_finite : RuntimeLibcallImpl<LOG_FINITE_F32>;
-def __log_finite : RuntimeLibcallImpl<LOG_FINITE_F64>;
-def __logl_finite_f80 : RuntimeLibcallImpl<LOG_FINITE_F80, "__logl_finite">;
-def __logl_finite_f128 : RuntimeLibcallImpl<LOG_FINITE_F128, "__logl_finite">;
-def __logl_finite_ppcf128 : RuntimeLibcallImpl<LOG_FINITE_PPCF128, "__logl_finite">;
-
-def log2f : RuntimeLibcallImpl<LOG2_F32>;
-def log2 : RuntimeLibcallImpl<LOG2_F64>;
-defm log2 : LibmLongDoubleLibCall;
-
-def __log2f_finite : RuntimeLibcallImpl<LOG2_FINITE_F32>;
-def __log2_finite : RuntimeLibcallImpl<LOG2_FINITE_F64>;
-def __log2l_finite_f80 : RuntimeLibcallImpl<LOG2_FINITE_F80, "__log2l_finite">;
-def __log2l_finite_f128 : RuntimeLibcallImpl<LOG2_FINITE_F128, "__log2l_finite">;
-def __log2l_finite_ppcf128 : RuntimeLibcallImpl<LOG2_FINITE_PPCF128, "__log2l_finite">;
-
-def log10f : RuntimeLibcallImpl<LOG10_F32>;
-def log10 : RuntimeLibcallImpl<LOG10_F64>;
-defm log10 : LibmLongDoubleLibCall;
-
-def __log10f_finite : RuntimeLibcallImpl<LOG10_FINITE_F32>;
-def __log10_finite : RuntimeLibcallImpl<LOG10_FINITE_F64>;
-def __log10l_finite_f80 : RuntimeLibcallImpl<LOG10_FINITE_F80, "__log10l_finite">;
-def __log10l_finite_f128 : RuntimeLibcallImpl<LOG10_FINITE_F128, "__log10l_finite">;
-def __log10l_finite_ppcf128 : RuntimeLibcallImpl<LOG10_FINITE_PPCF128, "__log10l_finite">;
-
-def expf : RuntimeLibcallImpl<EXP_F32>;
-def exp : RuntimeLibcallImpl<EXP_F64>;
-defm exp : LibmLongDoubleLibCall<"EXP", "exp">;
-
-def __expf_finite : RuntimeLibcallImpl<EXP_FINITE_F32>;
-def __exp_finite : RuntimeLibcallImpl<EXP_FINITE_F64>;
-def __expl_finite_f80 : RuntimeLibcallImpl<EXP_FINITE_F80, "__expl_finite">;
-def __expl_finite_f128 : RuntimeLibcallImpl<EXP_FINITE_F128, "__expl_finite">;
-def __expl_finite_ppcf128 : RuntimeLibcallImpl<EXP_FINITE_PPCF128, "__expl_finite">;
-
-def exp2f : RuntimeLibcallImpl<EXP2_F32>;
-def exp2 : RuntimeLibcallImpl<EXP2_F64>;
-defm exp2 : LibmLongDoubleLibCall<"EXP2", "exp2">;
-
-def __exp2f_finite : RuntimeLibcallImpl<EXP2_FINITE_F32>;
-def __exp2_finite : RuntimeLibcallImpl<EXP2_FINITE_F64>;
-def __exp2l_finite_f80 : RuntimeLibcallImpl<EXP2_FINITE_F80, "__exp2l_finite">;
-def __exp2l_finite_f128 : RuntimeLibcallImpl<EXP2_FINITE_F128, "__exp2l_finite">;
-def __exp2l_finite_ppcf128 : RuntimeLibcallImpl<EXP2_FINITE_PPCF128, "__exp2l_finite">;
-
-def exp10f : RuntimeLibcallImpl<EXP10_F32>;
-def exp10 : RuntimeLibcallImpl<EXP10_F64>;
-def exp10l_f80 : RuntimeLibcallImpl<EXP10_F80, "exp10l">;
-def exp10l_f128 : RuntimeLibcallImpl<EXP10_F128, "exp10l">;
-def exp10l_ppcf128 : RuntimeLibcallImpl<EXP10_PPCF128, "exp10l">;
-
-def sinf : RuntimeLibcallImpl<SIN_F32>;
-def sin : RuntimeLibcallImpl<SIN_F64>;
-defm sin : LibmLongDoubleLibCall;
-
-def cosf : RuntimeLibcallImpl<COS_F32>;
-def cos : RuntimeLibcallImpl<COS_F64>;
-defm cos : LibmLongDoubleLibCall;
-
-def tanf : RuntimeLibcallImpl<TAN_F32>;
-def tan : RuntimeLibcallImpl<TAN_F64>;
-defm tan : LibmLongDoubleLibCall;
-
-def sinhf : RuntimeLibcallImpl<SINH_F32>;
-def sinh : RuntimeLibcallImpl<SINH_F64>;
-defm sinh : LibmLongDoubleLibCall;
-
-def coshf : RuntimeLibcallImpl<COSH_F32>;
-def cosh : RuntimeLibcallImpl<COSH_F64>;
-defm cosh : LibmLongDoubleLibCall;
-
-def tanhf : RuntimeLibcallImpl<TANH_F32>;
-def tanh : RuntimeLibcallImpl<TANH_F64>;
-defm tanh : LibmLongDoubleLibCall;
-
-def asinf : RuntimeLibcallImpl<ASIN_F32>;
-def asin : RuntimeLibcallImpl<ASIN_F64>;
-defm asin : LibmLongDoubleLibCall;
-
-def acosf : RuntimeLibcallImpl<ACOS_F32>;
-def acos : RuntimeLibcallImpl<ACOS_F64>;
-defm acos : LibmLongDoubleLibCall;
-
-def atanf : RuntimeLibcallImpl<ATAN_F32>;
-def atan : RuntimeLibcallImpl<ATAN_F64>;
-defm atan : LibmLongDoubleLibCall;
-
-def atan2f : RuntimeLibcallImpl<ATAN2_F32>;
-def atan2 : RuntimeLibcallImpl<ATAN2_F64>;
-defm atan2 : LibmLongDoubleLibCall;
-
-def powf : RuntimeLibcallImpl<POW_F32>;
-def pow : RuntimeLibcallImpl<POW_F64>;
-defm pow : LibmLongDoubleLibCall;
-
-def __powf_finite : RuntimeLibcallImpl<POW_FINITE_F32>;
-def __pow_finite : RuntimeLibcallImpl<POW_FINITE_F64>;
-def __powl_finite_f80 : RuntimeLibcallImpl<POW_FINITE_F80, "__powl_finite">;
-def __powl_finite_f128 : RuntimeLibcallImpl<POW_FINITE_F128, "__powl_finite">;
-def __powl_finite_ppcf128 : RuntimeLibcallImpl<POW_FINITE_PPCF128, "__powl_finite">;
-
-def ceilf : RuntimeLibcallImpl<CEIL_F32>;
-def ceil : RuntimeLibcallImpl<CEIL_F64>;
-defm ceil : LibmLongDoubleLibCall;
-
-def truncf : RuntimeLibcallImpl<TRUNC_F32>;
-def trunc : RuntimeLibcallImpl<TRUNC_F64>;
-defm trunc : LibmLongDoubleLibCall;
-
-def rintf : RuntimeLibcallImpl<RINT_F32>;
-def rint : RuntimeLibcallImpl<RINT_F64>;
-defm rint : LibmLongDoubleLibCall;
-
-def nearbyintf : RuntimeLibcallImpl<NEARBYINT_F32>;
-def nearbyint : RuntimeLibcallImpl<NEARBYINT_F64>;
-defm nearbyint : LibmLongDoubleLibCall;
-
-def roundf : RuntimeLibcallImpl<ROUND_F32>;
-def round : RuntimeLibcallImpl<ROUND_F64>;
-defm round : LibmLongDoubleLibCall;
-
-def roundevenf : RuntimeLibcallImpl<ROUNDEVEN_F32>;
-def roundeven : RuntimeLibcallImpl<ROUNDEVEN_F64>;
-defm roundeven : LibmLongDoubleLibCall;
-
-def floorf : RuntimeLibcallImpl<FLOOR_F32>;
-def floor : RuntimeLibcallImpl<FLOOR_F64>;
-defm floor : LibmLongDoubleLibCall;
-
-def copysignf : RuntimeLibcallImpl<COPYSIGN_F32>;
-def copysign : RuntimeLibcallImpl<COPYSIGN_F64>;
-defm copysign : LibmLongDoubleLibCall;
-
-def fminf : RuntimeLibcallImpl<FMIN_F32>;
-def fmin : RuntimeLibcallImpl<FMIN_F64>;
-defm fmin : LibmLongDoubleLibCall;
-
-def fmaxf : RuntimeLibcallImpl<FMAX_F32>;
-def fmax : RuntimeLibcallImpl<FMAX_F64>;
-defm fmax : LibmLongDoubleLibCall;
-
-def fminimumf : RuntimeLibcallImpl<FMINIMUM_F32>;
-def fminimum : RuntimeLibcallImpl<FMINIMUM_F64>;
-defm fminimum : LibmLongDoubleLibCall;
-
-def fmaximumf : RuntimeLibcallImpl<FMAXIMUM_F32>;
-def fmaximum : RuntimeLibcallImpl<FMAXIMUM_F64>;
-defm fmaximum : LibmLongDoubleLibCall;
-
-def fminimum_numf : RuntimeLibcallImpl<FMINIMUM_NUM_F32>;
-def fminimum_num : RuntimeLibcallImpl<FMINIMUM_NUM_F64>;
-defm fminimum_num : LibmLongDoubleLibCall;
-
-def fmaximum_numf : RuntimeLibcallImpl<FMAXIMUM_NUM_F32>;
-def fmaximum_num : RuntimeLibcallImpl<FMAXIMUM_NUM_F64>;
-defm fmaximum_num : LibmLongDoubleLibCall;
-
-def lroundf : RuntimeLibcallImpl<LROUND_F32>;
-def lround : RuntimeLibcallImpl<LROUND_F64>;
-defm lround : LibmLongDoubleLibCall;
-
-def llroundf : RuntimeLibcallImpl<LLROUND_F32>;
-def llround : RuntimeLibcallImpl<LLROUND_F64>;
-defm llround : LibmLongDoubleLibCall;
-
-def lrintf : RuntimeLibcallImpl<LRINT_F32>;
-def lrint : RuntimeLibcallImpl<LRINT_F64>;
-defm lrint : LibmLongDoubleLibCall;
-
-def llrintf : RuntimeLibcallImpl<LLRINT_F32>;
-def llrint : RuntimeLibcallImpl<LLRINT_F64>;
-defm llrint : LibmLongDoubleLibCall;
-
-def ldexpf : RuntimeLibcallImpl<LDEXP_F32>;
-def ldexp : RuntimeLibcallImpl<LDEXP_F64>;
-defm ldexp : LibmLongDoubleLibCall;
-
-def frexpf : RuntimeLibcallImpl<FREXP_F32>;
-def frexp : RuntimeLibcallImpl<FREXP_F64>;
-defm frexp : LibmLongDoubleLibCall;
-
-def sincospif : RuntimeLibcallImpl<SINCOSPI_F32>;
-def sincospi : RuntimeLibcallImpl<SINCOSPI_F64>;
-defm sincospi : LibmLongDoubleLibCall;
-
-def modff : RuntimeLibcallImpl<MODF_F32>;
-def modf : RuntimeLibcallImpl<MODF_F64>;
-defm modf : LibmLongDoubleLibCall;
+defm fmod : LibmLibcallImpls<"REM">;
+defm fma : LibmLibcallImpls;
+defm sqrt : LibmLibcallImpls;
+defm cbrt : LibmLibcallImpls;
+defm log : LibmLibcallImpls;
+defm __log_finite : LibmLibcallImpls<"LOG_FINITE", "__logX_finite">;
+defm log2 : LibmLibcallImpls;
+defm __log2_finite : LibmLibcallImpls<"LOG2_FINITE", "__log2X_finite">;
+defm log10 : LibmLibcallImpls;
+defm __log10_finite : LibmLibcallImpls<"LOG10_FINITE", "__log10X_finite">;
+defm exp : LibmLibcallImpls;
+defm __exp_finite : LibmLibcallImpls<"EXP_FINITE", "__expX_finite">;
+defm exp2 : LibmLibcallImpls;
+defm __exp2_finite : LibmLibcallImpls<"EXP2_FINITE", "__exp2X_finite">;
+defm exp10 : LibmLibcallImpls;
+defm sin : LibmLibcallImpls;
+defm cos : LibmLibcallImpls;
+defm tan : LibmLibcallImpls;
+defm sinh : LibmLibcallImpls;
+defm cosh : LibmLibcallImpls;
+defm tanh : LibmLibcallImpls;
+defm asin : LibmLibcallImpls;
+defm acos : LibmLibcallImpls;
+defm atan : LibmLibcallImpls;
+defm atan2 : LibmLibcallImpls;
+defm pow : LibmLibcallImpls;
+defm __pow_finite : LibmLibcallImpls<"POW_FINITE", "__powX_finite">;
+defm ceil : LibmLibcallImpls;
+defm trunc : LibmLibcallImpls;
+defm rint : LibmLibcallImpls;
+defm nearbyint : LibmLibcallImpls;
+defm round : LibmLibcallImpls;
+defm roundeven : LibmLibcallImpls;
+defm floor : LibmLibcallImpls;
+defm copysign : LibmLibcallImpls;
+defm fmin : LibmLibcallImpls;
+defm fmax : LibmLibcallImpls;
+defm fminimum : LibmLibcallImpls;
+defm fmaximum : LibmLibcallImpls;
+defm fminimum_num : LibmLibcallImpls;
+defm fmaximum_num : LibmLibcallImpls;
+defm lround : LibmLibcallImpls;
+defm llround : LibmLibcallImpls;
+defm lrint : LibmLibcallImpls;
+defm llrint : LibmLibcallImpls;
+defm ldexp : LibmLibcallImpls;
+defm frexp : LibmLibcallImpls;
+defm sincospi : LibmLibcallImpls;
+defm modf : LibmLibcallImpls;
 
 // Floating point environment
 def fegetenv : RuntimeLibcallImpl<FEGETENV>;
@@ -966,9 +826,7 @@ def __exp10 : RuntimeLibcallImpl<EXP10_F64>;
 def __sincosf_stret : RuntimeLibcallImpl<SINCOS_STRET_F32>;
 def __sincos_stret : RuntimeLibcallImpl<SINCOS_STRET_F64>;
 
-def sincosf : RuntimeLibcallImpl<SINCOS_F32>;
-def sincos : RuntimeLibcallImpl<SINCOS_F64>;
-defm sincos : LibmLongDoubleLibCall;
+defm sincos : LibmLibcallImpls;
 
 def bzero : RuntimeLibcallImpl<BZERO>;
 def __bzero : RuntimeLibcallImpl<BZERO>;
@@ -980,60 +838,65 @@ def __riscv_flush_icache : RuntimeLibcallImpl<RISCV_FLUSH_ICACHE>;
 // F128 libm Runtime Libcalls
 //===----------------------------------------------------------------------===//
 
-defset list<RuntimeLibcallImpl> LibmF128Libcalls = {
-  def logf128 : RuntimeLibcallImpl<LOG_F128>;
-  def log2f128 : RuntimeLibcallImpl<LOG2_F128>;
-  def log10f128 : RuntimeLibcallImpl<LOG10_F128>;
-  def expf128 : RuntimeLibcallImpl<EXP_F128>;
-  def exp2f128 : RuntimeLibcallImpl<EXP2_F128>;
-  def exp10f128 : RuntimeLibcallImpl<EXP10_F128>;
-  def sinf128 : RuntimeLibcallImpl<SIN_F128>;
-  def cosf128 : RuntimeLibcallImpl<COS_F128>;
-  def tanf128 : RuntimeLibcallImpl<TAN_F128>;
-  def tanhf128 : RuntimeLibcallImpl<TANH_F128>;
-  def sincosf128 : RuntimeLibcallImpl<SINCOS_F128>;
-  def powf128 : RuntimeLibcallImpl<POW_F128>;
-  def fminf128 : RuntimeLibcallImpl<FMIN_F128>;
-  def fmaxf128 : RuntimeLibcallImpl<FMAX_F128>;
-  def fmodf128 : RuntimeLibcallImpl<REM_F128>;
-  def sqrtf128 : RuntimeLibcallImpl<SQRT_F128>;
-  def ceilf128 : RuntimeLibcallImpl<CEIL_F128>;
-  def floorf128 : RuntimeLibcallImpl<FLOOR_F128>;
-  def truncf128 : RuntimeLibcallImpl<TRUNC_F128>;
-  def roundf128 : RuntimeLibcallImpl<ROUND_F128>;
-  def lroundf128 : RuntimeLibcallImpl<LROUND_F128>;
-  def llroundf128 : RuntimeLibcallImpl<LLROUND_F128>;
-  def rintf128 : RuntimeLibcallImpl<RINT_F128>;
-  def lrintf128 : RuntimeLibcallImpl<LRINT_F128>;
-  def llrintf128 : RuntimeLibcallImpl<LLRINT_F128>;
-  def nearbyintf128 : RuntimeLibcallImpl<NEARBYINT_F128>;
-  def fmaf128 : RuntimeLibcallImpl<FMA_F128>;
-  def frexpf128 : RuntimeLibcallImpl<FREXP_F128>;
-  def cbrtf128 : RuntimeLibcallImpl<CBRT_F128>;
-  def fminimumf128 : RuntimeLibcallImpl<FMINIMUM_F128>;
-  def fmaximumf128 : RuntimeLibcallImpl<FMAXIMUM_F128>;
-  def fminimum_numf128 : RuntimeLibcallImpl<FMINIMUM_NUM_F128>;
-  def fmaximum_numf128 : RuntimeLibcallImpl<FMAXIMUM_NUM_F128>;
-  def asinf128 : RuntimeLibcallImpl<ASIN_F128>;
-  def acosf128 : RuntimeLibcallImpl<ACOS_F128>;
-  def atanf128 : RuntimeLibcallImpl<ATAN_F128>;
-  def atan2f128 : RuntimeLibcallImpl<ATAN2_F128>;
-  def ldexpf128 : RuntimeLibcallImpl<LDEXP_F128>;
-  def roundevenf128 : RuntimeLibcallImpl<ROUNDEVEN_F128>;
-  def modff128 : RuntimeLibcallImpl<MODF_F128>;
-  def sinhf128 : RuntimeLibcallImpl<SINH_F128>;
-  def coshf128 : RuntimeLibcallImpl<COSH_F128>;
-  def copysignf128 : RuntimeLibcallImpl<COPYSIGN_F128>;
+// Impls for treating `fp128` as `long double`
+defset list<RuntimeLibcallImpl> LibmF128AsLongDoubleLibcalls = {
+  defm log : LibmF128AsLongDoubleImpls;
+  defm log2 : LibmF128AsLongDoubleImpls;
+  defm log10 : LibmF128AsLongDoubleImpls;
+  defm exp : LibmF128AsLongDoubleImpls;
+  defm exp2 : LibmF128AsLongDoubleImpls;
+  defm exp10 : LibmF128AsLongDoubleImpls;
+  defm sin : LibmF128AsLongDoubleImpls;
+  defm cos : LibmF128AsLongDoubleImpls;
+  defm tan : LibmF128AsLongDoubleImpls;
+  defm tanh : LibmF128AsLongDoubleImpls;
+  defm sincos : LibmF128AsLongDoubleImpls;
+  defm pow : LibmF128AsLongDoubleImpls;
+  defm fmin : LibmF128AsLongDoubleImpls;
+  defm fmax : LibmF128AsLongDoubleImpls;
+  defm fmod : LibmF128AsLongDoubleImpls<"REM">;
+  defm sqrt : LibmF128AsLongDoubleImpls;
+  defm ceil : LibmF128AsLongDoubleImpls;
+  defm floor : LibmF128AsLongDoubleImpls;
+  defm trunc : LibmF128AsLongDoubleImpls;
+  defm round : LibmF128AsLongDoubleImpls;
+  defm lround : LibmF128AsLongDoubleImpls;
+  defm llround : LibmF128AsLongDoubleImpls;
+  defm rint : LibmF128AsLongDoubleImpls;
+  defm lrint : LibmF128AsLongDoubleImpls;
+  defm llrint : LibmF128AsLongDoubleImpls;
+  defm nearbyint : LibmF128AsLongDoubleImpls;
+  defm fma : LibmF128AsLongDoubleImpls;
+  defm frexp : LibmF128AsLongDoubleImpls;
+  defm cbrt : LibmF128AsLongDoubleImpls;
+  defm fminimum : LibmF128AsLongDoubleImpls;
+  defm fmaximum : LibmF128AsLongDoubleImpls;
+  defm fminimum_num : LibmF128AsLongDoubleImpls;
+  defm fmaximum_num : LibmF128AsLongDoubleImpls;
+  defm asin : LibmF128AsLongDoubleImpls;
+  defm acos : LibmF128AsLongDoubleImpls;
+  defm atan : LibmF128AsLongDoubleImpls;
+  defm atan2 : LibmF128AsLongDoubleImpls;
+  defm ldexp : LibmF128AsLongDoubleImpls;
+  defm roundeven : LibmF128AsLongDoubleImpls;
+  defm modf : LibmF128AsLongDoubleImpls;
+  defm sinh : LibmF128AsLongDoubleImpls;
+  defm cosh : LibmF128AsLongDoubleImpls;
+  defm copysign : LibmF128AsLongDoubleImpls;
 }
 
-defset list<RuntimeLibcallImpl> LibmF128FiniteLibcalls = {
-  def __logf128_finite : RuntimeLibcallImpl<LOG_FINITE_F128>;
-  def __log2f128_finite : RuntimeLibcallImpl<LOG2_FINITE_F128>;
-  def __log10f128_finite : RuntimeLibcallImpl<LOG10_FINITE_F128>;
-  def __expf128_finite : RuntimeLibcallImpl<EXP_FINITE_F128>;
-  def __exp2f128_finite : RuntimeLibcallImpl<EXP2_FINITE_F128>;
-  def __exp10f128_finite : RuntimeLibcallImpl<EXP10_FINITE_F128>;
-  def __powf128_finite : RuntimeLibcallImpl<POW_FINITE_F128>;
+defset list<RuntimeLibcallImpl> LibmF128AsLongDoubleFiniteLibcalls = {
+  defm __log_finite : LibmF128AsLongDoubleImpls<"LOG_FINITE", "__logX_finite">;
+  defm __log2_finite
+      : LibmF128AsLongDoubleImpls<"LOG2_FINITE", "__log2X_finite">;
+  defm __log10_finite
+      : LibmF128AsLongDoubleImpls<"LOG10_FINITE", "__log10X_finite">;
+  defm __exp_finite : LibmF128AsLongDoubleImpls<"EXP_FINITE", "__expX_finite">;
+  defm __exp2_finite
+      : LibmF128AsLongDoubleImpls<"EXP2_FINITE", "__exp2X_finite">;
+  defm __exp10_finite
+      : LibmF128AsLongDoubleImpls<"EXP10_FINITE", "__exp10X_finite">;
+  defm __pow_finite : LibmF128AsLongDoubleImpls<"POW_FINITE", "__powX_finite">;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1056,10 +919,6 @@ defvar DefaultRuntimeLibcallImpls_ppcf128 =
     !filter(entry, AllDefaultRuntimeLibcallImpls,
             !match(!cast<string>(entry.Provides), "PPCF128"));
 
-defvar DefaultRuntimeLibcallImpls_f128 =
-    !filter(entry, AllDefaultRuntimeLibcallImpls,
-            !match(!cast<string>(entry.Provides), "_F128"));
-
 defvar DefaultRuntimeLibcallImpls =
 !listremove(
   !listremove(
@@ -1847,7 +1706,7 @@ def NVPTXSystemLibrary : SystemRuntimeLibrary<isNVPTX, (add)>;
 //===----------------------------------------------------------------------===//
 
 // For IEEE quad-precision libcall names, PPC uses "kf" instead of "tf".
-defset list<RuntimeLibcallImpl> PPCRuntimeLibcalls = {
+defset list<RuntimeLibcallImpl> PPCOverriddenRuntimeLibcalls = {
   def __addkf3 : RuntimeLibcallImpl<ADD_F128>;
   def __subkf3 : RuntimeLibcallImpl<SUB_F128>;
   def __mulkf3 : RuntimeLibcallImpl<MUL_F128>;
@@ -1891,7 +1750,17 @@ defset list<RuntimeLibcallImpl> PPC32AIXCallList = {
   def ___bzero : RuntimeLibcallImpl<BZERO>;
 }
 
-defvar PPCOverrides = !foreach(entry, PPCRuntimeLibcalls, entry.Provides);
+// List of overridden libcalls as strings, `["ADD_F128", "SUB_F128", ...]`
+defvar PPCOverriddenNames = !foreach(entry, PPCOverriddenRuntimeLibcalls,
+                                     !cast<string>(entry.Provides));
+
+// Default impls whose libcall appears in `PPCOverriddenNames` (replaced on PPC)
+defvar PPCNonOverriddenImpls = !filter(
+    default_entry, DefaultRuntimeLibcallImpls,
+    // `!contains` does not exist; `not->empty->filter` is a hacky substitute
+    !not(!empty(
+        !filter(overridden, PPCOverriddenNames,
+                !eq(overridden, !cast<string>(default_entry.Provides))))));
 
 def isPPC : RuntimeLibcallPredicate<"TT.isPPC()">;
 def isPPC32 : RuntimeLibcallPredicate<"TT.isPPC32()">;
@@ -1904,20 +1773,22 @@ def isPPC64_AIX : RuntimeLibcallPredicate<"(TT.isPPC64() && TT.isOSAIX())">;
 def AIX32Calls : LibcallImpls<(add PPC32AIXCallList), isPPC32_AIX>;
 def AIX64Calls : LibcallImpls<(add PPC64AIXCallList), isPPC64_AIX>;
 
+// Replace overridden values, adjust mem* symbols, add ppc_f128<->f128
+// conversions.
+defvar PPCDefaultRuntimeLibcallImpls = (add
+    (sub DefaultRuntimeLibcallImpls, PPCNonOverriddenImpls, memcpy),
+    PPCOverriddenRuntimeLibcalls, __extendkftf2, __trunctfkf2,
+    DefaultRuntimeLibcallImpls_ppcf128, AIX32Calls, AIX64Calls);
+
 // FIXME: Current emission behavior with multiple implementations is
 // janky. We need to filter out the conflicting cases with different
 // f128 names, and then add the overrides. We should switch to
 // explicitly adding subsets of the default calls.
 def PPCSystemLibrary
     : SystemRuntimeLibrary<isPPC,
-      (add PPCRuntimeLibcalls,
-           (sub DefaultRuntimeLibcallImpls, memcpy,
-                DefaultRuntimeLibcallImpls_f128),
-           __extendkftf2, __trunctfkf2,
-           DefaultRuntimeLibcallImpls_ppcf128,
-           LibmF128Libcalls, AIX32Calls, AIX64Calls,
-           AvailableIf<memcpy, isNotAIX>,
-           LibcallImpls<(add Int128RTLibcalls), isPPC64>)>;
+                           (add PPCDefaultRuntimeLibcallImpls,
+                               AvailableIf<memcpy, isNotAIX>,
+                               LibcallImpls<(add Int128RTLibcalls), isPPC64>)>;
 
 //===----------------------------------------------------------------------===//
 // RISCV Runtime Libcalls
diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
index 657f4230379e8..85a6738b9a0f3 100644
--- a/llvm/include/llvm/TargetParser/Triple.h
+++ b/llvm/include/llvm/TargetParser/Triple.h
@@ -266,13 +266,13 @@ class Triple {
     EABIHF,
     Android,
     Musl,
-    MuslABIN32,
-    MuslABI64,
-    MuslEABI,
-    MuslEABIHF,
-    MuslF32,
-    MuslSF,
-    MuslX32,
+    MuslABIN32, ///< Musl MIPS64 with the N32 ABI
+    MuslABI64,  ///< Musl MIPS64 with the N64 ABI
+    MuslEABI,   ///< Musl Arm32 EABI
+    MuslEABIHF, ///< Musl Arm32 EABI + HF
+    MuslF32,    ///< Musl LoongArch ILP32F/LP64F
+    MuslSF,     ///< Musl LoongArch ILP32S/LP64S
+    MuslX32,    ///< Musl using 32-bit ABI on x86_64
     LLVM,
 
     MSVC,
@@ -1274,6 +1274,10 @@ class Triple {
   /// or an invalid version tuple if this triple doesn't have one.
   LLVM_ABI VersionTuple getMinimumSupportedOSVersion() const;
 
+  /// Return true if `_Float128` libcalls should lower to e.g. `sqrtl` (`long
+  /// double`) rather than the default `sqrtf128`.
+  bool f128LibmShouldUseLongDouble() const;
+
   /// @}
   /// @name Static helpers for IDs.
   /// @{
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e0597988e8907..95d0ef2a7ab26 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -182,8 +182,10 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
   }
 
   const char *LibcallName = getLibcallName(LC);
-  if (LC == RTLIB::UNKNOWN_LIBCALL || !LibcallName)
-    reportFatalInternalError("unsupported library call operation");
+  if (LC == RTLIB::UNKNOWN_LIBCALL || !LibcallName) {
+    reportFatalInternalError("unsupported library call operation: libcall " +
+                             Twine(LC));
+  }
 
   SDValue Callee =
       DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index 006f3d59f32b2..376923224d0a0 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -62,59 +62,60 @@ static void setARMLibcallNames(RuntimeLibcallsInfo &Info, const Triple &TT,
     Info.setLibcallImplCallingConv(Impl, CallingConv::ARM_AAPCS);
 }
 
+/// Set all libm libcalls for _Float128 to `long double` (`*l`) symbols.
 static void setLongDoubleIsF128Libm(RuntimeLibcallsInfo &Info,
                                     bool FiniteOnlyFuncs = false) {
-  Info.setLibcallImpl(RTLIB::REM_F128, RTLIB::fmodf128);
-  Info.setLibcallImpl(RTLIB::FMA_F128, RTLIB::fmaf128);
-  Info.setLibcallImpl(RTLIB::SQRT_F128, RTLIB::sqrtf128);
-  Info.setLibcallImpl(RTLIB::CBRT_F128, RTLIB::cbrtf128);
-  Info.setLibcallImpl(RTLIB::LOG_F128, RTLIB::logf128);
-  Info.setLibcallImpl(RTLIB::LOG2_F128, RTLIB::log2f128);
-  Info.setLibcallImpl(RTLIB::LOG10_F128, RTLIB::log10f128);
-  Info.setLibcallImpl(RTLIB::EXP_F128, RTLIB::expf128);
-  Info.setLibcallImpl(RTLIB::EXP2_F128, RTLIB::exp2f128);
-  Info.setLibcallImpl(RTLIB::EXP10_F128, RTLIB::exp10f128);
-  Info.setLibcallImpl(RTLIB::SIN_F128, RTLIB::sinf128);
-  Info.setLibcallImpl(RTLIB::COS_F128, RTLIB::cosf128);
-  Info.setLibcallImpl(RTLIB::TAN_F128, RTLIB::tanf128);
-  Info.setLibcallImpl(RTLIB::SINCOS_F128, RTLIB::sincosf128);
-  Info.setLibcallImpl(RTLIB::ASIN_F128, RTLIB::asinf128);
-  Info.setLibcallImpl(RTLIB::ACOS_F128, RTLIB::acosf128);
-  Info.setLibcallImpl(RTLIB::ATAN_F128, RTLIB::atanf128);
-  Info.setLibcallImpl(RTLIB::ATAN2_F128, RTLIB::atan2f128);
-  Info.setLibcallImpl(RTLIB::SINH_F128, RTLIB::sinhf128);
-  Info.setLibcallImpl(RTLIB::COSH_F128, RTLIB::coshf128);
-  Info.setLibcallImpl(RTLIB::TANH_F128, RTLIB::tanhf128);
-  Info.setLibcallImpl(RTLIB::POW_F128, RTLIB::powf128);
-  Info.setLibcallImpl(RTLIB::CEIL_F128, RTLIB::ceilf128);
-  Info.setLibcallImpl(RTLIB::TRUNC_F128, RTLIB::truncf128);
-  Info.setLibcallImpl(RTLIB::RINT_F128, RTLIB::rintf128);
-  Info.setLibcallImpl(RTLIB::NEARBYINT_F128, RTLIB::nearbyintf128);
-  Info.setLibcallImpl(RTLIB::ROUND_F128, RTLIB::roundf128);
-  Info.setLibcallImpl(RTLIB::ROUNDEVEN_F128, RTLIB::roundevenf128);
-  Info.setLibcallImpl(RTLIB::FLOOR_F128, RTLIB::floorf128);
-  Info.setLibcallImpl(RTLIB::COPYSIGN_F128, RTLIB::copysignf128);
-  Info.setLibcallImpl(RTLIB::FMIN_F128, RTLIB::fminf128);
-  Info.setLibcallImpl(RTLIB::FMAX_F128, RTLIB::fmaxf128);
-  Info.setLibcallImpl(RTLIB::FMINIMUM_F128, RTLIB::fminimumf128);
-  Info.setLibcallImpl(RTLIB::FMAXIMUM_F128, RTLIB::fmaximumf128);
-  Info.setLibcallImpl(RTLIB::FMINIMUM_NUM_F128, RTLIB::fminimum_numf128);
-  Info.setLibcallImpl(RTLIB::FMAXIMUM_NUM_F128, RTLIB::fmaximum_numf128);
-  Info.setLibcallImpl(RTLIB::LROUND_F128, RTLIB::lroundf128);
-  Info.setLibcallImpl(RTLIB::LLROUND_F128, RTLIB::llroundf128);
-  Info.setLibcallImpl(RTLIB::LRINT_F128, RTLIB::lrintf128);
-  Info.setLibcallImpl(RTLIB::LLRINT_F128, RTLIB::llrintf128);
-  Info.setLibcallImpl(RTLIB::LDEXP_F128, RTLIB::ldexpf128);
-  Info.setLibcallImpl(RTLIB::FREXP_F128, RTLIB::frexpf128);
-  Info.setLibcallImpl(RTLIB::MODF_F128, RTLIB::modff128);
+  Info.setLibcallImpl(RTLIB::REM_F128, RTLIB::fmod_ld128);
+  Info.setLibcallImpl(RTLIB::FMA_F128, RTLIB::fma_ld128);
+  Info.setLibcallImpl(RTLIB::SQRT_F128, RTLIB::sqrt_ld128);
+  Info.setLibcallImpl(RTLIB::CBRT_F128, RTLIB::cbrt_ld128);
+  Info.setLibcallImpl(RTLIB::LOG_F128, RTLIB::log_ld128);
+  Info.setLibcallImpl(RTLIB::LOG2_F128, RTLIB::log2_ld128);
+  Info.setLibcallImpl(RTLIB::LOG10_F128, RTLIB::log10_ld128);
+  Info.setLibcallImpl(RTLIB::EXP_F128, RTLIB::exp_ld128);
+  Info.setLibcallImpl(RTLIB::EXP2_F128, RTLIB::exp2_ld128);
+  Info.setLibcallImpl(RTLIB::EXP10_F128, RTLIB::exp10_ld128);
+  Info.setLibcallImpl(RTLIB::SIN_F128, RTLIB::sin_ld128);
+  Info.setLibcallImpl(RTLIB::COS_F128, RTLIB::cos_ld128);
+  Info.setLibcallImpl(RTLIB::TAN_F128, RTLIB::tan_ld128);
+  Info.setLibcallImpl(RTLIB::SINCOS_F128, RTLIB::sincos_ld128);
+  Info.setLibcallImpl(RTLIB::ASIN_F128, RTLIB::asin_ld128);
+  Info.setLibcallImpl(RTLIB::ACOS_F128, RTLIB::acos_ld128);
+  Info.setLibcallImpl(RTLIB::ATAN_F128, RTLIB::atan_ld128);
+  Info.setLibcallImpl(RTLIB::ATAN2_F128, RTLIB::atan2_ld128);
+  Info.setLibcallImpl(RTLIB::SINH_F128, RTLIB::sinh_ld128);
+  Info.setLibcallImpl(RTLIB::COSH_F128, RTLIB::cosh_ld128);
+  Info.setLibcallImpl(RTLIB::TANH_F128, RTLIB::tanh_ld128);
+  Info.setLibcallImpl(RTLIB::POW_F128, RTLIB::pow_ld128);
+  Info.setLibcallImpl(RTLIB::CEIL_F128, RTLIB::ceil_ld128);
+  Info.setLibcallImpl(RTLIB::TRUNC_F128, RTLIB::trunc_ld128);
+  Info.setLibcallImpl(RTLIB::RINT_F128, RTLIB::rint_ld128);
+  Info.setLibcallImpl(RTLIB::NEARBYINT_F128, RTLIB::nearbyint_ld128);
+  Info.setLibcallImpl(RTLIB::ROUND_F128, RTLIB::round_ld128);
+  Info.setLibcallImpl(RTLIB::ROUNDEVEN_F128, RTLIB::roundeven_ld128);
+  Info.setLibcallImpl(RTLIB::FLOOR_F128, RTLIB::floor_ld128);
+  Info.setLibcallImpl(RTLIB::COPYSIGN_F128, RTLIB::copysign_ld128);
+  Info.setLibcallImpl(RTLIB::FMIN_F128, RTLIB::fmin_ld128);
+  Info.setLibcallImpl(RTLIB::FMAX_F128, RTLIB::fmax_ld128);
+  Info.setLibcallImpl(RTLIB::FMINIMUM_F128, RTLIB::fminimum_ld128);
+  Info.setLibcallImpl(RTLIB::FMAXIMUM_F128, RTLIB::fmaximum_ld128);
+  Info.setLibcallImpl(RTLIB::FMINIMUM_NUM_F128, RTLIB::fminimum_num_ld128);
+  Info.setLibcallImpl(RTLIB::FMAXIMUM_NUM_F128, RTLIB::fmaximum_num_ld128);
+  Info.setLibcallImpl(RTLIB::LROUND_F128, RTLIB::lround_ld128);
+  Info.setLibcallImpl(RTLIB::LLROUND_F128, RTLIB::llround_ld128);
+  Info.setLibcallImpl(RTLIB::LRINT_F128, RTLIB::lrint_ld128);
+  Info.setLibcallImpl(RTLIB::LLRINT_F128, RTLIB::llrint_ld128);
+  Info.setLibcallImpl(RTLIB::LDEXP_F128, RTLIB::ldexp_ld128);
+  Info.setLibcallImpl(RTLIB::FREXP_F128, RTLIB::frexp_ld128);
+  Info.setLibcallImpl(RTLIB::MODF_F128, RTLIB::modf_ld128);
 
   if (FiniteOnlyFuncs) {
-    Info.setLibcallImpl(RTLIB::LOG_FINITE_F128, RTLIB::__logf128_finite);
-    Info.setLibcallImpl(RTLIB::LOG2_FINITE_F128, RTLIB::__log2f128_finite);
-    Info.setLibcallImpl(RTLIB::LOG10_FINITE_F128, RTLIB::__log10f128_finite);
-    Info.setLibcallImpl(RTLIB::EXP_FINITE_F128, RTLIB::__expf128_finite);
-    Info.setLibcallImpl(RTLIB::EXP2_FINITE_F128, RTLIB::__exp2f128_finite);
-    Info.setLibcallImpl(RTLIB::POW_FINITE_F128, RTLIB::__powf128_finite);
+    Info.setLibcallImpl(RTLIB::LOG_FINITE_F128, RTLIB::__log_finite_ld128);
+    Info.setLibcallImpl(RTLIB::LOG2_FINITE_F128, RTLIB::__log2_finite_ld128);
+    Info.setLibcallImpl(RTLIB::LOG10_FINITE_F128, RTLIB::__log10_finite_ld128);
+    Info.setLibcallImpl(RTLIB::EXP_FINITE_F128, RTLIB::__exp_finite_ld128);
+    Info.setLibcallImpl(RTLIB::EXP2_FINITE_F128, RTLIB::__exp2_finite_ld128);
+    Info.setLibcallImpl(RTLIB::POW_FINITE_F128, RTLIB::__pow_finite_ld128);
   } else {
     Info.setLibcallImpl(RTLIB::LOG_FINITE_F128, RTLIB::Unsupported);
     Info.setLibcallImpl(RTLIB::LOG2_FINITE_F128, RTLIB::Unsupported);
@@ -137,15 +138,24 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT,
                                        ExceptionHandling ExceptionModel,
                                        FloatABI::ABIType FloatABI,
                                        EABI EABIVersion, StringRef ABIName) {
+  LibcallImpl SinCosF128Impl = RTLIB::Unsupported;
   setTargetRuntimeLibcallSets(TT, FloatABI);
 
   // Early exit for targets that have fully ported to tablegen.
   if (TT.isAMDGPU() || TT.isNVPTX() || TT.isWasm())
     return;
 
-  // Use the f128 variants of math functions on x86
-  if (TT.isX86() && TT.isGNUEnvironment())
+  // By default fp128 libcalls get lowered to `*f128` symbols, which is
+  // safest because the symbols are only ever for binary128 on all platforms.
+  // Unfortunately many platforms only have the `*l` (`long double`) symbols,
+  // which vary by architecture and compilation flags, so we have to use them
+  // sometimes.
+  if (TT.f128LibmShouldUseLongDouble()) {
     setLongDoubleIsF128Libm(*this, /*FiniteOnlyFuncs=*/true);
+    SinCosF128Impl = RTLIB::sincos_ld128;
+  } else {
+    SinCosF128Impl = RTLIB::sincos_f128;
+  }
 
   if (TT.isX86() || TT.isVE() || TT.isARM() || TT.isThumb()) {
     if (ExceptionModel == ExceptionHandling::SjLj)
@@ -184,7 +194,7 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT,
     setLibcallImpl(RTLIB::SINCOS_F32, RTLIB::sincosf);
     setLibcallImpl(RTLIB::SINCOS_F64, RTLIB::sincos);
     setLibcallImpl(RTLIB::SINCOS_F80, RTLIB::sincos_f80);
-    setLibcallImpl(RTLIB::SINCOS_F128, RTLIB::sincos_f128);
+    setLibcallImpl(RTLIB::SINCOS_F128, SinCosF128Impl);
     setLibcallImpl(RTLIB::SINCOS_PPCF128, RTLIB::sincos_ppcf128);
   }
 
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 5e0b29ffb2590..96a3e9ad8b507 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -261,6 +261,53 @@ APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
     llvm_unreachable("Unknown floating semantics");
 }
 
+const char *APFloatBase::SemanticsName(const llvm::fltSemantics &Sem) {
+  switch (APFloatBase::SemanticsToEnum(Sem)) {
+  case S_IEEEhalf:
+    return "IEEEhalf";
+  case S_BFloat:
+    return "BFloat";
+  case S_IEEEsingle:
+    return "IEEEsingle";
+  case S_IEEEdouble:
+    return "IEEEdouble";
+  case S_IEEEquad:
+    return "IEEEquad";
+  case S_PPCDoubleDouble:
+    return "PPCDoubleDouble";
+  case S_PPCDoubleDoubleLegacy:
+    return "PPCDoubleDoubleLegacy";
+  case S_Float8E5M2:
+    return "Float8E5M2";
+  case S_Float8E5M2FNUZ:
+    return "Float8E5M2FNUZ";
+  case S_Float8E4M3:
+    return "Float8E4M3";
+  case S_Float8E4M3FN:
+    return "Float8E4M3FN";
+  case S_Float8E4M3FNUZ:
+    return "Float8E4M3FNUZ";
+  case S_Float8E4M3B11FNUZ:
+    return "Float8E4M3B11FNUZ";
+  case S_Float8E3M4:
+    return "Float8E3M4";
+  case S_FloatTF32:
+    return "FloatTF32";
+  case S_Float8E8M0FNU:
+    return "Float8E8M0FNU";
+  case S_Float6E3M2FN:
+    return "Float6E3M2FN";
+  case S_Float6E2M3FN:
+    return "Float6E2M3FN";
+  case S_Float4E2M1FN:
+    return "Float4E2M1FN";
+  case S_x87DoubleExtended:
+    return "x87DoubleExtended";
+  }
+  // No `default:` above, so -Wswitch flags any newly added Semantics value.
+  llvm_unreachable("Unknown floating semantics");
+}
+
 const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; }
 const fltSemantics &APFloatBase::BFloat() { return semBFloat; }
 const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; }
diff --git a/llvm/lib/Target/Mips/MipsCCState.cpp b/llvm/lib/Target/Mips/MipsCCState.cpp
index 9e8cd2ea2fd43..9111d142cea39 100644
--- a/llvm/lib/Target/Mips/MipsCCState.cpp
+++ b/llvm/lib/Target/Mips/MipsCCState.cpp
@@ -21,11 +21,16 @@ bool MipsCCState::isF128SoftLibCall(const char *CallSym) {
       "__floatuntitf", "__getf2",      "__gttf2",       "__letf2",
       "__lttf2",       "__multf3",     "__netf2",       "__powitf2",
       "__subtf3",      "__trunctfdf2", "__trunctfsf2",  "__unordtf2",
-      "ceill",         "copysignl",    "cosl",          "exp2l",
-      "expl",          "floorl",       "fmal",          "fmaxl",
-      "fmodl",         "log10l",       "log2l",         "logl",
-      "nearbyintl",    "powl",         "rintl",         "roundl",
-      "sinl",          "sqrtl",        "truncl"};
+      "ceilf128",      "ceill",        "copysignf128",  "copysignl",
+      "cosf128",       "cosl",         "exp2f128",      "exp2l",
+      "expf128",       "expl",         "floorf128",     "floorl",
+      "fmaf128",       "fmal",         "fmaxf128",      "fmaxl",
+      "fmodf128",      "fmodl",        "log10f128",     "log10l",
+      "log2f128",      "log2l",        "logf128",       "logl",
+      "nearbyintf128", "nearbyintl",   "powf128",       "powl",
+      "rintf128",      "rintl",        "roundf128",     "roundl",
+      "sinf128",       "sinl",         "sqrtf128",      "sqrtl",
+      "truncf128",     "truncl"};
 
   // Check that LibCalls is sorted alphabetically.
   auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; };
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index 0584c941d2e6e..ebbf4131ae3db 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -2306,6 +2306,43 @@ ExceptionHandling Triple::getDefaultExceptionHandling() const {
   return ExceptionHandling::None;
 }
 
+bool Triple::f128LibmShouldUseLongDouble() const {
+  // Always prefer to lower to `*f128` symbols when they are likely to be
+  // available, to avoid any inaccuracies or problems from libc config.
+  //
+  // Note that the logic should be kept in sync with Clang's LongDoubleFormat.
+
+  // Windows and Apple always use f64 as `long double`.
+  if (isOSWindows() || isOSDarwin())
+    return false;
+
+  // PowerPC has a complicated `long double` situation so `*f128` is always
+  // used.
+  if (isPPC())
+    return false;
+
+  // Most 64-bit architectures use binary128, a few are binary128 on both
+  // 64- and 32-bit.
+  if (isAArch64() || isLoongArch() || isRISCV() || isSPARC64() || isSystemZ() ||
+      isVE() || isWasm())
+    return true;
+
+  // MIPS64 is usually f128, except on FreeBSD-like operating systems. MIPS32
+  // is f128 only with the N32 ABI (O32 is `f64`).
+  if ((isMIPS64() || isABIN32()) &&
+      !(isOSFreeBSD() || isOSKFreeBSD() || isOSDragonFly()))
+    return true;
+
+  // Android and Ohos use binary128 on x86_64.
+  if (getArch() == Triple::x86_64 && (isAndroid() || isOHOSFamily()))
+    return true;
+
+  // By default, make the safe assumption that `long double != f128`. This
+  // catches x86 (`long double` is x87 `f80`) among others; PowerPC (`f64` or
+  // PPC double-double) has already been rejected explicitly above.
+  return false;
+}
+
 // HLSL triple environment orders are relied on in the front end
 static_assert(Triple::Vertex - Triple::Pixel == 1,
               "incorrect HLSL stage order");
diff --git a/llvm/test/CodeGen/ARM/ldexp.ll b/llvm/test/CodeGen/ARM/ldexp.ll
index cdf91eb902e05..2661a1e270798 100644
--- a/llvm/test/CodeGen/ARM/ldexp.ll
+++ b/llvm/test/CodeGen/ARM/ldexp.ll
@@ -38,7 +38,7 @@ entry:
 declare float @ldexpf(float, i32) memory(none)
 
 define fp128 @testExpl(fp128 %val, i32 %a) {
-; LINUX:    bl ldexpl
+; LINUX:    bl ldexpf128
 ; WINDOWS:    b.w ldexpl
 entry:
   %call = tail call fp128 @ldexpl(fp128 %val, i32 %a)
diff --git a/llvm/test/CodeGen/ARM/llvm.sincos.ll b/llvm/test/CodeGen/ARM/llvm.sincos.ll
index 9628405df6bcb..da1ad5ecbe008 100644
--- a/llvm/test/CodeGen/ARM/llvm.sincos.ll
+++ b/llvm/test/CodeGen/ARM/llvm.sincos.ll
@@ -206,7 +206,7 @@ define { fp128, fp128 } @test_sincos_f128(fp128 %a) {
 ; CHECK-NEXT:    mov r0, r1
 ; CHECK-NEXT:    mov r1, r2
 ; CHECK-NEXT:    mov r2, r12
-; CHECK-NEXT:    bl sincosl
+; CHECK-NEXT:    bl sincosf128
 ; CHECK-NEXT:    ldrd r2, r3, [sp, #16]
 ; CHECK-NEXT:    ldrd r12, r1, [sp, #8]
 ; CHECK-NEXT:    str r3, [r4, #28]
diff --git a/llvm/test/CodeGen/Generic/fp128-math-libcalls.ll b/llvm/test/CodeGen/Generic/fp128-math-libcalls.ll
index ccce4bbd2a327..4c035601970db 100644
--- a/llvm/test/CodeGen/Generic/fp128-math-libcalls.ll
+++ b/llvm/test/CodeGen/Generic/fp128-math-libcalls.ll
@@ -11,25 +11,21 @@
 ; * musl (no f128 symbols available)
 ; * Windows and MacOS (no f128 symbols, long double == f64)
 
-; FIXME(#44744): arm32, x86-{32,64} musl targets, MacOS, and Windows don't have
-; f128 long double. They should be passing with CHECK-F128 rather than
-; CHECK-USELD.
-
 ; RUN: %if aarch64-registered-target %{ llc < %s -mtriple=aarch64-unknown-linux-gnu    | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
 ; RUN: %if aarch64-registered-target %{ llc < %s -mtriple=aarch64-unknown-linux-musl   | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
 ; RUN: %if aarch64-registered-target %{ llc < %s -mtriple=aarch64-unknown-none         | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
-; RUN: %if aarch64-registered-target %{ llc < %s -mtriple=arm64-apple-macosx           | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
-; RUN: %if arm-registered-target     %{ llc < %s -mtriple=arm-none-eabi                | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
-; RUN: %if arm-registered-target     %{ llc < %s -mtriple=arm-unknown-linux-gnueabi    | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN: %if aarch64-registered-target %{ llc < %s -mtriple=arm64-apple-macosx           | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
+; RUN: %if arm-registered-target     %{ llc < %s -mtriple=arm-none-eabi                | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
+; RUN: %if arm-registered-target     %{ llc < %s -mtriple=arm-unknown-linux-gnueabi    | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
 ; RUN: %if powerpc-registered-target %{ llc < %s -mtriple=powerpc-unknown-linux-gnu    | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
 ; RUN: %if powerpc-registered-target %{ llc < %s -mtriple=powerpc64-unknown-linux-gnu  | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
 ; RUN: %if powerpc-registered-target %{ llc < %s -mtriple=powerpc64-unknown-linux-musl | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
 ; RUN: %if riscv-registered-target   %{ llc < %s -mtriple=riscv32-unknown-linux-gnu    | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
 ; RUN: %if systemz-registered-target %{ llc < %s -mtriple=s390x-unknown-linux-gnu      | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-S390X %}
 ; RUN: %if x86-registered-target     %{ llc < %s -mtriple=i686-unknown-linux-gnu       | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
-; RUN: %if x86-registered-target     %{ llc < %s -mtriple=i686-unknown-linux-musl      | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN: %if x86-registered-target     %{ llc < %s -mtriple=i686-unknown-linux-musl      | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
 ; RUN: %if x86-registered-target     %{ llc < %s -mtriple=x86_64-unknown-linux-gnu     | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
-; RUN: %if x86-registered-target     %{ llc < %s -mtriple=x86_64-unknown-linux-musl    | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN: %if x86-registered-target     %{ llc < %s -mtriple=x86_64-unknown-linux-musl    | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
 ;
 ; FIXME(#144006): Windows-MSVC should also be run but has a ldexp selection
 ; failure.
diff --git a/llvm/test/CodeGen/Hexagon/llvm.exp10.ll b/llvm/test/CodeGen/Hexagon/llvm.exp10.ll
index b5fcc4151225a..ef42b9d162b42 100644
--- a/llvm/test/CodeGen/Hexagon/llvm.exp10.ll
+++ b/llvm/test/CodeGen/Hexagon/llvm.exp10.ll
@@ -158,7 +158,7 @@ define fp128 @exp10_f128(fp128 %x) #0 {
 ; CHECK-NEXT:     allocframe(#24)
 ; CHECK-NEXT:    } // 8-byte Folded Spill
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     call exp10l
+; CHECK-NEXT:     call exp10f128
 ; CHECK-NEXT:     r0 = add(r29,#0)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
@@ -191,12 +191,12 @@ define <2 x fp128> @exp10_v2f128(<2 x fp128> %x) #0 {
 ; CHECK-NEXT:     memd(r29+#32) = r21:20
 ; CHECK-NEXT:    } // 8-byte Folded Spill
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     call exp10l
+; CHECK-NEXT:     call exp10f128
 ; CHECK-NEXT:     r19:18 = memd(r29+#64)
 ; CHECK-NEXT:     r21:20 = memd(r29+#72)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     call exp10l
+; CHECK-NEXT:     call exp10f128
 ; CHECK-NEXT:     r0 = add(r29,#0)
 ; CHECK-NEXT:     r3:2 = combine(r19,r18)
 ; CHECK-NEXT:     r5:4 = combine(r21,r20)
diff --git a/llvm/test/CodeGen/Hexagon/llvm.sincos.ll b/llvm/test/CodeGen/Hexagon/llvm.sincos.ll
index f02ac2ca8480f..095c53cce003a 100644
--- a/llvm/test/CodeGen/Hexagon/llvm.sincos.ll
+++ b/llvm/test/CodeGen/Hexagon/llvm.sincos.ll
@@ -1108,11 +1108,11 @@ define { fp128, fp128 } @test_sincos_f128(fp128 %a) #0 {
 ; BASE-NEXT:     memd(r29+#40) = r19:18
 ; BASE-NEXT:    } // 8-byte Folded Spill
 ; BASE-NEXT:    {
-; BASE-NEXT:     call sinl
+; BASE-NEXT:     call sinf128
 ; BASE-NEXT:     r19:18 = combine(r3,r2)
 ; BASE-NEXT:    }
 ; BASE-NEXT:    {
-; BASE-NEXT:     call cosl
+; BASE-NEXT:     call cosf128
 ; BASE-NEXT:     r0 = add(r29,#16)
 ; BASE-NEXT:     r3:2 = combine(r19,r18)
 ; BASE-NEXT:     r5:4 = combine(r17,r16)
@@ -1156,7 +1156,7 @@ define { fp128, fp128 } @test_sincos_f128(fp128 %a) #0 {
 ; GNU-NEXT:     memw(r29+#4) = r7.new
 ; GNU-NEXT:    }
 ; GNU-NEXT:    {
-; GNU-NEXT:     call sincosl
+; GNU-NEXT:     call sincosf128
 ; GNU-NEXT:     memw(r29+#0) = r6
 ; GNU-NEXT:    }
 ; GNU-NEXT:    {
@@ -1194,11 +1194,11 @@ define { fp128, fp128 } @test_sincos_f128(fp128 %a) #0 {
 ; MUSL-NEXT:     memd(r29+#40) = r19:18
 ; MUSL-NEXT:    } // 8-byte Folded Spill
 ; MUSL-NEXT:    {
-; MUSL-NEXT:     call sinl
+; MUSL-NEXT:     call sinf128
 ; MUSL-NEXT:     r19:18 = combine(r3,r2)
 ; MUSL-NEXT:    }
 ; MUSL-NEXT:    {
-; MUSL-NEXT:     call cosl
+; MUSL-NEXT:     call cosf128
 ; MUSL-NEXT:     r0 = add(r29,#16)
 ; MUSL-NEXT:     r3:2 = combine(r19,r18)
 ; MUSL-NEXT:     r5:4 = combine(r17,r16)
@@ -1251,24 +1251,24 @@ define { <2 x fp128>, <2 x fp128> } @test_sincos_v2f128(<2 x fp128> %a) #0 {
 ; BASE-NEXT:     memd(r29+#64) = r25:24
 ; BASE-NEXT:    } // 8-byte Folded Spill
 ; BASE-NEXT:    {
-; BASE-NEXT:     call sinl
+; BASE-NEXT:     call sinf128
 ; BASE-NEXT:     r23:22 = memd(r29+#112)
 ; BASE-NEXT:     r25:24 = memd(r29+#120)
 ; BASE-NEXT:    }
 ; BASE-NEXT:    {
-; BASE-NEXT:     call sinl
+; BASE-NEXT:     call sinf128
 ; BASE-NEXT:     r0 = add(r29,#0)
 ; BASE-NEXT:     r3:2 = combine(r23,r22)
 ; BASE-NEXT:     r5:4 = combine(r25,r24)
 ; BASE-NEXT:    }
 ; BASE-NEXT:    {
-; BASE-NEXT:     call cosl
+; BASE-NEXT:     call cosf128
 ; BASE-NEXT:     r0 = add(r29,#48)
 ; BASE-NEXT:     r3:2 = combine(r19,r18)
 ; BASE-NEXT:     r5:4 = combine(r17,r16)
 ; BASE-NEXT:    }
 ; BASE-NEXT:    {
-; BASE-NEXT:     call cosl
+; BASE-NEXT:     call cosf128
 ; BASE-NEXT:     r0 = add(r29,#16)
 ; BASE-NEXT:     r3:2 = combine(r23,r22)
 ; BASE-NEXT:     r5:4 = combine(r25,r24)
@@ -1338,7 +1338,7 @@ define { <2 x fp128>, <2 x fp128> } @test_sincos_v2f128(<2 x fp128> %a) #0 {
 ; GNU-NEXT:     memw(r17+#0) = r6.new
 ; GNU-NEXT:    }
 ; GNU-NEXT:    {
-; GNU-NEXT:     call sincosl
+; GNU-NEXT:     call sincosf128
 ; GNU-NEXT:     r21:20 = memd(r29+#144)
 ; GNU-NEXT:     memw(r17+#4) = r1
 ; GNU-NEXT:    }
@@ -1349,7 +1349,7 @@ define { <2 x fp128>, <2 x fp128> } @test_sincos_v2f128(<2 x fp128> %a) #0 {
 ; GNU-NEXT:     memw(r17+#4) = r0.new
 ; GNU-NEXT:    }
 ; GNU-NEXT:    {
-; GNU-NEXT:     call sincosl
+; GNU-NEXT:     call sincosf128
 ; GNU-NEXT:     r0 = add(r29,#40)
 ; GNU-NEXT:     r1 = add(r29,#8)
 ; GNU-NEXT:     memw(r17+#0) = r1.new
@@ -1414,24 +1414,24 @@ define { <2 x fp128>, <2 x fp128> } @test_sincos_v2f128(<2 x fp128> %a) #0 {
 ; MUSL-NEXT:     memd(r29+#64) = r25:24
 ; MUSL-NEXT:    } // 8-byte Folded Spill
 ; MUSL-NEXT:    {
-; MUSL-NEXT:     call sinl
+; MUSL-NEXT:     call sinf128
 ; MUSL-NEXT:     r23:22 = memd(r29+#112)
 ; MUSL-NEXT:     r25:24 = memd(r29+#120)
 ; MUSL-NEXT:    }
 ; MUSL-NEXT:    {
-; MUSL-NEXT:     call sinl
+; MUSL-NEXT:     call sinf128
 ; MUSL-NEXT:     r0 = add(r29,#0)
 ; MUSL-NEXT:     r3:2 = combine(r23,r22)
 ; MUSL-NEXT:     r5:4 = combine(r25,r24)
 ; MUSL-NEXT:    }
 ; MUSL-NEXT:    {
-; MUSL-NEXT:     call cosl
+; MUSL-NEXT:     call cosf128
 ; MUSL-NEXT:     r0 = add(r29,#48)
 ; MUSL-NEXT:     r3:2 = combine(r19,r18)
 ; MUSL-NEXT:     r5:4 = combine(r17,r16)
 ; MUSL-NEXT:    }
 ; MUSL-NEXT:    {
-; MUSL-NEXT:     call cosl
+; MUSL-NEXT:     call cosf128
 ; MUSL-NEXT:     r0 = add(r29,#16)
 ; MUSL-NEXT:     r3:2 = combine(r23,r22)
 ; MUSL-NEXT:     r5:4 = combine(r25,r24)
diff --git a/llvm/test/CodeGen/PowerPC/f128-arith.ll b/llvm/test/CodeGen/PowerPC/f128-arith.ll
index f9c953d483ff2..4eb66fd04a945 100644
--- a/llvm/test/CodeGen/PowerPC/f128-arith.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-arith.ll
@@ -425,14 +425,19 @@ define fp128 @qp_sincos(ptr nocapture readonly %a) nounwind {
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -64(r1)
 ; CHECK-NEXT:    std r0, 80(r1)
-; CHECK-NEXT:    addi r5, r1, 48
-; CHECK-NEXT:    addi r6, r1, 32
-; CHECK-NEXT:    lxv v2, 0(r3)
-; CHECK-NEXT:    bl sincosf128
+; CHECK-NEXT:    stxv v31, 48(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv v30, 32(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    lxv v31, 0(r3)
+; CHECK-NEXT:    vmr v2, v31
+; CHECK-NEXT:    bl cosf128
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    lxv v2, 48(r1)
-; CHECK-NEXT:    lxv v3, 32(r1)
-; CHECK-NEXT:    xsmulqp v2, v3, v2
+; CHECK-NEXT:    vmr v30, v2
+; CHECK-NEXT:    vmr v2, v31
+; CHECK-NEXT:    bl sinf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    xsmulqp v2, v30, v2
+; CHECK-NEXT:    lxv v31, 48(r1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv v30, 32(r1) # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 64
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -441,28 +446,31 @@ define fp128 @qp_sincos(ptr nocapture readonly %a) nounwind {
 ; CHECK-P8-LABEL: qp_sincos:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    stdu r1, -96(r1)
-; CHECK-P8-NEXT:    std r0, 112(r1)
-; CHECK-P8-NEXT:    addi r30, r1, 48
-; CHECK-P8-NEXT:    addi r29, r1, 32
+; CHECK-P8-NEXT:    stdu r1, -80(r1)
+; CHECK-P8-NEXT:    std r0, 96(r1)
+; CHECK-P8-NEXT:    li r4, 48
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    mr r5, r30
-; CHECK-P8-NEXT:    mr r6, r29
-; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    bl sincosf128
+; CHECK-P8-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
+; CHECK-P8-NEXT:    li r4, 64
+; CHECK-P8-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
+; CHECK-P8-NEXT:    xxswapd v31, vs0
+; CHECK-P8-NEXT:    vmr v2, v31
+; CHECK-P8-NEXT:    bl cosf128
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r29
-; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r30
-; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    vmr v30, v2
+; CHECK-P8-NEXT:    vmr v2, v31
+; CHECK-P8-NEXT:    bl sinf128
+; CHECK-P8-NEXT:    nop
+; CHECK-P8-NEXT:    vmr v3, v2
+; CHECK-P8-NEXT:    vmr v2, v30
 ; CHECK-P8-NEXT:    bl __mulkf3
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    addi r1, r1, 96
+; CHECK-P8-NEXT:    li r3, 64
+; CHECK-P8-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; CHECK-P8-NEXT:    li r3, 48
+; CHECK-P8-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; CHECK-P8-NEXT:    addi r1, r1, 80
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
-; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-P8-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
 ; CHECK-P8-NEXT:    mtlr r0
 ; CHECK-P8-NEXT:    blr
 entry:
diff --git a/llvm/test/CodeGen/WebAssembly/llvm.sincos.ll b/llvm/test/CodeGen/WebAssembly/llvm.sincos.ll
index 3c10b09525573..87499e63bff2f 100644
--- a/llvm/test/CodeGen/WebAssembly/llvm.sincos.ll
+++ b/llvm/test/CodeGen/WebAssembly/llvm.sincos.ll
@@ -384,38 +384,38 @@ define { fp128, fp128 } @test_sincos_f128(fp128 %a) #0 {
 ; WASM32-NEXT:    .local i32
 ; WASM32-NEXT:  # %bb.0:
 ; WASM32-NEXT:    global.get __stack_pointer
-; WASM32-NEXT:    i32.const 32
+; WASM32-NEXT:    i32.const 48
 ; WASM32-NEXT:    i32.sub
 ; WASM32-NEXT:    local.tee 3
 ; WASM32-NEXT:    global.set __stack_pointer
 ; WASM32-NEXT:    local.get 3
 ; WASM32-NEXT:    local.get 1
 ; WASM32-NEXT:    local.get 2
-; WASM32-NEXT:    call cosl
+; WASM32-NEXT:    local.get 3
+; WASM32-NEXT:    i32.const 32
+; WASM32-NEXT:    i32.add
 ; WASM32-NEXT:    local.get 3
 ; WASM32-NEXT:    i32.const 16
 ; WASM32-NEXT:    i32.add
-; WASM32-NEXT:    local.get 1
-; WASM32-NEXT:    local.get 2
-; WASM32-NEXT:    call sinl
+; WASM32-NEXT:    call sincosl
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 3
-; WASM32-NEXT:    i64.load 8
+; WASM32-NEXT:    i64.load 24
 ; WASM32-NEXT:    i64.store 24
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 3
-; WASM32-NEXT:    i64.load 0
+; WASM32-NEXT:    i64.load 16
 ; WASM32-NEXT:    i64.store 16
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 3
-; WASM32-NEXT:    i64.load 24
+; WASM32-NEXT:    i64.load 40
 ; WASM32-NEXT:    i64.store 8
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 3
-; WASM32-NEXT:    i64.load 16
+; WASM32-NEXT:    i64.load 32
 ; WASM32-NEXT:    i64.store 0
 ; WASM32-NEXT:    local.get 3
-; WASM32-NEXT:    i32.const 32
+; WASM32-NEXT:    i32.const 48
 ; WASM32-NEXT:    i32.add
 ; WASM32-NEXT:    global.set __stack_pointer
 ; WASM32-NEXT:    # fallthrough-return
@@ -425,38 +425,38 @@ define { fp128, fp128 } @test_sincos_f128(fp128 %a) #0 {
 ; WASM64-NEXT:    .local i64
 ; WASM64-NEXT:  # %bb.0:
 ; WASM64-NEXT:    global.get __stack_pointer
-; WASM64-NEXT:    i64.const 32
+; WASM64-NEXT:    i64.const 48
 ; WASM64-NEXT:    i64.sub
 ; WASM64-NEXT:    local.tee 3
 ; WASM64-NEXT:    global.set __stack_pointer
 ; WASM64-NEXT:    local.get 3
 ; WASM64-NEXT:    local.get 1
 ; WASM64-NEXT:    local.get 2
-; WASM64-NEXT:    call cosl
+; WASM64-NEXT:    local.get 3
+; WASM64-NEXT:    i64.const 32
+; WASM64-NEXT:    i64.add
 ; WASM64-NEXT:    local.get 3
 ; WASM64-NEXT:    i64.const 16
 ; WASM64-NEXT:    i64.add
-; WASM64-NEXT:    local.get 1
-; WASM64-NEXT:    local.get 2
-; WASM64-NEXT:    call sinl
+; WASM64-NEXT:    call sincosl
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 3
-; WASM64-NEXT:    i64.load 8
+; WASM64-NEXT:    i64.load 24
 ; WASM64-NEXT:    i64.store 24
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 3
-; WASM64-NEXT:    i64.load 0
+; WASM64-NEXT:    i64.load 16
 ; WASM64-NEXT:    i64.store 16
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 3
-; WASM64-NEXT:    i64.load 24
+; WASM64-NEXT:    i64.load 40
 ; WASM64-NEXT:    i64.store 8
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 3
-; WASM64-NEXT:    i64.load 16
+; WASM64-NEXT:    i64.load 32
 ; WASM64-NEXT:    i64.store 0
 ; WASM64-NEXT:    local.get 3
-; WASM64-NEXT:    i64.const 32
+; WASM64-NEXT:    i64.const 48
 ; WASM64-NEXT:    i64.add
 ; WASM64-NEXT:    global.set __stack_pointer
 ; WASM64-NEXT:    # fallthrough-return
@@ -470,66 +470,66 @@ define { <2 x fp128>, <2 x fp128> } @test_sincos_v2f128(<2 x fp128> %a) #0 {
 ; WASM32-NEXT:    .local i32
 ; WASM32-NEXT:  # %bb.0:
 ; WASM32-NEXT:    global.get __stack_pointer
-; WASM32-NEXT:    i32.const 64
+; WASM32-NEXT:    i32.const 96
 ; WASM32-NEXT:    i32.sub
 ; WASM32-NEXT:    local.tee 5
 ; WASM32-NEXT:    global.set __stack_pointer
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i32.const 32
+; WASM32-NEXT:    i32.const 48
 ; WASM32-NEXT:    i32.add
 ; WASM32-NEXT:    local.get 3
 ; WASM32-NEXT:    local.get 4
-; WASM32-NEXT:    call cosl
+; WASM32-NEXT:    local.get 5
+; WASM32-NEXT:    i32.const 80
+; WASM32-NEXT:    i32.add
+; WASM32-NEXT:    local.get 5
+; WASM32-NEXT:    i32.const 64
+; WASM32-NEXT:    i32.add
+; WASM32-NEXT:    call sincosl
 ; WASM32-NEXT:    local.get 5
 ; WASM32-NEXT:    local.get 1
 ; WASM32-NEXT:    local.get 2
-; WASM32-NEXT:    call cosl
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i32.const 48
+; WASM32-NEXT:    i32.const 32
 ; WASM32-NEXT:    i32.add
-; WASM32-NEXT:    local.get 3
-; WASM32-NEXT:    local.get 4
-; WASM32-NEXT:    call sinl
 ; WASM32-NEXT:    local.get 5
 ; WASM32-NEXT:    i32.const 16
 ; WASM32-NEXT:    i32.add
-; WASM32-NEXT:    local.get 1
-; WASM32-NEXT:    local.get 2
-; WASM32-NEXT:    call sinl
+; WASM32-NEXT:    call sincosl
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i64.load 40
+; WASM32-NEXT:    i64.load 72
 ; WASM32-NEXT:    i64.store 56
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i64.load 32
+; WASM32-NEXT:    i64.load 64
 ; WASM32-NEXT:    i64.store 48
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i64.load 8
+; WASM32-NEXT:    i64.load 24
 ; WASM32-NEXT:    i64.store 40
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i64.load 0
+; WASM32-NEXT:    i64.load 16
 ; WASM32-NEXT:    i64.store 32
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i64.load 56
+; WASM32-NEXT:    i64.load 88
 ; WASM32-NEXT:    i64.store 24
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i64.load 48
+; WASM32-NEXT:    i64.load 80
 ; WASM32-NEXT:    i64.store 16
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i64.load 24
+; WASM32-NEXT:    i64.load 40
 ; WASM32-NEXT:    i64.store 8
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i64.load 16
+; WASM32-NEXT:    i64.load 32
 ; WASM32-NEXT:    i64.store 0
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i32.const 64
+; WASM32-NEXT:    i32.const 96
 ; WASM32-NEXT:    i32.add
 ; WASM32-NEXT:    global.set __stack_pointer
 ; WASM32-NEXT:    # fallthrough-return
@@ -539,66 +539,66 @@ define { <2 x fp128>, <2 x fp128> } @test_sincos_v2f128(<2 x fp128> %a) #0 {
 ; WASM64-NEXT:    .local i64
 ; WASM64-NEXT:  # %bb.0:
 ; WASM64-NEXT:    global.get __stack_pointer
-; WASM64-NEXT:    i64.const 64
+; WASM64-NEXT:    i64.const 96
 ; WASM64-NEXT:    i64.sub
 ; WASM64-NEXT:    local.tee 5
 ; WASM64-NEXT:    global.set __stack_pointer
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.const 32
+; WASM64-NEXT:    i64.const 48
 ; WASM64-NEXT:    i64.add
 ; WASM64-NEXT:    local.get 3
 ; WASM64-NEXT:    local.get 4
-; WASM64-NEXT:    call cosl
+; WASM64-NEXT:    local.get 5
+; WASM64-NEXT:    i64.const 80
+; WASM64-NEXT:    i64.add
+; WASM64-NEXT:    local.get 5
+; WASM64-NEXT:    i64.const 64
+; WASM64-NEXT:    i64.add
+; WASM64-NEXT:    call sincosl
 ; WASM64-NEXT:    local.get 5
 ; WASM64-NEXT:    local.get 1
 ; WASM64-NEXT:    local.get 2
-; WASM64-NEXT:    call cosl
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.const 48
+; WASM64-NEXT:    i64.const 32
 ; WASM64-NEXT:    i64.add
-; WASM64-NEXT:    local.get 3
-; WASM64-NEXT:    local.get 4
-; WASM64-NEXT:    call sinl
 ; WASM64-NEXT:    local.get 5
 ; WASM64-NEXT:    i64.const 16
 ; WASM64-NEXT:    i64.add
-; WASM64-NEXT:    local.get 1
-; WASM64-NEXT:    local.get 2
-; WASM64-NEXT:    call sinl
+; WASM64-NEXT:    call sincosl
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.load 40
+; WASM64-NEXT:    i64.load 72
 ; WASM64-NEXT:    i64.store 56
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.load 32
+; WASM64-NEXT:    i64.load 64
 ; WASM64-NEXT:    i64.store 48
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.load 8
+; WASM64-NEXT:    i64.load 24
 ; WASM64-NEXT:    i64.store 40
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.load 0
+; WASM64-NEXT:    i64.load 16
 ; WASM64-NEXT:    i64.store 32
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.load 56
+; WASM64-NEXT:    i64.load 88
 ; WASM64-NEXT:    i64.store 24
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.load 48
+; WASM64-NEXT:    i64.load 80
 ; WASM64-NEXT:    i64.store 16
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.load 24
+; WASM64-NEXT:    i64.load 40
 ; WASM64-NEXT:    i64.store 8
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.load 16
+; WASM64-NEXT:    i64.load 32
 ; WASM64-NEXT:    i64.store 0
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.const 64
+; WASM64-NEXT:    i64.const 96
 ; WASM64-NEXT:    i64.add
 ; WASM64-NEXT:    global.set __stack_pointer
 ; WASM64-NEXT:    # fallthrough-return
diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
index a7eea04181f60..aa3df9bbcbc81 100644
--- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
+++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
@@ -473,7 +473,7 @@ define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp {
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
-; WIN-NEXT:    callq fmal
+; WIN-NEXT:    callq fmaf128
 ; WIN-NEXT:    addq $88, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -500,7 +500,7 @@ define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _fmal
+; WIN-X86-NEXT:    calll _fmaf128
 ; WIN-X86-NEXT:    addl $52, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -570,7 +570,7 @@ define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; WIN-NEXT:    callq fmodl
+; WIN-NEXT:    callq fmodf128
 ; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -593,7 +593,7 @@ define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _fmodl
+; WIN-X86-NEXT:    calll _fmodf128
 ; WIN-X86-NEXT:    addl $36, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -656,7 +656,7 @@ define fp128 @ceil(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq ceill
+; WIN-NEXT:    callq ceilf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -675,7 +675,7 @@ define fp128 @ceil(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _ceill
+; WIN-X86-NEXT:    calll _ceilf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -738,7 +738,7 @@ define fp128 @acos(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq acosl
+; WIN-NEXT:    callq acosf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -757,7 +757,7 @@ define fp128 @acos(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _acosl
+; WIN-X86-NEXT:    calll _acosf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -820,7 +820,7 @@ define fp128 @cos(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq cosl
+; WIN-NEXT:    callq cosf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -839,7 +839,7 @@ define fp128 @cos(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _cosl
+; WIN-X86-NEXT:    calll _cosf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -902,7 +902,7 @@ define fp128 @cosh(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq coshl
+; WIN-NEXT:    callq coshf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -921,7 +921,7 @@ define fp128 @cosh(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _coshl
+; WIN-X86-NEXT:    calll _coshf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -984,7 +984,7 @@ define fp128 @exp(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq expl
+; WIN-NEXT:    callq expf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1003,7 +1003,7 @@ define fp128 @exp(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _expl
+; WIN-X86-NEXT:    calll _expf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1066,7 +1066,7 @@ define fp128 @exp2(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq exp2l
+; WIN-NEXT:    callq exp2f128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1085,7 +1085,7 @@ define fp128 @exp2(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _exp2l
+; WIN-X86-NEXT:    calll _exp2f128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1148,7 +1148,7 @@ define fp128 @floor(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq floorl
+; WIN-NEXT:    callq floorf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1167,7 +1167,7 @@ define fp128 @floor(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _floorl
+; WIN-X86-NEXT:    calll _floorf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1230,7 +1230,7 @@ define fp128 @log(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq logl
+; WIN-NEXT:    callq logf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1249,7 +1249,7 @@ define fp128 @log(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _logl
+; WIN-X86-NEXT:    calll _logf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1312,7 +1312,7 @@ define fp128 @log10(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq log10l
+; WIN-NEXT:    callq log10f128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1331,7 +1331,7 @@ define fp128 @log10(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _log10l
+; WIN-X86-NEXT:    calll _log10f128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1394,7 +1394,7 @@ define fp128 @log2(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq log2l
+; WIN-NEXT:    callq log2f128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1413,7 +1413,7 @@ define fp128 @log2(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _log2l
+; WIN-X86-NEXT:    calll _log2f128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1483,7 +1483,7 @@ define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; WIN-NEXT:    callq fmaxl
+; WIN-NEXT:    callq fmaxf128
 ; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1506,7 +1506,7 @@ define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _fmaxl
+; WIN-X86-NEXT:    calll _fmaxf128
 ; WIN-X86-NEXT:    addl $36, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1576,7 +1576,7 @@ define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; WIN-NEXT:    callq fminl
+; WIN-NEXT:    callq fminf128
 ; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1599,7 +1599,7 @@ define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _fminl
+; WIN-X86-NEXT:    calll _fminf128
 ; WIN-X86-NEXT:    addl $36, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1662,7 +1662,7 @@ define fp128 @nearbyint(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq nearbyintl
+; WIN-NEXT:    callq nearbyintf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1681,7 +1681,7 @@ define fp128 @nearbyint(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _nearbyintl
+; WIN-X86-NEXT:    calll _nearbyintf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1751,7 +1751,7 @@ define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; WIN-NEXT:    callq powl
+; WIN-NEXT:    callq powf128
 ; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1774,7 +1774,7 @@ define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _powl
+; WIN-X86-NEXT:    calll _powf128
 ; WIN-X86-NEXT:    addl $36, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1928,7 +1928,7 @@ define fp128 @rint(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq rintl
+; WIN-NEXT:    callq rintf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1947,7 +1947,7 @@ define fp128 @rint(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _rintl
+; WIN-X86-NEXT:    calll _rintf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2010,7 +2010,7 @@ define fp128 @round(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq roundl
+; WIN-NEXT:    callq roundf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2029,7 +2029,7 @@ define fp128 @round(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _roundl
+; WIN-X86-NEXT:    calll _roundf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2092,7 +2092,7 @@ define fp128 @roundeven(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq roundevenl
+; WIN-NEXT:    callq roundevenf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2111,7 +2111,7 @@ define fp128 @roundeven(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _roundevenl
+; WIN-X86-NEXT:    calll _roundevenf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2174,7 +2174,7 @@ define fp128 @asin(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq asinl
+; WIN-NEXT:    callq asinf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2193,7 +2193,7 @@ define fp128 @asin(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _asinl
+; WIN-X86-NEXT:    calll _asinf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2256,7 +2256,7 @@ define fp128 @sin(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq sinl
+; WIN-NEXT:    callq sinf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2275,7 +2275,7 @@ define fp128 @sin(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _sinl
+; WIN-X86-NEXT:    calll _sinf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2338,7 +2338,7 @@ define fp128 @sinh(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq sinhl
+; WIN-NEXT:    callq sinhf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2357,7 +2357,7 @@ define fp128 @sinh(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _sinhl
+; WIN-X86-NEXT:    calll _sinhf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2420,7 +2420,7 @@ define fp128 @sqrt(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq sqrtl
+; WIN-NEXT:    callq sqrtf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2439,7 +2439,7 @@ define fp128 @sqrt(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _sqrtl
+; WIN-X86-NEXT:    calll _sqrtf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2502,7 +2502,7 @@ define fp128 @atan(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq atanl
+; WIN-NEXT:    callq atanf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2521,7 +2521,7 @@ define fp128 @atan(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _atanl
+; WIN-X86-NEXT:    calll _atanf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2591,7 +2591,7 @@ define fp128 @atan2(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; WIN-NEXT:    callq atan2l
+; WIN-NEXT:    callq atan2f128
 ; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2614,7 +2614,7 @@ define fp128 @atan2(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _atan2l
+; WIN-X86-NEXT:    calll _atan2f128
 ; WIN-X86-NEXT:    addl $36, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2677,7 +2677,7 @@ define fp128 @tan(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq tanl
+; WIN-NEXT:    callq tanf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2696,7 +2696,7 @@ define fp128 @tan(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _tanl
+; WIN-X86-NEXT:    calll _tanf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2759,7 +2759,7 @@ define fp128 @tanh(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq tanhl
+; WIN-NEXT:    callq tanhf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2778,7 +2778,7 @@ define fp128 @tanh(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _tanhl
+; WIN-X86-NEXT:    calll _tanhf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2841,7 +2841,7 @@ define fp128 @trunc(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq truncl
+; WIN-NEXT:    callq truncf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2860,7 +2860,7 @@ define fp128 @trunc(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _truncl
+; WIN-X86-NEXT:    calll _truncf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2913,7 +2913,7 @@ define i32 @lrint(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq lrintl
+; WIN-NEXT:    callq lrintf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2923,7 +2923,7 @@ define i32 @lrint(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT:    calll _lrintl
+; WIN-X86-NEXT:    calll _lrintf128
 ; WIN-X86-NEXT:    addl $16, %esp
 ; WIN-X86-NEXT:    retl
 entry:
@@ -2963,7 +2963,7 @@ define i64 @llrint(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq llrintl
+; WIN-NEXT:    callq llrintf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2973,7 +2973,7 @@ define i64 @llrint(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT:    calll _llrintl
+; WIN-X86-NEXT:    calll _llrintf128
 ; WIN-X86-NEXT:    addl $16, %esp
 ; WIN-X86-NEXT:    retl
 entry:
@@ -3013,7 +3013,7 @@ define i32 @lround(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq lroundl
+; WIN-NEXT:    callq lroundf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -3023,7 +3023,7 @@ define i32 @lround(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT:    calll _lroundl
+; WIN-X86-NEXT:    calll _lroundf128
 ; WIN-X86-NEXT:    addl $16, %esp
 ; WIN-X86-NEXT:    retl
 entry:
@@ -3063,7 +3063,7 @@ define i64 @llround(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq llroundl
+; WIN-NEXT:    callq llroundf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -3073,7 +3073,7 @@ define i64 @llround(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT:    calll _llroundl
+; WIN-X86-NEXT:    calll _llroundf128
 ; WIN-X86-NEXT:    addl $16, %esp
 ; WIN-X86-NEXT:    retl
 entry:
diff --git a/llvm/test/CodeGen/X86/fp128-libcalls.ll b/llvm/test/CodeGen/X86/fp128-libcalls.ll
index f727a79078627..369a36120009d 100644
--- a/llvm/test/CodeGen/X86/fp128-libcalls.ll
+++ b/llvm/test/CodeGen/X86/fp128-libcalls.ll
@@ -857,7 +857,7 @@ define dso_local void @Test128Rem(fp128 %d1, fp128 %d2) nounwind {
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; WIN-NEXT:    callq fmodl
+; WIN-NEXT:    callq fmodf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
@@ -879,7 +879,7 @@ define dso_local void @Test128Rem(fp128 %d1, fp128 %d2) nounwind {
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl 8(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _fmodl
+; WIN-X86-NEXT:    calll _fmodf128
 ; WIN-X86-NEXT:    addl $36, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -949,7 +949,7 @@ define dso_local void @Test128_1Rem(fp128 %d1) nounwind {
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; WIN-NEXT:    callq fmodl
+; WIN-NEXT:    callq fmodf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
@@ -971,7 +971,7 @@ define dso_local void @Test128_1Rem(fp128 %d1) nounwind {
 ; WIN-X86-NEXT:    pushl _vf128+4
 ; WIN-X86-NEXT:    pushl _vf128
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _fmodl
+; WIN-X86-NEXT:    calll _fmodf128
 ; WIN-X86-NEXT:    addl $36, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1031,7 +1031,7 @@ define dso_local void @Test128Sqrt(fp128 %d1) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq sqrtl
+; WIN-NEXT:    callq sqrtf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
@@ -1049,7 +1049,7 @@ define dso_local void @Test128Sqrt(fp128 %d1) nounwind {
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl 8(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _sqrtl
+; WIN-X86-NEXT:    calll _sqrtf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1109,7 +1109,7 @@ define dso_local void @Test128Sin(fp128 %d1) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq sinl
+; WIN-NEXT:    callq sinf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
@@ -1127,7 +1127,7 @@ define dso_local void @Test128Sin(fp128 %d1) nounwind {
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl 8(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _sinl
+; WIN-X86-NEXT:    calll _sinf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1187,7 +1187,7 @@ define dso_local void @Test128Cos(fp128 %d1) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq cosl
+; WIN-NEXT:    callq cosf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
@@ -1205,7 +1205,7 @@ define dso_local void @Test128Cos(fp128 %d1) nounwind {
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl 8(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _cosl
+; WIN-X86-NEXT:    calll _cosf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1265,7 +1265,7 @@ define dso_local void @Test128Ceil(fp128 %d1) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq ceill
+; WIN-NEXT:    callq ceilf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
@@ -1283,7 +1283,7 @@ define dso_local void @Test128Ceil(fp128 %d1) nounwind {
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl 8(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _ceill
+; WIN-X86-NEXT:    calll _ceilf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1343,7 +1343,7 @@ define dso_local void @Test128Floor(fp128 %d1) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq floorl
+; WIN-NEXT:    callq floorf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
@@ -1361,7 +1361,7 @@ define dso_local void @Test128Floor(fp128 %d1) nounwind {
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl 8(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _floorl
+; WIN-X86-NEXT:    calll _floorf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1421,7 +1421,7 @@ define dso_local void @Test128Trunc(fp128 %d1) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq truncl
+; WIN-NEXT:    callq truncf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
@@ -1439,7 +1439,7 @@ define dso_local void @Test128Trunc(fp128 %d1) nounwind {
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl 8(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _truncl
+; WIN-X86-NEXT:    calll _truncf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1499,7 +1499,7 @@ define dso_local void @Test128Nearbyint(fp128 %d1) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq nearbyintl
+; WIN-NEXT:    callq nearbyintf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
@@ -1517,7 +1517,7 @@ define dso_local void @Test128Nearbyint(fp128 %d1) nounwind {
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl 8(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _nearbyintl
+; WIN-X86-NEXT:    calll _nearbyintf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1577,7 +1577,7 @@ define dso_local void @Test128Rint(fp128 %d1) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq rintl
+; WIN-NEXT:    callq rintf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
@@ -1595,7 +1595,7 @@ define dso_local void @Test128Rint(fp128 %d1) nounwind {
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl 8(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _rintl
+; WIN-X86-NEXT:    calll _rintf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1655,7 +1655,7 @@ define dso_local void @Test128Round(fp128 %d1) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq roundl
+; WIN-NEXT:    callq roundf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
@@ -1673,7 +1673,7 @@ define dso_local void @Test128Round(fp128 %d1) nounwind {
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl 8(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _roundl
+; WIN-X86-NEXT:    calll _roundf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1744,7 +1744,7 @@ define fp128 @Test128FMA(fp128 %a, fp128 %b, fp128 %c) nounwind {
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
-; WIN-NEXT:    callq fmal
+; WIN-NEXT:    callq fmaf128
 ; WIN-NEXT:    addq $88, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1771,7 +1771,7 @@ define fp128 @Test128FMA(fp128 %a, fp128 %b, fp128 %c) nounwind {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _fmal
+; WIN-X86-NEXT:    calll _fmaf128
 ; WIN-X86-NEXT:    addl $52, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1829,7 +1829,7 @@ define fp128 @Test128Acos(fp128 %a) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq acosl
+; WIN-NEXT:    callq acosf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1848,7 +1848,7 @@ define fp128 @Test128Acos(fp128 %a) nounwind {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _acosl
+; WIN-X86-NEXT:    calll _acosf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1904,7 +1904,7 @@ define fp128 @Test128Asin(fp128 %a) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq asinl
+; WIN-NEXT:    callq asinf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1923,7 +1923,7 @@ define fp128 @Test128Asin(fp128 %a) nounwind {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _asinl
+; WIN-X86-NEXT:    calll _asinf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1979,7 +1979,7 @@ define fp128 @Test128Atan(fp128 %a) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq atanl
+; WIN-NEXT:    callq atanf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1998,7 +1998,7 @@ define fp128 @Test128Atan(fp128 %a) nounwind {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _atanl
+; WIN-X86-NEXT:    calll _atanf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2061,7 +2061,7 @@ define fp128 @Test128Atan2(fp128 %a, fp128 %b) nounwind {
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; WIN-NEXT:    callq atan2l
+; WIN-NEXT:    callq atan2f128
 ; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2084,7 +2084,7 @@ define fp128 @Test128Atan2(fp128 %a, fp128 %b) nounwind {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _atan2l
+; WIN-X86-NEXT:    calll _atan2f128
 ; WIN-X86-NEXT:    addl $36, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2140,7 +2140,7 @@ define fp128 @Test128Cosh(fp128 %a) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq coshl
+; WIN-NEXT:    callq coshf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2159,7 +2159,7 @@ define fp128 @Test128Cosh(fp128 %a) nounwind {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _coshl
+; WIN-X86-NEXT:    calll _coshf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2215,7 +2215,7 @@ define fp128 @Test128Sinh(fp128 %a) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq sinhl
+; WIN-NEXT:    callq sinhf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2234,7 +2234,7 @@ define fp128 @Test128Sinh(fp128 %a) nounwind {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _sinhl
+; WIN-X86-NEXT:    calll _sinhf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2290,7 +2290,7 @@ define fp128 @Test128Tan(fp128 %a) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq tanl
+; WIN-NEXT:    callq tanf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2309,7 +2309,7 @@ define fp128 @Test128Tan(fp128 %a) nounwind {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _tanl
+; WIN-X86-NEXT:    calll _tanf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2365,7 +2365,7 @@ define fp128 @Test128Tanh(fp128 %a) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq tanhl
+; WIN-NEXT:    callq tanhf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2384,7 +2384,7 @@ define fp128 @Test128Tanh(fp128 %a) nounwind {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _tanhl
+; WIN-X86-NEXT:    calll _tanhf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2455,7 +2455,7 @@ define { fp128, fp128 } @Test128Modf(fp128 %a) nounwind {
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; WIN-NEXT:    callq modfl
+; WIN-NEXT:    callq modff128
 ; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
 ; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
@@ -2478,7 +2478,7 @@ define { fp128, fp128 } @Test128Modf(fp128 %a) nounwind {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %ecx
-; WIN-X86-NEXT:    calll _modfl
+; WIN-X86-NEXT:    calll _modff128
 ; WIN-X86-NEXT:    addl $24, %esp
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill