From 02b08f2069cb49cad0cd4f217061e1ccd74df283 Mon Sep 17 00:00:00 2001 From: Mary Kassayova Date: Thu, 5 Jun 2025 09:24:56 +0000 Subject: [PATCH 1/3] [AArch64][VecLib] Add libmvec support for AArch64 targets Change-Id: I07cd07932c0cb94782de8cf0d25c4729a48e695b --- clang/docs/ReleaseNotes.rst | 2 + clang/include/clang/Driver/Options.td | 5 +- clang/lib/Driver/ToolChains/Clang.cpp | 9 +- clang/test/Driver/fveclib.c | 10 +- llvm/include/llvm/Analysis/VecFuncs.def | 299 ++++++++ llvm/lib/Analysis/TargetLibraryInfo.cpp | 30 + .../replace-with-veclib-libmvec-scalable.ll | 579 +++++++++++++++ .../AArch64/replace-with-veclib-libmvec.ll | 577 +++++++++++++++ .../AArch64/veclib-function-calls.ll | 690 ++++++++++++++++++ .../AArch64/veclib-intrinsic-calls.ll | 502 +++++++++++++ llvm/test/Transforms/Util/add-TLI-mappings.ll | 28 +- 11 files changed, 2722 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec-scalable.ll create mode 100644 llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec.ll diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index b5e6cf088a4b1..11c23064ab604 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -357,6 +357,8 @@ Modified Compiler Flags - The ``-fchar8_t`` flag is no longer considered in non-C++ languages modes. (#GH55373) +- The ``-fveclib=libmvec`` option now supports AArch64 targets (requires GLIBC 2.40 or newer). + Removed Compiler Flags ------------------------- diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 152df89118a6a..b886b75fa4fa9 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3473,8 +3473,9 @@ def fveclib : Joined<["-"], "fveclib=">, Group, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, HelpText<"Use the given vector functions library">, HelpTextForVariants<[ClangOption, CC1Option], - "Use the given vector functions library. 
" - "Note: -fveclib={ArmPL,SLEEF} implies -fno-math-errno">, + "Use the given vector functions library.\n" + " Note: -fveclib={ArmPL,SLEEF,libmvec} implies -fno-math-errno.\n" + " Note: -fveclib=libmvec on AArch64 requires GLIBC 2.40 or newer.">, Values<"Accelerate,libmvec,MASSV,SVML,SLEEF,Darwin_libsystem_m,ArmPL,AMDLIBM,none">, NormalizedValuesScope<"llvm::driver::VectorLibrary">, NormalizedValues<["Accelerate", "LIBMVEC", "MASSV", "SVML", "SLEEF", diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index a74fa81f3cf5b..fdc023d193aa9 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5683,11 +5683,18 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Triple.getArch() != llvm::Triple::x86_64) D.Diag(diag::err_drv_unsupported_opt_for_target) << Name << Triple.getArchName(); - } else if (Name == "libmvec" || Name == "AMDLIBM") { + } else if (Name == "AMDLIBM") { if (Triple.getArch() != llvm::Triple::x86 && Triple.getArch() != llvm::Triple::x86_64) D.Diag(diag::err_drv_unsupported_opt_for_target) << Name << Triple.getArchName(); + } else if (Name == "libmvec") { + if (Triple.getArch() != llvm::Triple::x86 && + Triple.getArch() != llvm::Triple::x86_64 && + Triple.getArch() != llvm::Triple::aarch64 && + Triple.getArch() != llvm::Triple::aarch64_be) + D.Diag(diag::err_drv_unsupported_opt_for_target) + << Name << Triple.getArchName(); } else if (Name == "SLEEF" || Name == "ArmPL") { if (Triple.getArch() != llvm::Triple::aarch64 && Triple.getArch() != llvm::Triple::aarch64_be && diff --git a/clang/test/Driver/fveclib.c b/clang/test/Driver/fveclib.c index 5420555c36a2a..c57e9aa7a3cc2 100644 --- a/clang/test/Driver/fveclib.c +++ b/clang/test/Driver/fveclib.c @@ -1,6 +1,7 @@ // RUN: %clang -### -c -fveclib=none %s 2>&1 | FileCheck --check-prefix=CHECK-NOLIB %s // RUN: %clang -### -c -fveclib=Accelerate %s 2>&1 | FileCheck --check-prefix=CHECK-ACCELERATE %s // RUN: %clang -### 
-c --target=x86_64-unknown-linux-gnu -fveclib=libmvec %s 2>&1 | FileCheck --check-prefix=CHECK-libmvec %s +// RUN: %clang -### -c --target=aarch64-linux-gnu -fveclib=libmvec %s 2>&1 | FileCheck --check-prefix=CHECK-LIBMVEC-AARCH64 %s // RUN: %clang -### -c --target=x86_64-unknown-linux-gnu -fveclib=AMDLIBM %s 2>&1 | FileCheck --check-prefix=CHECK-AMDLIBM %s // RUN: %clang -### -c -fveclib=MASSV %s 2>&1 | FileCheck --check-prefix=CHECK-MASSV %s // RUN: %clang -### -c -fveclib=Darwin_libsystem_m %s 2>&1 | FileCheck --check-prefix=CHECK-DARWIN_LIBSYSTEM_M %s @@ -12,6 +13,7 @@ // CHECK-NOLIB: "-fveclib=none" // CHECK-ACCELERATE: "-fveclib=Accelerate" // CHECK-libmvec: "-fveclib=libmvec" +// CHECK-LIBMVEC-AARCH64: "-fveclib=libmvec" // CHECK-AMDLIBM: "-fveclib=AMDLIBM" // CHECK-MASSV: "-fveclib=MASSV" // CHECK-DARWIN_LIBSYSTEM_M: "-fveclib=Darwin_libsystem_m" @@ -23,7 +25,6 @@ // RUN: not %clang --target=x86 -c -fveclib=SLEEF %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s // RUN: not %clang --target=x86 -c -fveclib=ArmPL %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s -// RUN: not %clang --target=aarch64 -c -fveclib=libmvec %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s // RUN: not %clang --target=aarch64 -c -fveclib=SVML %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s // RUN: not %clang --target=aarch64 -c -fveclib=AMDLIBM %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s // CHECK-ERROR: unsupported option {{.*}} for target @@ -43,6 +44,9 @@ // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fveclib=libmvec -flto %s 2>&1 | FileCheck --check-prefix=CHECK-LTO-LIBMVEC %s // CHECK-LTO-LIBMVEC: "-plugin-opt=-vector-library=LIBMVEC" +// RUN: %clang -### --target=aarch64-linux-gnu -fveclib=libmvec -flto %s 2>&1 | FileCheck --check-prefix=CHECK-LTO-LIBMVEC-AARCH64 %s +// CHECK-LTO-LIBMVEC-AARCH64: "-plugin-opt=-vector-library=LIBMVEC" + // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fveclib=AMDLIBM -flto %s 2>&1 | FileCheck 
--check-prefix=CHECK-LTO-AMDLIBM %s // CHECK-LTO-AMDLIBM: "-plugin-opt=-vector-library=AMDLIBM" @@ -68,6 +72,10 @@ // CHECK-ERRNO-LIBMVEC: "-fveclib=libmvec" // CHECK-ERRNO-LIBMVEC-SAME: "-fmath-errno" +// RUN: %clang -### --target=aarch64-linux-gnu -fveclib=libmvec %s 2>&1 | FileCheck --check-prefix=CHECK-ERRNO-LIBMVEC-AARCH64 %s +// CHECK-ERRNO-LIBMVEC-AARCH64: "-fveclib=libmvec" +// CHECK-ERRNO-LIBMVEC-AARCH64-SAME: "-fmath-errno" + // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fveclib=AMDLIBM %s 2>&1 | FileCheck --check-prefix=CHECK-ERRNO-AMDLIBM %s // CHECK-ERRNO-AMDLIBM: "-fveclib=AMDLIBM" // CHECK-ERRNO-AMDLIBM-SAME: "-fmath-errno" diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index 68753a2497db2..cb8e6755a486b 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -237,6 +237,305 @@ TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVdN4v_log", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVbN4v_logf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVdN8v_logf", FIXED(8), "_ZGV_LLVM_N8v") +#elif defined(TLI_DEFINE_LIBMVEC_AARCH64_VF2_VECFUNCS) + +TLI_DEFINE_VECFUNC("acos", "_ZGVnN2v_acos", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("acosf", "_ZGVnN2v_acosf", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.acos.f64", "_ZGVnN2v_acos", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.acos.f32", "_ZGVnN2v_acosf", "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("acosh", "_ZGVnN2v_acosh", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("acoshf", "_ZGVnN2v_acoshf", "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("asin", "_ZGVnN2v_asin", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("asinf", "_ZGVnN2v_asinf", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.asin.f64", "_ZGVnN2v_asin", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.asin.f32", "_ZGVnN2v_asinf", "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("asinh", "_ZGVnN2v_asinh", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("asinhf", "_ZGVnN2v_asinhf",
"_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("atan", "_ZGVnN2v_atan", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("atanf", "_ZGVnN2v_atanf", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.atan.f64", "_ZGVnN2v_atan", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVnN2v_atanf", "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("atan2", "_ZGVnN2vv_atan2", "_ZGV_LLVM_N2vv") +TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN2vv_atan2f", "_ZGV_LLVM_N2vv") +TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_ZGVnN2vv_atan2", "_ZGV_LLVM_N2vv") +TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVnN2vv_atan2f", "_ZGV_LLVM_N2vv") + +TLI_DEFINE_VECFUNC("atanh", "_ZGVnN2v_atanh", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("atanhf", "_ZGVnN2v_atanhf", "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("cbrt", "_ZGVnN2v_cbrt", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("cbrtf", "_ZGVnN2v_cbrtf", "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("cos", "_ZGVnN2v_cos", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("cosf", "_ZGVnN2v_cosf", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVnN2v_cos", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVnN2v_cosf", "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("cosh", "_ZGVnN2v_cosh", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("coshf", "_ZGVnN2v_coshf", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.cosh.f64", "_ZGVnN2v_cosh", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.cosh.f32", "_ZGVnN2v_coshf", "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("erf", "_ZGVnN2v_erf", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("erff", "_ZGVnN2v_erff", "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("erfc", "_ZGVnN2v_erfc", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("erfcf", "_ZGVnN2v_erfcf", "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("exp", "_ZGVnN2v_exp", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("expf", "_ZGVnN2v_expf", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVnN2v_exp", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVnN2v_expf", "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("exp10", "_ZGVnN2v_exp10", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("exp10f", "_ZGVnN2v_exp10f",
"_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.exp10.f64", "_ZGVnN2v_exp10", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVnN2v_exp10f", "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("exp2", "_ZGVnN2v_exp2", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("exp2f", "_ZGVnN2v_exp2f", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVnN2v_exp2", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVnN2v_exp2f", "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("expm1", "_ZGVnN2v_expm1", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("expm1f", "_ZGVnN2v_expm1f", "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("hypot", "_ZGVnN2vv_hypot", "_ZGV_LLVM_N2vv") +TLI_DEFINE_VECFUNC("hypotf", "_ZGVnN2vv_hypotf", "_ZGV_LLVM_N2vv") + +TLI_DEFINE_VECFUNC("log", "_ZGVnN2v_log", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("logf", "_ZGVnN2v_logf", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVnN2v_log", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVnN2v_logf", "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("log10", "_ZGVnN2v_log10", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("log10f", "_ZGVnN2v_log10f", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVnN2v_log10", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVnN2v_log10f", "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("log1p", "_ZGVnN2v_log1p", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("log1pf", "_ZGVnN2v_log1pf", "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("log2", "_ZGVnN2v_log2", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("log2f", "_ZGVnN2v_log2f", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVnN2v_log2", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVnN2v_log2f", "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("pow", "_ZGVnN2vv_pow", "_ZGV_LLVM_N2vv") +TLI_DEFINE_VECFUNC("powf", "_ZGVnN2vv_powf", "_ZGV_LLVM_N2vv") +TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVnN2vv_pow", "_ZGV_LLVM_N2vv") +TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVnN2vv_powf", "_ZGV_LLVM_N2vv") + +TLI_DEFINE_VECFUNC("sin", "_ZGVnN2v_sin", "_ZGV_LLVM_N2v") 
+TLI_DEFINE_VECFUNC("sinf", "_ZGVnN2v_sinf", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVnN2v_sin", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVnN2v_sinf", "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("sinh", "_ZGVnN2v_sinh", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("sinhf", "_ZGVnN2v_sinhf", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.sinh.f64", "_ZGVnN2v_sinh", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.sinh.f32", "_ZGVnN2v_sinhf", "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("tan", "_ZGVnN2v_tan", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("tanf", "_ZGVnN2v_tanf", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVnN2v_tan", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVnN2v_tanf", "_ZGV_LLVM_N2v") + +TLI_DEFINE_VECFUNC("tanh", "_ZGVnN2v_tanh", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("tanhf", "_ZGVnN2v_tanhf", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.tanh.f64", "_ZGVnN2v_tanh", "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.tanh.f32", "_ZGVnN2v_tanhf", "_ZGV_LLVM_N2v") + +#elif defined(TLI_DEFINE_LIBMVEC_AARCH64_VF4_VECFUNCS) + +TLI_DEFINE_VECFUNC("acosf", "_ZGVnN4v_acosf", "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.acos.f32", "_ZGVnN4v_acosf", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("acoshf", "_ZGVnN4v_acoshf", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("asinf", "_ZGVnN4v_asinf", "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.asin.f32", "_ZGVnN4v_asinf", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("asinhf", "_ZGVnN4v_asinhf", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("atanf", "_ZGVnN4v_atanf", "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVnN4v_atanf", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN4vv_atan2f", "_ZGV_LLVM_N4vv") +TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVnN4vv_atan2f", "_ZGV_LLVM_N4vv") + +TLI_DEFINE_VECFUNC("atanhf", "_ZGVnN4v_atanhf", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("cbrtf", "_ZGVnN4v_cbrtf", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("cosf", "_ZGVnN4v_cosf", "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.cos.f32", 
"_ZGVnN4v_cosf", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("coshf", "_ZGVnN4v_coshf", "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.cosh.f32", "_ZGVnN4v_coshf", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("erff", "_ZGVnN4v_erff", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("erfcf", "_ZGVnN4v_erfcf", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("expf", "_ZGVnN4v_expf", "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVnN4v_expf", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("exp10f", "_ZGVnN4v_exp10f", "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVnN4v_exp10f", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("exp2f", "_ZGVnN4v_exp2f", "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVnN4v_exp2f", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("expm1f", "_ZGVnN4v_expm1f", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("hypotf", "_ZGVnN4vv_hypotf", "_ZGV_LLVM_N4vv") + +TLI_DEFINE_VECFUNC("logf", "_ZGVnN4v_logf", "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVnN4v_logf", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("log10f", "_ZGVnN4v_log10f", "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVnN4v_log10f", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("log1pf", "_ZGVnN4v_log1pf", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("log2f", "_ZGVnN4v_log2f", "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVnN4v_log2f", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("powf", "_ZGVnN4vv_powf", "_ZGV_LLVM_N4vv") +TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVnN4vv_powf", "_ZGV_LLVM_N4vv") + +TLI_DEFINE_VECFUNC("sinf", "_ZGVnN4v_sinf", "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVnN4v_sinf", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("sinhf", "_ZGVnN4v_sinhf", "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.sinh.f32", "_ZGVnN4v_sinhf", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("tanf", "_ZGVnN4v_tanf", "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVnN4v_tanf", "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("tanhf", "_ZGVnN4v_tanhf", "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.tanh.f32", 
"_ZGVnN4v_tanhf", "_ZGV_LLVM_N4v") + +#elif defined(TLI_DEFINE_LIBMVEC_AARCH64_SCALABLE_VECFUNCS) + +TLI_DEFINE_VECFUNC("acos", "_ZGVsMxv_acos", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("acosf", "_ZGVsMxv_acosf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.acos.f64", "_ZGVsMxv_acos", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.acos.f32", "_ZGVsMxv_acosf", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("acosh", "_ZGVsMxv_acosh", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("acoshf", "_ZGVsMxv_acoshf", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("asin", "_ZGVsMxv_asin", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("asinf", "_ZGVsMxv_asinf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.asin.f64", "_ZGVsMxv_asin", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.asin.f32", "_ZGVsMxv_asinf", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("asinh", "_ZGVsMxv_asinh", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("asinhf", "_ZGVsMxv_asinhf", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("atan", "_ZGVsMxv_atan", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("atanf", "_ZGVsMxv_atanf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.atan.f64", "_ZGVsMxv_atan", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVsMxv_atanf", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("atan2", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("atan2f", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv") + +TLI_DEFINE_VECFUNC("atanh", "_ZGVsMxv_atanh", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("atanhf", "_ZGVsMxv_atanhf", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("cbrt", "_ZGVsMxv_cbrt", 
SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("cbrtf", "_ZGVsMxv_cbrtf", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("cos", "_ZGVsMxv_cos", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("cosf", "_ZGVsMxv_cosf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVsMxv_cos", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVsMxv_cosf", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("cosh", "_ZGVsMxv_cosh", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("coshf", "_ZGVsMxv_coshf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.cosh.f64", "_ZGVsMxv_cosh", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.cosh.f32", "_ZGVsMxv_coshf", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("erf", "_ZGVsMxv_erf", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("erff", "_ZGVsMxv_erff", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("erfc", "_ZGVsMxv_erfc", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("erfcf", "_ZGVsMxv_erfcf", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("exp", "_ZGVsMxv_exp", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("expf", "_ZGVsMxv_expf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVsMxv_exp", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVsMxv_expf", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("exp10", "_ZGVsMxv_exp10", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("exp10f", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.exp10.f64", "_ZGVsMxv_exp10", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("exp2", "_ZGVsMxv_exp2", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("exp2f", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVsMxv_exp2", SCALABLE(2), MASKED, 
"_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("expm1", "_ZGVsMxv_expm1", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("expm1f", "_ZGVsMxv_expm1f", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("hypot", "_ZGVsMxvv_hypot", SCALABLE(2), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("hypotf", "_ZGVsMxvv_hypotf", SCALABLE(4), MASKED, "_ZGVsMxvv") + +TLI_DEFINE_VECFUNC("log", "_ZGVsMxv_log", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("logf", "_ZGVsMxv_logf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVsMxv_log", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVsMxv_logf", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("log10", "_ZGVsMxv_log10", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("log10f", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVsMxv_log10", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("log1p", "_ZGVsMxv_log1p", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("log1pf", "_ZGVsMxv_log1pf", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("log2", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("log2f", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("pow", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("powf", "_ZGVsMxvv_powf", SCALABLE(4), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv") +TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVsMxvv_powf", SCALABLE(4), MASKED, "_ZGVsMxvv") + +TLI_DEFINE_VECFUNC("sin", "_ZGVsMxv_sin", SCALABLE(2), 
MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("sinf", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVsMxv_sin", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("sinh", "_ZGVsMxv_sinh", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("sinhf", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.sinh.f64", "_ZGVsMxv_sinh", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.sinh.f32", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("tan", "_ZGVsMxv_tan", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("tanf", "_ZGVsMxv_tanf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVsMxv_tan", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVsMxv_tanf", SCALABLE(4), MASKED, "_ZGVsMxv") + +TLI_DEFINE_VECFUNC("tanh", "_ZGVsMxv_tanh", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("tanhf", "_ZGVsMxv_tanhf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.tanh.f64", "_ZGVsMxv_tanh", SCALABLE(2), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("llvm.tanh.f32", "_ZGVsMxv_tanhf", SCALABLE(4), MASKED, "_ZGVsMxv") + #elif defined(TLI_DEFINE_MASSV_VECFUNCS) // IBM MASS library's vector Functions diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index c8b568354965d..46efa682e77b2 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -1299,6 +1299,30 @@ static const VecDesc VecFuncs_LIBMVEC_X86[] = { #undef TLI_DEFINE_LIBMVEC_X86_VECFUNCS }; +static const VecDesc VecFuncs_LIBMVEC_AARCH64_VF2[] = { +#define TLI_DEFINE_LIBMVEC_AARCH64_VF2_VECFUNCS +#define TLI_DEFINE_VECFUNC(SCAL, VEC, VABI_PREFIX) \ + {SCAL, VEC, /* VF = */ FIXED(2), /* MASK = */ false, VABI_PREFIX, \ + /* CC = */ CallingConv::AArch64_VectorCall}, +#include 
"llvm/Analysis/VecFuncs.def" +#undef TLI_DEFINE_LIBMVEC_AARCH64_VF2_VECFUNCS +}; +static const VecDesc VecFuncs_LIBMVEC_AARCH64_VF4[] = { +#define TLI_DEFINE_LIBMVEC_AARCH64_VF4_VECFUNCS +#define TLI_DEFINE_VECFUNC(SCAL, VEC, VABI_PREFIX) \ + {SCAL, VEC, /* VF = */ FIXED(4), /* MASK = */ false, VABI_PREFIX, \ + /* CC = */ CallingConv::AArch64_VectorCall}, +#include "llvm/Analysis/VecFuncs.def" +#undef TLI_DEFINE_LIBMVEC_AARCH64_VF4_VECFUNCS +}; +static const VecDesc VecFuncs_LIBMVEC_AARCH64_VFScalable[] = { +#define TLI_DEFINE_LIBMVEC_AARCH64_SCALABLE_VECFUNCS +#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK, VABI_PREFIX) \ + {SCAL, VEC, VF, MASK, VABI_PREFIX, /* CC = */ std::nullopt}, +#include "llvm/Analysis/VecFuncs.def" +#undef TLI_DEFINE_LIBMVEC_AARCH64_SCALABLE_VECFUNCS +}; + static const VecDesc VecFuncs_MASSV[] = { #define TLI_DEFINE_MASSV_VECFUNCS #include "llvm/Analysis/VecFuncs.def" @@ -1376,6 +1400,12 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( case llvm::Triple::x86_64: addVectorizableFunctions(VecFuncs_LIBMVEC_X86); break; + case llvm::Triple::aarch64: + case llvm::Triple::aarch64_be: + addVectorizableFunctions(VecFuncs_LIBMVEC_AARCH64_VF2); + addVectorizableFunctions(VecFuncs_LIBMVEC_AARCH64_VF4); + addVectorizableFunctions(VecFuncs_LIBMVEC_AARCH64_VFScalable); + break; } break; } diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec-scalable.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec-scalable.ll new file mode 100644 index 0000000000000..1b541d1330aae --- /dev/null +++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec-scalable.ll @@ -0,0 +1,579 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals +; RUN: opt -mattr=+sve -vector-library=LIBMVEC -replace-with-veclib -S < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +;. 
+; CHECK: @llvm.compiler.used = appending global [34 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxvv_pow, ptr @_ZGVsMxvv_powf, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxv_tan, ptr @_ZGVsMxv_tanf, ptr @_ZGVsMxv_acos, ptr @_ZGVsMxv_acosf, ptr @_ZGVsMxv_asin, ptr @_ZGVsMxv_asinf, ptr @_ZGVsMxv_atan, ptr @_ZGVsMxv_atanf, ptr @_ZGVsMxvv_atan2, ptr @_ZGVsMxvv_atan2f, ptr @_ZGVsMxv_cosh, ptr @_ZGVsMxv_coshf, ptr @_ZGVsMxv_sinh, ptr @_ZGVsMxv_sinhf, ptr @_ZGVsMxv_tanh, ptr @_ZGVsMxv_tanhf], section "llvm.metadata" +;. +define @llvm_ceil_vscale_f64( %in) { +; CHECK-LABEL: @llvm_ceil_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.ceil.nxv2f64( [[IN:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.ceil.nxv2f64( %in) + ret %1 +} + +define @llvm_ceil_vscale_f32( %in) { +; CHECK-LABEL: @llvm_ceil_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.ceil.nxv4f32( [[IN:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.ceil.nxv4f32( %in) + ret %1 +} + +define @llvm_copysign_vscale_f64( %mag, %sgn) { +; CHECK-LABEL: @llvm_copysign_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.copysign.nxv2f64( [[MAG:%.*]], [[SGN:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.copysign.nxv2f64( %mag, %sgn) + ret %1 +} + +define @llvm_copysign_vscale_f32( %mag, %sgn) { +; CHECK-LABEL: @llvm_copysign_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.copysign.nxv4f32( [[MAG:%.*]], [[SGN:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.copysign.nxv4f32( %mag, %sgn) + ret %1 +} + +define @llvm_cos_vscale_f64( %in) { +; CHECK-LABEL: @llvm_cos_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_cos( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret 
[[TMP1]] +; + %1 = call fast @llvm.cos.nxv2f64( %in) + ret %1 +} + +define @llvm_cos_vscale_f32( %in) { +; CHECK-LABEL: @llvm_cos_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_cosf( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.cos.nxv4f32( %in) + ret %1 +} + +define @llvm_exp_vscale_f64( %in) { +; CHECK-LABEL: @llvm_exp_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_exp( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.exp.nxv2f64( %in) + ret %1 +} + +define @llvm_exp_vscale_f32( %in) { +; CHECK-LABEL: @llvm_exp_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_expf( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.exp.nxv4f32( %in) + ret %1 +} + +define @llvm_exp10_vscale_f64( %in) { +; CHECK-LABEL: @llvm_exp10_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_exp10( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.exp10.nxv2f64( %in) + ret %1 +} + +define @llvm_exp10_vscale_f32( %in) { +; CHECK-LABEL: @llvm_exp10_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_exp10f( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.exp10.nxv4f32( %in) + ret %1 +} + +define @llvm_exp2_vscale_f64( %in) { +; CHECK-LABEL: @llvm_exp2_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_exp2( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.exp2.nxv2f64( %in) + ret %1 +} + +define @llvm_exp2_vscale_f32( %in) { +; CHECK-LABEL: @llvm_exp2_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_exp2f( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.exp2.nxv4f32( %in) + ret %1 +} + +define @llvm_fabs_vscale_f64( %in) { +; CHECK-LABEL: @llvm_fabs_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.fabs.nxv2f64( [[IN:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = 
call fast @llvm.fabs.nxv2f64( %in) + ret %1 +} + +define @llvm_fabs_vscale_f32( %in) { +; CHECK-LABEL: @llvm_fabs_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.fabs.nxv4f32( [[IN:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.fabs.nxv4f32( %in) + ret %1 +} + +define @llvm_floor_vscale_f64( %in) { +; CHECK-LABEL: @llvm_floor_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.floor.nxv2f64( [[IN:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.floor.nxv2f64( %in) + ret %1 +} + +define @llvm_floor_vscale_f32( %in) { +; CHECK-LABEL: @llvm_floor_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.floor.nxv4f32( [[IN:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.floor.nxv4f32( %in) + ret %1 +} + +define @llvm_fma_vscale_f64( %a, %b, %c ) { +; CHECK-LABEL: @llvm_fma_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.fma.nxv2f64( [[A:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.fma.nxv2f64( %a, %b, %c) + ret %1 +} + +define @llvm_fma_vscale_f32( %a, %b, %c) { +; CHECK-LABEL: @llvm_fma_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.fma.nxv4f32( [[A:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.fma.nxv4f32( %a, %b, %c) + ret %1 +} + +define @llvm_log_vscale_f64( %in) { +; CHECK-LABEL: @llvm_log_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_log( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.log.nxv2f64( %in) + ret %1 +} + +define @llvm_log_vscale_f32( %in) { +; CHECK-LABEL: @llvm_log_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_logf( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.log.nxv4f32( %in) + ret %1 +} + +define @llvm_log10_vscale_f64( %in) { +; CHECK-LABEL: @llvm_log10_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_log10( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call 
fast @llvm.log10.nxv2f64( %in) + ret %1 +} + +define @llvm_log10_vscale_f32( %in) { +; CHECK-LABEL: @llvm_log10_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_log10f( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.log10.nxv4f32( %in) + ret %1 +} + +define @llvm_log2_vscale_f64( %in) { +; CHECK-LABEL: @llvm_log2_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_log2( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.log2.nxv2f64( %in) + ret %1 +} + +define @llvm_log2_vscale_f32( %in) { +; CHECK-LABEL: @llvm_log2_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_log2f( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.log2.nxv4f32( %in) + ret %1 +} + +define @llvm_maxnum_vscale_f64( %in0, %in1) { +; CHECK-LABEL: @llvm_maxnum_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.maxnum.nxv2f64( [[IN0:%.*]], [[IN1:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.maxnum.nxv2f64( %in0, %in1) + ret %1 +} + +define @llvm_maxnum_vscale_f32( %in0, %in1) { +; CHECK-LABEL: @llvm_maxnum_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.maxnum.nxv4f32( [[IN0:%.*]], [[IN1:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.maxnum.nxv4f32( %in0, %in1) + ret %1 +} + +define @llvm_minnum_vscale_f64( %in0, %in1) { +; CHECK-LABEL: @llvm_minnum_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.minnum.nxv2f64( [[IN0:%.*]], [[IN1:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.minnum.nxv2f64( %in0, %in1) + ret %1 +} + +define @llvm_minnum_vscale_f32( %in0, %in1) { +; CHECK-LABEL: @llvm_minnum_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.minnum.nxv4f32( [[IN0:%.*]], [[IN1:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.minnum.nxv4f32( %in0, %in1) + ret %1 +} + +define @llvm_nearbyint_vscale_f64( %in) { +; CHECK-LABEL: @llvm_nearbyint_vscale_f64( +; CHECK-NEXT: 
[[TMP1:%.*]] = call fast @llvm.nearbyint.nxv2f64( [[IN:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.nearbyint.nxv2f64( %in) + ret %1 +} + +define @llvm_nearbyint_vscale_f32( %in) { +; CHECK-LABEL: @llvm_nearbyint_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.nearbyint.nxv4f32( [[IN:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.nearbyint.nxv4f32( %in) + ret %1 +} + +define @llvm_pow_vscale_f64( %in, %pow) { +; CHECK-LABEL: @llvm_pow_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxvv_pow( [[IN:%.*]], [[POW:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.pow.nxv2f64( %in, %pow) + ret %1 +} + +define @llvm_pow_vscale_f32( %in, %pow) { +; CHECK-LABEL: @llvm_pow_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxvv_powf( [[IN:%.*]], [[POW:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.pow.nxv4f32( %in, %pow) + ret %1 +} + +define @llvm_rint_vscale_f64( %in) { +; CHECK-LABEL: @llvm_rint_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.rint.nxv2f64( [[IN:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.rint.nxv2f64( %in) + ret %1 +} + +define @llvm_rint_vscale_f32( %in) { +; CHECK-LABEL: @llvm_rint_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.rint.nxv4f32( [[IN:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.rint.nxv4f32( %in) + ret %1 +} + +define @llvm_round_vscale_f64( %in) { +; CHECK-LABEL: @llvm_round_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.round.nxv2f64( [[IN:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.round.nxv2f64( %in) + ret %1 +} + +define @llvm_round_vscale_f32( %in) { +; CHECK-LABEL: @llvm_round_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.round.nxv4f32( [[IN:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.round.nxv4f32( %in) + ret %1 +} + +define @llvm_sin_vscale_f64( %in) { +; CHECK-LABEL: @llvm_sin_vscale_f64( +; CHECK-NEXT: 
[[TMP1:%.*]] = call fast @_ZGVsMxv_sin( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.sin.nxv2f64( %in) + ret %1 +} + +define @llvm_sin_vscale_f32( %in) { +; CHECK-LABEL: @llvm_sin_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_sinf( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.sin.nxv4f32( %in) + ret %1 +} + +define @llvm_sqrt_vscale_f64( %in) { +; CHECK-LABEL: @llvm_sqrt_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.sqrt.nxv2f64( [[IN:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.sqrt.nxv2f64( %in) + ret %1 +} + +define @llvm_sqrt_vscale_f32( %in) { +; CHECK-LABEL: @llvm_sqrt_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.sqrt.nxv4f32( [[IN:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.sqrt.nxv4f32( %in) + ret %1 +} + +define @llvm_tan_vscale_f64( %in) { +; CHECK-LABEL: @llvm_tan_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_tan( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.tan.nxv2f64( %in) + ret %1 +} + +define @llvm_tan_vscale_f32( %in) { +; CHECK-LABEL: @llvm_tan_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_tanf( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.tan.nxv4f32( %in) + ret %1 +} + +define @llvm_acos_vscale_f64( %in) { +; CHECK-LABEL: @llvm_acos_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_acos( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.acos.nxv2f64( %in) + ret %1 +} + +define @llvm_acos_vscale_f32( %in) { +; CHECK-LABEL: @llvm_acos_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_acosf( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.acos.nxv4f32( %in) + ret %1 +} + +define @llvm_asin_vscale_f64( %in) { +; CHECK-LABEL: @llvm_asin_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_asin( 
[[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.asin.nxv2f64( %in) + ret %1 +} + +define @llvm_asin_vscale_f32( %in) { +; CHECK-LABEL: @llvm_asin_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_asinf( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.asin.nxv4f32( %in) + ret %1 +} + +define @llvm_atan_vscale_f64( %in) { +; CHECK-LABEL: @llvm_atan_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_atan( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.atan.nxv2f64( %in) + ret %1 +} + +define @llvm_atan_vscale_f32( %in) { +; CHECK-LABEL: @llvm_atan_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_atanf( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.atan.nxv4f32( %in) + ret %1 +} + +define @llvm_atan2_vscale_f64( %x, %y) { +; CHECK-LABEL: @llvm_atan2_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxvv_atan2( [[X:%.*]], [[Y:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.atan2.nxv2f64( %x, %y) + ret %1 +} + +define @llvm_atan2_vscale_f32( %x, %y) { +; CHECK-LABEL: @llvm_atan2_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxvv_atan2f( [[X:%.*]], [[Y:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.atan2.nxv4f32( %x, %y) + ret %1 +} + +define @llvm_cosh_vscale_f64( %in) { +; CHECK-LABEL: @llvm_cosh_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_cosh( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.cosh.nxv2f64( %in) + ret %1 +} + +define @llvm_cosh_vscale_f32( %in) { +; CHECK-LABEL: @llvm_cosh_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_coshf( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.cosh.nxv4f32( %in) + ret %1 +} + +define @llvm_sinh_vscale_f64( %in) { +; CHECK-LABEL: @llvm_sinh_vscale_f64( +; CHECK-NEXT: 
[[TMP1:%.*]] = call fast @_ZGVsMxv_sinh( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.sinh.nxv2f64( %in) + ret %1 +} + +define @llvm_sinh_vscale_f32( %in) { +; CHECK-LABEL: @llvm_sinh_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_sinhf( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.sinh.nxv4f32( %in) + ret %1 +} + +define @llvm_tanh_vscale_f64( %in) { +; CHECK-LABEL: @llvm_tanh_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_tanh( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.tanh.nxv2f64( %in) + ret %1 +} + +define @llvm_tanh_vscale_f32( %in) { +; CHECK-LABEL: @llvm_tanh_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @_ZGVsMxv_tanhf( [[IN:%.*]], splat (i1 true)) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.tanh.nxv4f32( %in) + ret %1 +} + + +define @llvm_trunc_vscale_f64( %in) { +; CHECK-LABEL: @llvm_trunc_vscale_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.trunc.nxv2f64( [[IN:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.trunc.nxv2f64( %in) + ret %1 +} + +define @llvm_trunc_vscale_f32( %in) { +; CHECK-LABEL: @llvm_trunc_vscale_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast @llvm.trunc.nxv4f32( [[IN:%.*]]) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call fast @llvm.trunc.nxv4f32( %in) + ret %1 +} + +declare @llvm.ceil.nxv2f64() +declare @llvm.ceil.nxv4f32() +declare @llvm.copysign.nxv2f64(, ) +declare @llvm.copysign.nxv4f32(, ) +declare @llvm.cos.nxv2f64() +declare @llvm.cos.nxv4f32() +declare @llvm.exp.nxv2f64() +declare @llvm.exp.nxv4f32() +declare @llvm.exp2.nxv2f64() +declare @llvm.exp2.nxv4f32() +declare @llvm.exp10.nxv2f64() +declare @llvm.exp10.nxv4f32() +declare @llvm.fabs.nxv2f64() +declare @llvm.fabs.nxv4f32() +declare @llvm.floor.nxv2f64() +declare @llvm.floor.nxv4f32() +declare @llvm.fma.nxv2f64(, , ) +declare @llvm.fma.nxv4f32(, , ) +declare @llvm.log.nxv2f64() +declare 
@llvm.log.nxv4f32() +declare @llvm.log10.nxv2f64() +declare @llvm.log10.nxv4f32() +declare @llvm.log2.nxv2f64() +declare @llvm.log2.nxv4f32() +declare @llvm.maxnum.nxv2f64(, ) +declare @llvm.maxnum.nxv4f32(, ) +declare @llvm.minnum.nxv2f64(, ) +declare @llvm.minnum.nxv4f32(, ) +declare @llvm.nearbyint.nxv2f64() +declare @llvm.nearbyint.nxv4f32() +declare @llvm.pow.nxv2f64(, ) +declare @llvm.pow.nxv4f32(, ) +declare @llvm.rint.nxv2f64() +declare @llvm.rint.nxv4f32() +declare @llvm.round.nxv2f64() +declare @llvm.round.nxv4f32() +declare @llvm.sin.nxv2f64() +declare @llvm.sin.nxv4f32() +declare @llvm.sqrt.nxv2f64() +declare @llvm.sqrt.nxv4f32() +declare @llvm.tan.nxv2f64() +declare @llvm.tan.nxv4f32() +declare @llvm.trunc.nxv2f64() +declare @llvm.trunc.nxv4f32() +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { "target-features"="+sve" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-features"="+sve" } +;. diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec.ll new file mode 100644 index 0000000000000..6323d942a08e7 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-libmvec.ll @@ -0,0 +1,577 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals +; RUN: opt -vector-library=LIBMVEC -replace-with-veclib -S < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +;. 
+; CHECK: @llvm.compiler.used = appending global [34 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2vv_pow, ptr @_ZGVnN4vv_powf, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2v_tan, ptr @_ZGVnN4v_tanf, ptr @_ZGVnN2v_acos, ptr @_ZGVnN4v_acosf, ptr @_ZGVnN2v_asin, ptr @_ZGVnN4v_asinf, ptr @_ZGVnN2v_atan, ptr @_ZGVnN4v_atanf, ptr @_ZGVnN2vv_atan2, ptr @_ZGVnN4vv_atan2f, ptr @_ZGVnN2v_cosh, ptr @_ZGVnN4v_coshf, ptr @_ZGVnN2v_sinh, ptr @_ZGVnN4v_sinhf, ptr @_ZGVnN2v_tanh, ptr @_ZGVnN4v_tanhf], section "llvm.metadata" +;. +define <2 x double> @llvm_ceil_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_ceil_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.ceil.v2f64(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.ceil.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_ceil_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_ceil_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <2 x double> @llvm_copysign_f64(<2 x double> %mag, <2 x double> %sgn) { +; CHECK-LABEL: @llvm_copysign_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.copysign.v2f64(<2 x double> [[MAG:%.*]], <2 x double> [[SGN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sgn) + ret <2 x double> %1 +} + +define <4 x float> @llvm_copysign_f32(<4 x float> %mag, <4 x float> %sgn) { +; CHECK-LABEL: @llvm_copysign_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> 
@llvm.copysign.v4f32(<4 x float> [[MAG:%.*]], <4 x float> [[SGN:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.copysign.v4f32(<4 x float> %mag, <4 x float> %sgn) + ret <4 x float> %1 +} + +define <2 x double> @llvm_cos_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_cos_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_cos(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.cos.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_cos_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_cos_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_cosf(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.cos.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <2 x double> @llvm_exp_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_exp_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.exp.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_exp_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_exp_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_expf(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.exp.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <2 x double> @llvm_exp10_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_exp10_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp10(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.exp10.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_exp10_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_exp10_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> 
[[TMP1]] +; + %1 = call fast <4 x float> @llvm.exp10.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <2 x double> @llvm_exp2_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_exp2_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp2(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_exp2_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_exp2_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.exp2.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <2 x double> @llvm_fabs_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_fabs_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.fabs.v2f64(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.fabs.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_fabs_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_fabs_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <2 x double> @llvm_floor_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_floor_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.floor.v2f64(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_floor_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_floor_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.floor.v4f32(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.floor.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + 
+define <2 x double> @llvm_fma_f64(<2 x double> %a, <2 x double> %b, <2 x double> %c ) { +; CHECK-LABEL: @llvm_fma_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.fma.v2f64(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) + ret <2 x double> %1 +} + +define <4 x float> @llvm_fma_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-LABEL: @llvm_fma_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) + ret <4 x float> %1 +} + +define <2 x double> @llvm_log_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_log_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_log(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.log.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_log_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_log_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_logf(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.log.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <2 x double> @llvm_log10_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_log10_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_log10(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.log10.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_log10_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_log10_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_log10f(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> 
[[TMP1]] +; + %1 = call fast <4 x float> @llvm.log10.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <2 x double> @llvm_log2_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_log2_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_log2(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.log2.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_log2_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_log2_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_log2f(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.log2.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <2 x double> @llvm_maxnum_f64(<2 x double> %in0, <2 x double> %in1) { +; CHECK-LABEL: @llvm_maxnum_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[IN0:%.*]], <2 x double> [[IN1:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> %in0, <2 x double> %in1) + ret <2 x double> %1 +} + +define <4 x float> @llvm_maxnum_f32(<4 x float> %in0, <4 x float> %in1) { +; CHECK-LABEL: @llvm_maxnum_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.maxnum.v4f32(<4 x float> [[IN0:%.*]], <4 x float> [[IN1:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.maxnum.v4f32(<4 x float> %in0, <4 x float> %in1) + ret <4 x float> %1 +} + +define <2 x double> @llvm_minnum_f64(<2 x double> %in0, <2 x double> %in1) { +; CHECK-LABEL: @llvm_minnum_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.minnum.v2f64(<2 x double> [[IN0:%.*]], <2 x double> [[IN1:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.minnum.v2f64(<2 x double> %in0, <2 x double> %in1) + ret <2 x double> %1 +} + +define <4 x float> @llvm_minnum_f32(<4 x float> %in0, <4 x float> %in1) { +; CHECK-LABEL: 
@llvm_minnum_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.minnum.v4f32(<4 x float> [[IN0:%.*]], <4 x float> [[IN1:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.minnum.v4f32(<4 x float> %in0, <4 x float> %in1) + ret <4 x float> %1 +} + +define <2 x double> @llvm_nearbyint_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_nearbyint_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.nearbyint.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_nearbyint_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_nearbyint_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.nearbyint.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %pow) { +; CHECK-LABEL: @llvm_pow_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2vv_pow(<2 x double> [[IN:%.*]], <2 x double> [[POW:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %in, <2 x double> %pow) + ret <2 x double> %1 +} + +define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %pow) { +; CHECK-LABEL: @llvm_pow_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4vv_powf(<4 x float> [[IN:%.*]], <4 x float> [[POW:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %in, <4 x float> %pow) + ret <4 x float> %1 +} + +define <2 x double> @llvm_rint_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_rint_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.rint.v2f64(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.rint.v2f64(<2 x double> %in) 
+ ret <2 x double> %1 +} + +define <4 x float> @llvm_rint_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_rint_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.rint.v4f32(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.rint.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <2 x double> @llvm_round_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_round_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.round.v2f64(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.round.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_round_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_round_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.round.v4f32(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.round.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <2 x double> @llvm_sin_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_sin_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_sin(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.sin.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_sin_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_sin_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_sinf(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.sin.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <2 x double> @llvm_sqrt_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_sqrt_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_sqrt_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_sqrt_f32( 
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <2 x double> @llvm_tan_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_tan_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_tan(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.tan.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_tan_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_tan_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_tanf(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.tan.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <2 x double> @llvm_acos_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_acos_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_acos(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.acos.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_acos_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_acos_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_acosf(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.acos.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <2 x double> @llvm_asin_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_asin_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_asin(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.asin.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_asin_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_asin_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_asinf(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> 
[[TMP1]] +; + %1 = call fast <4 x float> @llvm.asin.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <2 x double> @llvm_atan_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_atan_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_atan(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.atan.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_atan_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_atan_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_atanf(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.atan.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <2 x double> @llvm_atan2_f64(<2 x double> %x, <2 x double> %y) { +; CHECK-LABEL: @llvm_atan2_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.atan2.v2f64(<2 x double> %x, <2 x double> %y) + ret <2 x double> %1 +} + +define <4 x float> @llvm_atan2_f32(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: @llvm_atan2_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.atan2.v4f32(<4 x float> %x, <4 x float> %y) + ret <4 x float> %1 +} + +define <2 x double> @llvm_cosh_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_cosh_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_cosh(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.cosh.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_cosh_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_cosh_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_coshf(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x
float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.cosh.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <2 x double> @llvm_sinh_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_sinh_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_sinh(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.sinh.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_sinh_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_sinh_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.sinh.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <2 x double> @llvm_tanh_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_tanh_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_tanh(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.tanh.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_tanh_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_tanh_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.tanh.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + +define <2 x double> @llvm_trunc_f64(<2 x double> %in) { +; CHECK-LABEL: @llvm_trunc_f64( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.trunc.v2f64(<2 x double> [[IN:%.*]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = call fast <2 x double> @llvm.trunc.v2f64(<2 x double> %in) + ret <2 x double> %1 +} + +define <4 x float> @llvm_trunc_f32(<4 x float> %in) { +; CHECK-LABEL: @llvm_trunc_f32( +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.trunc.v4f32(<4 x float> [[IN:%.*]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = call fast <4 x float> @llvm.trunc.v4f32(<4 x float> %in) + ret <4 x float> %1 +} + 
+declare <2 x double> @llvm.ceil.v2f64(<2 x double>) +declare <4 x float> @llvm.ceil.v4f32(<4 x float>) +declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) +declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.cos.v2f64(<2 x double>) +declare <4 x float> @llvm.cos.v4f32(<4 x float>) +declare <2 x double> @llvm.exp.v2f64(<2 x double>) +declare <4 x float> @llvm.exp.v4f32(<4 x float>) +declare <2 x double> @llvm.exp2.v2f64(<2 x double>) +declare <4 x float> @llvm.exp2.v4f32(<4 x float>) +declare <2 x double> @llvm.exp10.v2f64(<2 x double>) +declare <4 x float> @llvm.exp10.v4f32(<4 x float>) +declare <2 x double> @llvm.fabs.v2f64(<2 x double>) +declare <4 x float> @llvm.fabs.v4f32(<4 x float>) +declare <2 x double> @llvm.floor.v2f64(<2 x double>) +declare <4 x float> @llvm.floor.v4f32(<4 x float>) +declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) +declare <2 x double> @llvm.log.v2f64(<2 x double>) +declare <4 x float> @llvm.log.v4f32(<4 x float>) +declare <2 x double> @llvm.log10.v2f64(<2 x double>) +declare <4 x float> @llvm.log10.v4f32(<4 x float>) +declare <2 x double> @llvm.log2.v2f64(<2 x double>) +declare <4 x float> @llvm.log2.v4f32(<4 x float>) +declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) +declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) +declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) +declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) +declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) +declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.rint.v2f64(<2 x double>) +declare <4 x float> @llvm.rint.v4f32(<4 x float>) +declare <2 x double> @llvm.round.v2f64(<2 x 
double>) +declare <4 x float> @llvm.round.v4f32(<4 x float>) +declare <2 x double> @llvm.sin.v2f64(<2 x double>) +declare <4 x float> @llvm.sin.v4f32(<4 x float>) +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) +declare <2 x double> @llvm.tan.v2f64(<2 x double>) +declare <4 x float> @llvm.tan.v4f32(<4 x float>) +declare <2 x double> @llvm.trunc.v2f64(<2 x double>) +declare <4 x float> @llvm.trunc.v4f32(<4 x float>) +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +;. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll index c6ea44bb85f11..d87b161371c4e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll @@ -1,4 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --filter "call.*(cos|sin|tan|cbrt|erf|exp[^e]|gamma|log|sqrt|copysign|dim|min|mod|hypot|nextafter|pow|fma)" --version 2 +; RUN: opt -mattr=+neon -vector-library=LIBMVEC -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s -check-prefix=LIBMVEC-NEON +; RUN: opt -mattr=+sve -vector-library=LIBMVEC -passes=inject-tli-mappings,loop-vectorize,simplifycfg -mcpu=neoverse-v1 -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s -check-prefix=LIBMVEC-SVE ; RUN: opt -mattr=+neon -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s -check-prefix=SLEEF-NEON ; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -mcpu=neoverse-v1 -force-vector-interleave=1 
-prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s -check-prefix=SLEEF-SVE ; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -mcpu=neoverse-v1 -force-vector-interleave=1 -S < %s | FileCheck %s -check-prefixes=SLEEF-SVE-NOPRED @@ -19,6 +21,14 @@ declare double @acos(double) declare float @acosf(float) define void @acos_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @acos_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0:[0-9]+]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_acos(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @acos_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0:[0-9]+]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_acos( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @acos_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0:[0-9]+]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_acos(<2 x double> [[WIDE_LOAD:%.*]]) @@ -64,6 +74,14 @@ define void @acos_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @acos_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @acos_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_acosf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @acos_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_acosf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @acos_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; 
SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_acosf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -112,6 +130,14 @@ declare double @acosh(double) declare float @acoshf(float) define void @acosh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @acosh_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_acosh(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @acosh_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_acosh( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @acosh_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_acosh(<2 x double> [[WIDE_LOAD:%.*]]) @@ -157,6 +183,14 @@ define void @acosh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @acosh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @acosh_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_acoshf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @acosh_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_acoshf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @acosh_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_acoshf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -205,6 +239,14 @@ declare double @asin(double) declare float @asinf(float) define void @asin_f64(ptr noalias %in.ptr, ptr noalias 
%out.ptr) { +; LIBMVEC-NEON-LABEL: define void @asin_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_asin(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @asin_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_asin( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @asin_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_asin(<2 x double> [[WIDE_LOAD:%.*]]) @@ -250,6 +292,14 @@ define void @asin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @asin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @asin_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_asinf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @asin_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_asinf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @asin_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_asinf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -298,6 +348,14 @@ declare double @asinh(double) declare float @asinhf(float) define void @asinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @asinh_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_asinh(<2 x 
double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @asinh_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_asinh( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @asinh_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_asinh(<2 x double> [[WIDE_LOAD:%.*]]) @@ -343,6 +401,14 @@ define void @asinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @asinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @asinh_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_asinhf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @asinh_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_asinhf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @asinh_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_asinhf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -391,6 +457,14 @@ declare double @atan(double) declare float @atanf(float) define void @atan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @atan_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_atan(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @atan_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_atan( 
[[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @atan_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_atan(<2 x double> [[WIDE_LOAD:%.*]]) @@ -436,6 +510,14 @@ define void @atan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @atan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @atan_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_atanf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @atan_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_atanf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @atan_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_atanf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -484,6 +566,14 @@ declare double @atan2(double, double) declare float @atan2f(float, float) define void @atan2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @atan2_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; +; LIBMVEC-SVE-LABEL: define void @atan2_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxvv_atan2( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @atan2_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias 
[[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) @@ -529,6 +619,14 @@ define void @atan2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @atan2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @atan2_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; +; LIBMVEC-SVE-LABEL: define void @atan2_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxvv_atan2f( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @atan2_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) @@ -577,6 +675,14 @@ declare double @atanh(double) declare float @atanhf(float) define void @atanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @atanh_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_atanh(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @atanh_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_atanh( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @atanh_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_atanh(<2 x double> 
[[WIDE_LOAD:%.*]]) @@ -622,6 +728,14 @@ define void @atanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @atanh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @atanh_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_atanhf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @atanh_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_atanhf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @atanh_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_atanhf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -670,6 +784,14 @@ declare double @cbrt(double) declare float @cbrtf(float) define void @cbrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @cbrt_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cbrt(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @cbrt_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_cbrt( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @cbrt_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_cbrt(<2 x double> [[WIDE_LOAD:%.*]]) @@ -715,6 +837,14 @@ define void @cbrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @cbrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @cbrt_f32 +; 
LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_cbrtf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @cbrt_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_cbrtf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @cbrt_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_cbrtf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -763,6 +893,14 @@ declare double @copysign(double, double) declare float @copysignf(float, float) define void @copysign_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @copysign_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]]) +; +; LIBMVEC-SVE-LABEL: define void @copysign_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]]) +; ; SLEEF-NEON-LABEL: define void @copysign_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_copysign(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) @@ -808,6 +946,14 @@ define void @copysign_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @copysign_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @copysign_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]]) +; +; 
LIBMVEC-SVE-LABEL: define void @copysign_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]]) +; ; SLEEF-NEON-LABEL: define void @copysign_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_copysignf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) @@ -856,6 +1002,14 @@ declare double @cos(double) declare float @cosf(float) define void @cos_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @cos_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cos(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @cos_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_cos( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @cos_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[WIDE_LOAD:%.*]]) @@ -901,6 +1055,14 @@ define void @cos_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @cos_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @cos_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_cosf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @cos_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_cosf( [[WIDE_MASKED_LOAD:%.*]], 
[[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @cos_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -949,6 +1111,14 @@ declare double @cosh(double) declare float @coshf(float) define void @cosh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @cosh_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cosh(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @cosh_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_cosh( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @cosh_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_cosh(<2 x double> [[WIDE_LOAD:%.*]]) @@ -994,6 +1164,14 @@ define void @cosh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @cosh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @cosh_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_coshf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @cosh_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_coshf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @cosh_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_coshf(<4 x float> 
[[WIDE_LOAD:%.*]]) @@ -1042,6 +1220,14 @@ declare double @cospi(double) declare float @cospif(float) define void @cospi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @cospi_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @cospi(double [[IN:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @cospi_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @cospi(double [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @cospi_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_cospi(<2 x double> [[WIDE_LOAD:%.*]]) @@ -1087,6 +1273,14 @@ define void @cospi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @cospi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @cospi_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @cospif(float [[IN:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @cospi_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @cospif(float [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @cospi_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_cospif(<4 x float> [[WIDE_LOAD:%.*]]) @@ -1135,6 +1329,14 @@ declare double @erf(double) declare float @erff(float) define void @erf_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @erf_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> 
@_ZGVnN2v_erf(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @erf_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_erf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @erf_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_erf(<2 x double> [[WIDE_LOAD:%.*]]) @@ -1180,6 +1382,14 @@ define void @erf_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @erf_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @erf_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_erff(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @erf_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_erff( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @erf_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_erff(<4 x float> [[WIDE_LOAD:%.*]]) @@ -1228,6 +1438,14 @@ declare double @erfc(double) declare float @erfcf(float) define void @erfc_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @erfc_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_erfc(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @erfc_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_erfc( 
[[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @erfc_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_erfc(<2 x double> [[WIDE_LOAD:%.*]]) @@ -1273,6 +1491,14 @@ define void @erfc_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @erfc_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @erfc_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_erfcf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @erfc_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_erfcf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @erfc_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_erfcf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -1321,6 +1547,14 @@ declare double @exp(double) declare float @expf(float) define void @exp_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @exp_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @exp_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_exp( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @exp_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> 
@_ZGVnN2v_exp(<2 x double> [[WIDE_LOAD:%.*]]) @@ -1366,6 +1600,14 @@ define void @exp_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @exp_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @exp_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_expf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @exp_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_expf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @exp_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -1414,6 +1656,14 @@ declare double @exp10(double) declare float @exp10f(float) define void @exp10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @exp10_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @exp10_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_exp10( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @exp10_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]]) @@ -1459,6 +1709,14 @@ define void @exp10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @exp10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define 
void @exp10_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @exp10_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_exp10f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @exp10_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[WIDE_LOAD:%.*]]) @@ -1507,6 +1765,14 @@ declare double @exp2(double) declare float @exp2f(float) define void @exp2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @exp2_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @exp2_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_exp2( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @exp2_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]]) @@ -1552,6 +1818,14 @@ define void @exp2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @exp2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @exp2_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[WIDE_LOAD:%.*]]) +; +; 
LIBMVEC-SVE-LABEL: define void @exp2_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_exp2f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @exp2_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[WIDE_LOAD:%.*]]) @@ -1600,6 +1874,14 @@ declare double @expm1(double) declare float @expm1f(float) define void @expm1_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @expm1_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_expm1(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @expm1_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_expm1( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @expm1_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_expm1(<2 x double> [[WIDE_LOAD:%.*]]) @@ -1645,6 +1927,14 @@ define void @expm1_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @expm1_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @expm1_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_expm1f(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @expm1_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_expm1f( [[WIDE_MASKED_LOAD:%.*]], 
[[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @expm1_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_expm1f(<4 x float> [[WIDE_LOAD:%.*]]) @@ -1693,6 +1983,14 @@ declare double @fdim(double, double) declare float @fdimf(float, float) define void @fdim_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @fdim_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]]) +; +; LIBMVEC-SVE-LABEL: define void @fdim_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]]) +; ; SLEEF-NEON-LABEL: define void @fdim_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_fdim(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) @@ -1738,6 +2036,14 @@ define void @fdim_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @fdim_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @fdim_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]]) +; +; LIBMVEC-SVE-LABEL: define void @fdim_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]]) +; ; SLEEF-NEON-LABEL: define void @fdim_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_fdimf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) @@ 
-1786,6 +2092,14 @@ declare double @fma(double, double, double) declare float @fmaf(float, float, float) define void @fma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @fma_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]]) +; +; LIBMVEC-SVE-LABEL: define void @fma_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]]) +; ; SLEEF-NEON-LABEL: define void @fma_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vvv_fma(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]]) @@ -1831,6 +2145,14 @@ define void @fma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @fma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @fma_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]]) +; +; LIBMVEC-SVE-LABEL: define void @fma_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]]) +; ; SLEEF-NEON-LABEL: define void @fma_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vvv_fmaf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]]) @@ -1879,6 +2201,14 @@ declare double @fmax(double, double) declare float @fmaxf(float, float) define void @fmax_f64(ptr noalias %in.ptr, ptr noalias 
%out.ptr) { +; LIBMVEC-NEON-LABEL: define void @fmax_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @fmax(double [[IN:%.*]], double [[IN]]) +; +; LIBMVEC-SVE-LABEL: define void @fmax_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @fmax(double [[IN:%.*]], double [[IN]]) +; ; SLEEF-NEON-LABEL: define void @fmax_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_fmax(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) @@ -1924,6 +2254,14 @@ define void @fmax_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @fmax_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @fmax_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @fmaxf(float [[IN:%.*]], float [[IN]]) +; +; LIBMVEC-SVE-LABEL: define void @fmax_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @fmaxf(float [[IN:%.*]], float [[IN]]) +; ; SLEEF-NEON-LABEL: define void @fmax_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_fmaxf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) @@ -1972,6 +2310,14 @@ declare double @fmin(double, double) declare float @fminf(float, float) define void @fmin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @fmin_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]]) +; +; 
LIBMVEC-SVE-LABEL: define void @fmin_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]]) +; ; SLEEF-NEON-LABEL: define void @fmin_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_fmin(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) @@ -2017,6 +2363,14 @@ define void @fmin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @fmin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @fmin_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]]) +; +; LIBMVEC-SVE-LABEL: define void @fmin_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]]) +; ; SLEEF-NEON-LABEL: define void @fmin_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_fminf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) @@ -2065,6 +2419,14 @@ declare double @fmod(double, double) declare float @fmodf(float, float) define void @fmod_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @fmod_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @fmod(double [[IN:%.*]], double [[IN]]) +; +; LIBMVEC-SVE-LABEL: define void @fmod_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @fmod(double [[IN:%.*]], double [[IN]]) +; ; SLEEF-NEON-LABEL: define void 
@fmod_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_fmod(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) @@ -2110,6 +2472,14 @@ define void @fmod_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @fmod_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @fmod_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @fmodf(float [[IN:%.*]], float [[IN]]) +; +; LIBMVEC-SVE-LABEL: define void @fmod_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @fmodf(float [[IN:%.*]], float [[IN]]) +; ; SLEEF-NEON-LABEL: define void @fmod_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_fmodf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) @@ -2158,6 +2528,14 @@ declare double @hypot(double, double) declare float @hypotf(float, float) define void @hypot_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @hypot_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_hypot(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; +; LIBMVEC-SVE-LABEL: define void @hypot_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxvv_hypot( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @hypot_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> 
@_ZGVnN2vv_hypot(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) @@ -2203,6 +2581,14 @@ define void @hypot_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @hypot_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @hypot_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4vv_hypotf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; +; LIBMVEC-SVE-LABEL: define void @hypot_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxvv_hypotf( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @hypot_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_hypotf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) @@ -2251,6 +2637,14 @@ declare i32 @ilogb(double) declare i32 @ilogbf(float) define void @ilogb_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @ilogb_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call i32 @ilogb(double [[IN:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @ilogb_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call i32 @ilogb(double [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @ilogb_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x i32> @_ZGVnN2v_ilogb(<2 x double> [[WIDE_LOAD:%.*]]) @@ -2296,6 +2690,14 @@ define void @ilogb_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @ilogb_f32(ptr noalias %in.ptr, ptr noalias 
%out.ptr) { +; LIBMVEC-NEON-LABEL: define void @ilogb_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call i32 @ilogbf(float [[IN:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @ilogb_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call i32 @ilogbf(float [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @ilogb_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x i32> @_ZGVnN4v_ilogbf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -2344,6 +2746,14 @@ declare double @ldexp(double, i32) declare float @ldexpf(float, i32) define void @ldexp_f64(ptr noalias %in1.ptr, ptr noalias %in2.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @ldexp_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @ldexp(double [[IN1:%.*]], i32 [[IN2:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @ldexp_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @ldexp(double [[IN1:%.*]], i32 [[IN2:%.*]]) +; ; SLEEF-NEON-LABEL: define void @ldexp_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP4:%.*]] = call <2 x double> @_ZGVnN2vv_ldexp(<2 x double> [[WIDE_LOAD:%.*]], <2 x i32> [[WIDE_LOAD1:%.*]]) @@ -2391,6 +2801,14 @@ define void @ldexp_f64(ptr noalias %in1.ptr, ptr noalias %in2.ptr, ptr noalias % } define void @ldexp_f32(ptr noalias %in1.ptr, ptr noalias %in2.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @ldexp_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias 
[[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @ldexpf(float [[IN1:%.*]], i32 [[IN2:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @ldexp_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @ldexpf(float [[IN1:%.*]], i32 [[IN2:%.*]]) +; ; SLEEF-NEON-LABEL: define void @ldexp_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP4:%.*]] = call <4 x float> @_ZGVnN4vv_ldexpf(<4 x float> [[WIDE_LOAD:%.*]], <4 x i32> [[WIDE_LOAD1:%.*]]) @@ -2441,6 +2859,14 @@ declare double @lgamma(double) declare float @lgammaf(float) define void @lgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @lgamma_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @lgamma(double [[IN:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @lgamma_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @lgamma(double [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @lgamma_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_lgamma(<2 x double> [[WIDE_LOAD:%.*]]) @@ -2486,6 +2912,14 @@ define void @lgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @lgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @lgamma_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @lgammaf(float [[IN:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @lgamma_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) 
#[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @lgammaf(float [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @lgamma_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_lgammaf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -2534,6 +2968,14 @@ declare double @log(double) declare float @logf(float) define void @log_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @log_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @log_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @log_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]]) @@ -2579,6 +3021,14 @@ define void @log_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @log_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @log_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_logf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @log_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_logf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @log_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = 
call <4 x float> @_ZGVnN4v_logf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -2627,6 +3077,14 @@ declare double @log10(double) declare float @log10f(float) define void @log10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @log10_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @log10_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log10( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @log10_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]]) @@ -2672,6 +3130,14 @@ define void @log10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @log10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @log10_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_log10f(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @log10_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log10f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @log10_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[WIDE_LOAD:%.*]]) @@ -2720,6 +3186,14 @@ declare double @log1p(double) declare float @log1pf(float) define void @log1p_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; 
LIBMVEC-NEON-LABEL: define void @log1p_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log1p(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @log1p_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log1p( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @log1p_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log1p(<2 x double> [[WIDE_LOAD:%.*]]) @@ -2765,6 +3239,14 @@ define void @log1p_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @log1p_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @log1p_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_log1pf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @log1p_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log1pf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @log1p_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_log1pf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -2813,6 +3295,14 @@ declare double @log2(double) declare float @log2f(float) define void @log2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @log2_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log2(<2 x 
double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @log2_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log2( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @log2_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]]) @@ -2858,6 +3348,14 @@ define void @log2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @log2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @log2_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_log2f(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @log2_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log2f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @log2_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[WIDE_LOAD:%.*]]) @@ -2909,6 +3407,14 @@ declare double @modf(double, ptr) declare float @modff(float, ptr) define void @modf_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; LIBMVEC-NEON-LABEL: define void @modf_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @modf_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[DATA:%.*]] = call double @modf(double 
[[NUM:%.*]], ptr [[GEPB:%.*]]) +; ; SLEEF-NEON-LABEL: define void @modf_f64 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) @@ -2953,6 +3459,14 @@ for.cond.cleanup: } define void @modf_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; LIBMVEC-NEON-LABEL: define void @modf_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @modf_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) +; ; SLEEF-NEON-LABEL: define void @modf_f32 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) @@ -3000,6 +3514,14 @@ declare double @nextafter(double, double) declare float @nextafterf(float, float) define void @nextafter_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @nextafter_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]]) +; +; LIBMVEC-SVE-LABEL: define void @nextafter_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]]) +; ; SLEEF-NEON-LABEL: define void @nextafter_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_nextafter(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> 
[[WIDE_LOAD]]) @@ -3045,6 +3567,14 @@ define void @nextafter_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @nextafter_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @nextafter_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]]) +; +; LIBMVEC-SVE-LABEL: define void @nextafter_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]]) +; ; SLEEF-NEON-LABEL: define void @nextafter_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_nextafterf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) @@ -3093,6 +3623,14 @@ declare double @pow(double, double) declare float @powf(float, float) define void @pow_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @pow_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; +; LIBMVEC-SVE-LABEL: define void @pow_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxvv_pow( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @pow_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) @@ -3138,6 +3676,14 @@ define void @pow_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @pow_f32(ptr 
noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @pow_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4vv_powf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; +; LIBMVEC-SVE-LABEL: define void @pow_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxvv_powf( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @pow_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) @@ -3186,6 +3732,14 @@ declare double @sin(double) declare float @sinf(float) define void @sin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @sin_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @sin_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_sin( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @sin_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]]) @@ -3231,6 +3785,14 @@ define void @sin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @sin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @sin_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { 
+; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_sinf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @sin_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_sinf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @sin_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -3282,6 +3844,14 @@ declare void @sincos(double, ptr, ptr) declare void @sincosf(float, ptr, ptr) define void @sincos_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; LIBMVEC-NEON-LABEL: define void @sincos_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @sincos_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) +; ; SLEEF-NEON-LABEL: define void @sincos_f64 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) @@ -3325,6 +3895,14 @@ for.cond.cleanup: } define void @sincos_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; LIBMVEC-NEON-LABEL: define void @sincos_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @sincos_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; 
LIBMVEC-SVE: call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) +; ; SLEEF-NEON-LABEL: define void @sincos_f32 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) @@ -3374,6 +3952,14 @@ declare void @sincospi(double, ptr, ptr) declare void @sincospif(float, ptr, ptr) define void @sincospi_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; LIBMVEC-NEON-LABEL: define void @sincospi_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @sincospi_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) +; ; SLEEF-NEON-LABEL: define void @sincospi_f64 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) @@ -3417,6 +4003,14 @@ for.cond.cleanup: } define void @sincospi_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; LIBMVEC-NEON-LABEL: define void @sincospi_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @sincospi_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) +; ; SLEEF-NEON-LABEL: define void @sincospi_f32 ; SLEEF-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) 
#[[ATTR0]] { ; SLEEF-NEON: call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) @@ -3463,6 +4057,14 @@ declare double @sinh(double) declare float @sinhf(float) define void @sinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @sinh_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_sinh(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @sinh_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_sinh( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @sinh_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_sinh(<2 x double> [[WIDE_LOAD:%.*]]) @@ -3508,6 +4110,14 @@ define void @sinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @sinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @sinh_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @sinh_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_sinhf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @sinh_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -3556,6 +4166,14 @@ declare double @sinpi(double) declare float @sinpif(float) define void @sinpi_f64(ptr noalias %in.ptr, ptr noalias 
%out.ptr) { +; LIBMVEC-NEON-LABEL: define void @sinpi_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @sinpi_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @sinpi_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_sinpi(<2 x double> [[WIDE_LOAD:%.*]]) @@ -3601,6 +4219,14 @@ define void @sinpi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @sinpi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @sinpi_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @sinpi_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @sinpi_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_sinpif(<4 x float> [[WIDE_LOAD:%.*]]) @@ -3649,6 +4275,14 @@ declare double @sqrt(double) declare float @sqrtf(float) define void @sqrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @sqrt_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @sqrt(double [[IN:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @sqrt_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; 
LIBMVEC-SVE: [[CALL:%.*]] = tail call double @sqrt(double [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @sqrt_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_sqrt(<2 x double> [[WIDE_LOAD:%.*]]) @@ -3694,6 +4328,14 @@ define void @sqrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @sqrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @sqrt_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @sqrtf(float [[IN:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @sqrt_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @sqrtf(float [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @sqrt_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_sqrtf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -3742,6 +4384,14 @@ declare double @tan(double) declare float @tanf(float) define void @tan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @tan_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_tan(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @tan_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_tan( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @tan_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_tan(<2 x double> [[WIDE_LOAD:%.*]]) @@ -3787,6 +4437,14 @@ 
define void @tan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @tan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @tan_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_tanf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @tan_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_tanf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @tan_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_tanf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -3835,6 +4493,14 @@ declare double @tanh(double) declare float @tanhf(float) define void @tanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @tanh_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_tanh(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @tanh_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_tanh( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @tanh_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_tanh(<2 x double> [[WIDE_LOAD:%.*]]) @@ -3880,6 +4546,14 @@ define void @tanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @tanh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @tanh_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias 
[[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @tanh_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_tanhf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; ; SLEEF-NEON-LABEL: define void @tanh_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -3928,6 +4602,14 @@ declare double @tgamma(double) declare float @tgammaf(float) define void @tgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @tgamma_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @tgamma(double [[IN:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @tgamma_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @tgamma(double [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @tgamma_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_tgamma(<2 x double> [[WIDE_LOAD:%.*]]) @@ -3973,6 +4655,14 @@ define void @tgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { } define void @tgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @tgamma_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @tgammaf(float [[IN:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @tgamma_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-SVE: [[CALL:%.*]] = tail call 
float @tgammaf(float [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @tgamma_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_tgammaf(<4 x float> [[WIDE_LOAD:%.*]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll index f753df32d9ebc..792f45ac57079 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --filter "call.*(acos|asin|atan|atan2|cos|cosh|exp|log|sin|sinh|pow|ceil|copysign|fabs|floor|fma|m..num|nearbyint|rint|round|sqrt|tan|tanh|trunc)" --version 2 +; RUN: opt -mattr=+neon -vector-library=LIBMVEC -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=LIBMVEC-NEON +; RUN: opt -mattr=+sve -vector-library=LIBMVEC -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=LIBMVEC-SVE ; RUN: opt -mattr=+neon -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=SLEEF-NEON ; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s --check-prefix=SLEEF-SVE ; RUN: opt -mattr=+neon -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=ARMPL-NEON @@ -16,6 +18,15 @@ declare double @llvm.acos.f64(double) declare float @llvm.acos.f32(float) define void @acos_f64(ptr noalias %in.ptr, ptr %out.ptr) { 
+; LIBMVEC-NEON-LABEL: define void @acos_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_acos(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @acos_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_acos( [[WIDE_LOAD:%.*]], splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @llvm.acos.f64(double [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @acos_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_acos(<2 x double> [[WIDE_LOAD:%.*]]) @@ -51,6 +62,15 @@ define void @acos_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @acos_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @acos_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_acosf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @acos_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_acosf( [[WIDE_LOAD:%.*]], splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @llvm.acos.f32(float [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @acos_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_acosf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -89,6 +109,15 @@ declare double @llvm.asin.f64(double) declare float @llvm.asin.f32(float) define void @asin_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @asin_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: 
[[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_asin(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @asin_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_asin( [[WIDE_LOAD:%.*]], splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @llvm.asin.f64(double [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @asin_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_asin(<2 x double> [[WIDE_LOAD:%.*]]) @@ -124,6 +153,15 @@ define void @asin_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @asin_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @asin_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_asinf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @asin_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_asinf( [[WIDE_LOAD:%.*]], splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @llvm.asin.f32(float [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @asin_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_asinf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -162,6 +200,15 @@ declare double @llvm.atan.f64(double) declare float @llvm.atan.f32(float) define void @atan_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @atan_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_atan(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @atan_f64 +; LIBMVEC-SVE-SAME: (ptr 
noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_atan( [[WIDE_LOAD:%.*]], splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @llvm.atan.f64(double [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @atan_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_atan(<2 x double> [[WIDE_LOAD:%.*]]) @@ -197,6 +244,15 @@ define void @atan_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @atan_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @atan_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_atanf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @atan_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_atanf( [[WIDE_LOAD:%.*]], splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @llvm.atan.f32(float [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @atan_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_atanf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -235,6 +291,15 @@ declare double @llvm.atan2.f64(double, double) declare float @llvm.atan2.f32(float, float) define void @atan2_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @atan2_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; +; LIBMVEC-SVE-LABEL: define void @atan2_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxvv_atan2( 
[[WIDE_LOAD:%.*]], [[WIDE_LOAD]], splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @llvm.atan2.f64(double [[IN:%.*]], double [[IN]]) +; ; SLEEF-NEON-LABEL: define void @atan2_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) @@ -270,6 +335,15 @@ define void @atan2_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @atan2_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @atan2_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; +; LIBMVEC-SVE-LABEL: define void @atan2_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxvv_atan2f( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]], splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @llvm.atan2.f32(float [[IN:%.*]], float [[IN]]) +; ; SLEEF-NEON-LABEL: define void @atan2_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) @@ -308,6 +382,14 @@ declare double @llvm.ceil.f64(double) declare float @llvm.ceil.f32(float) define void @ceil_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @ceil_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @ceil_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> 
[[WIDE_LOAD:%.*]]) +; ; SLEEF-NEON-LABEL: define void @ceil_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) @@ -343,6 +425,14 @@ define void @ceil_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @ceil_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @ceil_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @ceil_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ; SLEEF-NEON-LABEL: define void @ceil_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) @@ -381,6 +471,15 @@ declare double @llvm.copysign.f64(double, double) declare float @llvm.copysign.f32(float, float) define void @copysign_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @copysign_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; +; LIBMVEC-SVE-LABEL: define void @copysign_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call @llvm.copysign.nxv2f64( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @llvm.copysign.f64(double [[IN:%.*]], double [[IN]]) +; ; SLEEF-NEON-LABEL: define void @copysign_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call 
<2 x double> @llvm.copysign.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) @@ -416,6 +515,15 @@ define void @copysign_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @copysign_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @copysign_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; +; LIBMVEC-SVE-LABEL: define void @copysign_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call @llvm.copysign.nxv4f32( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @llvm.copysign.f32(float [[IN:%.*]], float [[IN]]) +; ; SLEEF-NEON-LABEL: define void @copysign_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) @@ -454,6 +562,15 @@ declare double @llvm.cos.f64(double) declare float @llvm.cos.f32(float) define void @cos_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @cos_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cos(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @cos_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_cos( [[WIDE_LOAD:%.*]], splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @llvm.cos.f64(double [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @cos_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[WIDE_LOAD:%.*]]) 
@@ -489,6 +606,15 @@ define void @cos_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @cos_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @cos_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_cosf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @cos_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_cosf( [[WIDE_LOAD:%.*]], splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @llvm.cos.f32(float [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @cos_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -527,6 +653,15 @@ declare double @llvm.cosh.f64(double) declare float @llvm.cosh.f32(float) define void @cosh_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @cosh_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cosh(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @cosh_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_cosh( [[WIDE_LOAD:%.*]], splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @llvm.cosh.f64(double [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @cosh_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_cosh(<2 x double> [[WIDE_LOAD:%.*]]) @@ -562,6 +697,15 @@ define void @cosh_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @cosh_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void 
@cosh_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_coshf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @cosh_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_coshf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @llvm.cosh.f32(float [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @cosh_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_coshf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -600,6 +744,15 @@ declare double @llvm.exp.f64(double) declare float @llvm.exp.f32(float) define void @exp_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @exp_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @exp_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @llvm.exp.f64(double [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @exp_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp(<2 x double> [[WIDE_LOAD:%.*]]) @@ -635,6 +788,15 @@ define void @exp_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @exp_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @exp_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_expf(<4 x
float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @exp_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @llvm.exp.f32(float [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @exp_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -673,6 +835,15 @@ declare double @llvm.exp10.f64(double) declare float @llvm.exp10.f32(float) define void @exp10_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @exp10_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @exp10_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @llvm.exp10.f64(double [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @exp10_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]]) @@ -708,6 +879,15 @@ define void @exp10_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @exp10_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @exp10_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @exp10_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +;
LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @llvm.exp10.f32(float [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @exp10_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[WIDE_LOAD:%.*]]) @@ -746,6 +926,15 @@ declare double @llvm.exp2.f64(double) declare float @llvm.exp2.f32(float) define void @exp2_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @exp2_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @exp2_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @llvm.exp2.f64(double [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @exp2_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]]) @@ -781,6 +970,15 @@ define void @exp2_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @exp2_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @exp2_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @exp2_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @llvm.exp2.f32(float [[IN:%.*]])
+; ; SLEEF-NEON-LABEL: define void @exp2_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[WIDE_LOAD:%.*]]) @@ -819,6 +1017,14 @@ declare double @llvm.fabs.f64(double) declare float @llvm.fabs.f32(float) define void @fabs_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @fabs_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @fabs_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ; SLEEF-NEON-LABEL: define void @fabs_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) @@ -854,6 +1060,14 @@ define void @fabs_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @fabs_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @fabs_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @fabs_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ; SLEEF-NEON-LABEL: define void @fabs_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) @@ -892,6 +1106,14 @@ declare double @llvm.floor.f64(double) declare float @llvm.floor.f32(float) define void @floor_f64(ptr 
noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @floor_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @floor_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ; SLEEF-NEON-LABEL: define void @floor_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) @@ -927,6 +1149,14 @@ define void @floor_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @floor_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @floor_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @floor_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ; SLEEF-NEON-LABEL: define void @floor_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) @@ -965,6 +1195,14 @@ declare double @llvm.fma.f64(double, double, double) declare float @llvm.fma.f32(float, float, float) define void @fma_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @fma_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> 
[[WIDE_LOAD]]) +; +; LIBMVEC-SVE-LABEL: define void @fma_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]]) +; ; SLEEF-NEON-LABEL: define void @fma_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]]) @@ -1000,6 +1238,14 @@ define void @fma_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @fma_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @fma_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]]) +; +; LIBMVEC-SVE-LABEL: define void @fma_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]]) +; ; SLEEF-NEON-LABEL: define void @fma_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]]) @@ -1038,6 +1284,15 @@ declare double @llvm.log.f64(double) declare float @llvm.log.f32(float) define void @log_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @log_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @log_f64 +; 
LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @llvm.log.f64(double [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @log_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]]) @@ -1073,6 +1328,15 @@ define void @log_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @log_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @log_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_logf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @log_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @llvm.log.f32(float [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @log_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -1111,6 +1375,15 @@ declare double @llvm.log10.f64(double) declare float @llvm.log10.f32(float) define void @log10_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @log10_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @log10_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) +;
LIBMVEC-SVE: [[CALL:%.*]] = tail call double @llvm.log10.f64(double [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @log10_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]]) @@ -1146,6 +1419,15 @@ define void @log10_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @log10_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @log10_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_log10f(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @log10_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @llvm.log10.f32(float [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @log10_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[WIDE_LOAD:%.*]]) @@ -1184,6 +1466,15 @@ declare double @llvm.log2.f64(double) declare float @llvm.log2.f32(float) define void @log2_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @log2_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @log2_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @llvm.log2.f64(double [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @log2_f64 ; SLEEF-NEON-SAME: (ptr noalias
[[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]]) @@ -1219,6 +1510,15 @@ define void @log2_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @log2_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @log2_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_log2f(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @log2_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @llvm.log2.f32(float [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @log2_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[WIDE_LOAD:%.*]]) @@ -1257,6 +1557,14 @@ declare double @llvm.maxnum.f64(double, double) declare float @llvm.maxnum.f32(float, float) define void @maxnum_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @maxnum_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; +; LIBMVEC-SVE-LABEL: define void @maxnum_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; ; SLEEF-NEON-LABEL: define void @maxnum_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double>
[[WIDE_LOAD]]) @@ -1292,6 +1600,14 @@ define void @maxnum_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @maxnum_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @maxnum_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; +; LIBMVEC-SVE-LABEL: define void @maxnum_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; ; SLEEF-NEON-LABEL: define void @maxnum_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) @@ -1330,6 +1646,14 @@ declare double @llvm.minnum.f64(double, double) declare float @llvm.minnum.f32(float, float) define void @minnum_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @minnum_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; +; LIBMVEC-SVE-LABEL: define void @minnum_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; ; SLEEF-NEON-LABEL: define void @minnum_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) @@ -1365,6 +1689,14 @@ define void @minnum_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @minnum_f32(ptr 
noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @minnum_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; +; LIBMVEC-SVE-LABEL: define void @minnum_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; ; SLEEF-NEON-LABEL: define void @minnum_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) @@ -1403,6 +1735,14 @@ declare double @llvm.nearbyint.f64(double) declare float @llvm.nearbyint.f32(float) define void @nearbyint_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @nearbyint_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @nearbyint_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ; SLEEF-NEON-LABEL: define void @nearbyint_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) @@ -1438,6 +1778,14 @@ define void @nearbyint_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @nearbyint_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @nearbyint_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: 
[[TMP2:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @nearbyint_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ; SLEEF-NEON-LABEL: define void @nearbyint_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) @@ -1476,6 +1824,15 @@ declare double @llvm.pow.f64(double, double) declare float @llvm.pow.f32(float, float) define void @pow_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @pow_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; +; LIBMVEC-SVE-LABEL: define void @pow_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @llvm.pow.f64(double [[IN:%.*]], double [[IN]]) +; ; SLEEF-NEON-LABEL: define void @pow_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) @@ -1511,6 +1868,15 @@ define void @pow_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @pow_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @pow_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4vv_powf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) +; +;
LIBMVEC-SVE-LABEL: define void @pow_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @llvm.pow.f32(float [[IN:%.*]], float [[IN]]) +; ; SLEEF-NEON-LABEL: define void @pow_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) @@ -1549,6 +1915,14 @@ declare double @llvm.rint.f64(double) declare float @llvm.rint.f32(float) define void @rint_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @rint_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @rint_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ; SLEEF-NEON-LABEL: define void @rint_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) @@ -1584,6 +1958,14 @@ define void @rint_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @rint_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @rint_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @rint_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float>
[[WIDE_LOAD:%.*]]) +; ; SLEEF-NEON-LABEL: define void @rint_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) @@ -1622,6 +2004,14 @@ declare double @llvm.round.f64(double) declare float @llvm.round.f32(float) define void @round_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @round_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @round_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ; SLEEF-NEON-LABEL: define void @round_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) @@ -1657,6 +2047,14 @@ define void @round_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @round_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @round_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @round_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ; SLEEF-NEON-LABEL: define void @round_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) @@ -1695,6 +2093,15 @@ declare double @llvm.sin.f64(double) declare float 
@llvm.sin.f32(float) define void @sin_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @sin_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @sin_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @llvm.sin.f64(double [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @sin_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]]) @@ -1730,6 +2137,15 @@ define void @sin_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @sin_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @sin_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_sinf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @sin_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @llvm.sin.f32(float [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @sin_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -1768,6 +2184,15 @@ declare double @llvm.sinh.f64(double) declare float @llvm.sinh.f32(float) define void @sinh_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @sinh_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr
[[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_sinh(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @sinh_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sinh(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @llvm.sinh.f64(double [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @sinh_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_sinh(<2 x double> [[WIDE_LOAD:%.*]]) @@ -1803,6 +2228,15 @@ define void @sinh_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @sinh_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @sinh_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @sinh_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinhf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @llvm.sinh.f32(float [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @sinh_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -1841,6 +2275,14 @@ declare double @llvm.sqrt.f64(double) declare float @llvm.sqrt.f32(float) define void @sqrt_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @sqrt_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void
@sqrt_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ; SLEEF-NEON-LABEL: define void @sqrt_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) @@ -1876,6 +2318,14 @@ define void @sqrt_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @sqrt_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @sqrt_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @sqrt_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ; SLEEF-NEON-LABEL: define void @sqrt_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) @@ -1914,6 +2364,15 @@ declare double @llvm.tan.f64(double) declare float @llvm.tan.f32(float) define void @tan_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @tan_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_tan(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @tan_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tan(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @llvm.tan.f64(double [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @tan_f64 ; SLEEF-NEON-SAME: (ptr 
noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_tan(<2 x double> [[WIDE_LOAD:%.*]]) @@ -1949,6 +2408,15 @@ define void @tan_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @tan_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @tan_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_tanf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @tan_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @llvm.tan.f32(float [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @tan_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_tanf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -1987,6 +2455,15 @@ declare double @llvm.tanh.f64(double) declare float @llvm.tanh.f32(float) define void @tanh_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @tanh_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_tanh(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @tanh_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tanh(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @llvm.tanh.f64(double [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @tanh_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_tanh(<2 x double> [[WIDE_LOAD:%.*]]) @@ -2022,6 +2499,15 @@ 
define void @tanh_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @tanh_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @tanh_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @tanh_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanhf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) +; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @llvm.tanh.f32(float [[IN:%.*]]) +; ; SLEEF-NEON-LABEL: define void @tanh_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[WIDE_LOAD:%.*]]) @@ -2060,6 +2546,14 @@ declare double @llvm.trunc.f64(double) declare float @llvm.trunc.f32(float) define void @trunc_f64(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @trunc_f64 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @trunc_f64 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ; SLEEF-NEON-LABEL: define void @trunc_f64 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) @@ -2095,6 +2589,14 @@ define void @trunc_f64(ptr noalias %in.ptr, ptr %out.ptr) { } define void @trunc_f32(ptr noalias %in.ptr, ptr %out.ptr) { +; LIBMVEC-NEON-LABEL: define void @trunc_f32 +; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) 
#[[ATTR1]] { +; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) +; +; LIBMVEC-SVE-LABEL: define void @trunc_f32 +; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) +; ; SLEEF-NEON-LABEL: define void @trunc_f32 ; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; SLEEF-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) diff --git a/llvm/test/Transforms/Util/add-TLI-mappings.ll b/llvm/test/Transforms/Util/add-TLI-mappings.ll index a1f660d31668e..5459512239b64 100644 --- a/llvm/test/Transforms/Util/add-TLI-mappings.ll +++ b/llvm/test/Transforms/Util/add-TLI-mappings.ll @@ -1,15 +1,13 @@ ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -vector-library=SVML -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,SVML ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -vector-library=AMDLIBM -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,AMDLIBM ; RUN: opt -mtriple=powerpc64-unknown-linux-gnu -vector-library=MASSV -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,MASSV -; RUN: opt -mtriple=aarch64-unknown-linux-gnu -vector-library=LIBMVEC -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=LIBMVEC-AARCH64 +; RUN: opt -mtriple=aarch64-unknown-linux-gnu -vector-library=LIBMVEC -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,LIBMVEC-AARCH64 ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -vector-library=LIBMVEC -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,LIBMVEC-X86 ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -vector-library=Accelerate -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,ACCELERATE ; RUN: opt -mtriple=aarch64-unknown-linux-gnu -vector-library=sleefgnuabi 
-passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,SLEEFGNUABI ; RUN: opt -mtriple=riscv64-unknown-linux-gnu -vector-library=sleefgnuabi -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,SLEEFGNUABI_RISCV ; RUN: opt -mtriple=aarch64-unknown-linux-gnu -vector-library=ArmPL -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,ARMPL -; LIBMVEC-AARCH64-NOT: llvm.compiler.used - ; COMMON-LABEL: @llvm.compiler.used = appending global ; SVML-SAME: [6 x ptr] [ ; SVML-SAME: ptr @__svml_sin2, @@ -35,6 +33,12 @@ ; MASSV-SAME: ptr @__log10f4 ; ACCELERATE-SAME: [1 x ptr] [ ; ACCELERATE-SAME: ptr @vlog10f +; LIBMVEC-AARCH64-SAME: [5 x ptr] [ +; LIBMVEC-AARCH64-SAME: ptr @_ZGVnN2v_sin, +; LIBMVEC-AARCH64-SAME: ptr @_ZGVsMxv_sin, +; LIBMVEC-AARCH64-SAME: ptr @_ZGVnN2v_log10f, +; LIBMVEC-AARCH64-SAME: ptr @_ZGVnN4v_log10f, +; LIBMVEC-AARCH64-SAME: ptr @_ZGVsMxv_log10f ; LIBMVEC-X86-SAME: [2 x ptr] [ ; LIBMVEC-X86-SAME: ptr @_ZGVbN2v_sin, ; LIBMVEC-X86-SAME: ptr @_ZGVdN4v_sin @@ -100,6 +104,7 @@ define double @sin_f64(double %in) { ; AMDLIBM: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] ; MASSV: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] ; ACCELERATE: call double @sin(double %{{.*}}) +; LIBMVEC-AARCH64: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] ; LIBMVEC-X86: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] ; SLEEFGNUABI: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] ; SLEEFGNUABI_RISCV: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] @@ -158,6 +163,7 @@ define float @call_llvm.log10.f32(float %in) { ; COMMON-LABEL: @call_llvm.log10.f32( ; SVML: call float @llvm.log10.f32(float %{{.*}}) ; AMDLIBM: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]] +; LIBMVEC-AARCH64: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]] ; LIBMVEC-X86: call float @llvm.log10.f32(float %{{.*}}) ; MASSV: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]] ; ACCELERATE: call float 
@llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]] @@ -167,6 +173,7 @@ define float @call_llvm.log10.f32(float %in) { ; No mapping of "llvm.log10.f32" to a vector function for SVML. ; SVML-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}}) ; AMDLIBM-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}}) +; LIBMVEC-AARCH64-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}}) ; LIBMVEC-X86-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}}) %call = tail call float @llvm.log10.f32(float %in) ret float %call @@ -196,8 +203,11 @@ declare float @llvm.log10.f32(float) #0 ; MASSV: declare <2 x double> @__sind2(<2 x double>) ; MASSV: declare <4 x float> @__log10f4(<4 x float>) -; LIBMVEC-AARCH64-NOT: declare <2 x double> @_ZGVbN2v_sin(<2 x double>) -; LIBMVEC-AARCH64-NOT: declare <4 x double> @_ZGVdN4v_sin(<4 x double>) +; LIBMVEC-AARCH64: declare aarch64_vector_pcs <2 x double> @_ZGVnN2v_sin(<2 x double>) +; LIBMVEC-AARCH64: declare <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double>, <vscale x 2 x i1>) +; LIBMVEC-AARCH64: declare aarch64_vector_pcs <2 x float> @_ZGVnN2v_log10f(<2 x float>) +; LIBMVEC-AARCH64: declare aarch64_vector_pcs <4 x float> @_ZGVnN4v_log10f(<4 x float>) +; LIBMVEC-AARCH64: declare <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float>, <vscale x 4 x i1>) ; LIBMVEC-X86: declare <2 x double> @_ZGVbN2v_sin(<2 x double>) ; LIBMVEC-X86: declare <4 x double> @_ZGVdN4v_sin(<4 x double>) @@ -272,6 +282,14 @@ attributes #0 = { nounwind readnone } ; ACCELERATE: attributes #[[LOG10]] = { "vector-function-abi-variant"= ; ACCELERATE-SAME: "_ZGV_LLVM_N4v_llvm.log10.f32(vlog10f)" } +; LIBMVEC-AARCH64: attributes #[[SIN]] = { "vector-function-abi-variant"= +; LIBMVEC-AARCH64-SAME: "_ZGV_LLVM_N2v_sin(_ZGVnN2v_sin), +; LIBMVEC-AARCH64-SAME: _ZGVsMxv_sin(_ZGVsMxv_sin)" } +; LIBMVEC-AARCH64: attributes #[[LOG10]] = { "vector-function-abi-variant"= +; LIBMVEC-AARCH64-SAME: "_ZGV_LLVM_N2v_llvm.log10.f32(_ZGVnN2v_log10f), +; LIBMVEC-AARCH64-SAME: _ZGV_LLVM_N4v_llvm.log10.f32(_ZGVnN4v_log10f), +; LIBMVEC-AARCH64-SAME: _ZGVsMxv_llvm.log10.f32(_ZGVsMxv_log10f)" } + ; LIBMVEC-X86: attributes #[[SIN]] = { 
"vector-function-abi-variant"= ; LIBMVEC-X86-SAME: "_ZGV_LLVM_N2v_sin(_ZGVbN2v_sin), ; LIBMVEC-X86-SAME: _ZGV_LLVM_N4v_sin(_ZGVdN4v_sin)" } From 95453d88a7a8cbe19451b6382fc00940c6f9982d Mon Sep 17 00:00:00 2001 From: Mary Kassayova Date: Thu, 12 Jun 2025 12:32:11 +0000 Subject: [PATCH 2/3] Merge VF2, VF4 and Scalable libmvec function macros Change-Id: Ib750e05d6daeca404a02b214272727827af13b2d --- llvm/include/llvm/Analysis/VecFuncs.def | 557 +++++++++++------------- llvm/lib/Analysis/TargetLibraryInfo.cpp | 30 +- 2 files changed, 265 insertions(+), 322 deletions(-) diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index cb8e6755a486b..fd235b54bb8c2 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -237,304 +237,265 @@ TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVdN4v_log", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVbN4v_logf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVdN8v_logf", FIXED(8), "_ZGV_LLVM_N8v") -#elif defined(TLI_DEFINE_LIBMVEC_AARCH64_VF2_VECFUNCS) - -TLI_DEFINE_VECFUNC("acos", "_ZGVnN2v_acos", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("acosf", "_ZGVnN2v_acosf", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.acos.f64", "_ZGVnN2v_acos", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.acos.f32", "_ZGVnN2v_acosf", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("acosh", "_ZGVnN2v_acosh", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("acoshf", "_ZGVnN2v_acoshf", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("asin", "_ZGVnN2v_asin", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("asinf", "_ZGVnN2v_asin", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.asin.f64", "_ZGVnN2v_asin", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.asin.f32", "_ZGVnN2v_asinf", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("asinh", "_ZGVnN2v_asinh", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("asinhf", "_ZGVnN2v_asinhf", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("atan", "_ZGVnN2v_atan", "_ZGV_LLVM_N2v") 
-TLI_DEFINE_VECFUNC("atanf", "_ZGVnN2v_atanf", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.atan.f64", "_ZGVnN2v_atan", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVnN2v_atanf", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("atan2", "_ZGVnN2vv_atan2", "_ZGV_LLVM_N2vv") -TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN2vv_atan2", "_ZGV_LLVM_N2vv") -TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_ZGVnN2vv_atan2", "_ZGV_LLVM_N2vv") -TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVnN2vv_atan2f", "_ZGV_LLVM_N2vv") - -TLI_DEFINE_VECFUNC("atanh", "_ZGVnN2v_atanh", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("atanhf", "_ZGVnN2v_atanhf", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("cbrt", "_ZGVnN2v_cbrt", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("cbrtf", "_ZGVnN2v_cbrtf", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("cos", "_ZGVnN2v_cos", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("cosf", "_ZGVnN2v_cos", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVnN2v_cos", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVnN2v_cosf", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("cosh", "_ZGVnN2v_cosh", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("coshf", "_ZGVnN2v_coshf", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.cosh.f64", "_ZGVnN2v_cosh", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.cosh.f32", "_ZGVnN2v_coshf", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("erf", "_ZGVnN2v_erf", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("erff", "_ZGVnN2v_erff", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("erfc", "_ZGVnN2v_erfc", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("erfcf", "_ZGVnN2v_erfcf", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("exp", "_ZGVnN2v_exp", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("expf", "_ZGVnN2v_expf", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVnN2v_exp", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVnN2v_expf", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("exp10", "_ZGVnN2v_exp10", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("exp10f", "_ZGVnN2v_exp10f", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.exp10.f64", "_ZGVnN2v_exp10", 
"_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVnN2v_exp10f", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("exp2", "_ZGVnN2v_exp2", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("exp2f", "_ZGVnN2v_exp2f", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVnN2v_exp2", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVnN2v_exp2f", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("expm1", "_ZGVnN2v_expm1", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("expm1f", "_ZGVnN2v_expm1f", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("hypot", "_ZGVnN2vv_hypot", "_ZGV_LLVM_N2vv") -TLI_DEFINE_VECFUNC("hypotf", "_ZGVnN2vv_hypotf", "_ZGV_LLVM_N2vv") - -TLI_DEFINE_VECFUNC("log", "_ZGVnN2v_log", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("logf", "_ZGVnN2v_logf", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVnN2v_log", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVnN2v_logf", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("log10", "_ZGVnN2v_log10", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("log10f", "_ZGVnN2v_log10f", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVnN2v_log10", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVnN2v_log10f", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("log1p", "_ZGVnN2v_log1p", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("log1pf", "_ZGVnN2v_log1pf", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("log2", "_ZGVnN2v_log2", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("log2f", "_ZGVnN2v_log2f", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVnN2v_log2", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVnN2v_log2f", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("pow", "_ZGVnN2vv_pow", "_ZGV_LLVM_N2vv") -TLI_DEFINE_VECFUNC("powf", "_ZGVnN2vv_powf", "_ZGV_LLVM_N2vv") -TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVnN2vv_pow", "_ZGV_LLVM_N2vv") -TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVnN2vv_powf", "_ZGV_LLVM_N2vv") - -TLI_DEFINE_VECFUNC("sin", "_ZGVnN2v_sin", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("sinf", "_ZGVnN2v_sinf", "_ZGV_LLVM_N2v") 
-TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVnN2v_sin", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVnN2v_sinf", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("sinh", "_ZGVnN2v_sinh", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("sinhf", "_ZGVnN2v_sinhf", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.sinh.f64", "_ZGVnN2v_sinh", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.sinh.f32", "_ZGVnN2v_sinhf", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("tan", "_ZGVnN2v_tan", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("tanf", "_ZGVnN2v_tanf", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVnN2v_tan", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVnN2v_tanf", "_ZGV_LLVM_N2v") - -TLI_DEFINE_VECFUNC("tanh", "_ZGVnN2v_tanh", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("tanhf", "_ZGVnN2v_tanhf", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.tanh.f64", "_ZGVnN2v_tanh", "_ZGV_LLVM_N2v") -TLI_DEFINE_VECFUNC("llvm.tanh.f32", "_ZGVnN2v_tanhf", "_ZGV_LLVM_N2v") - -#elif defined(TLI_DEFINE_LIBMVEC_AARCH64_VF4_VECFUNCS) - -TLI_DEFINE_VECFUNC("acosf", "_ZGVnN4v_acosf", "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("llvm.acos.f32", "_ZGVnN4v_acosf", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("acoshf", "_ZGVnN4v_acoshf", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("asinf", "_ZGVnN4v_asinf", "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("llvm.asin.f32", "_ZGVnN4v_asinf", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("asinhf", "_ZGVnN4v_asinhf", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("atanf", "_ZGVnN4v_atanf", "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVnN4v_atanf", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN4vv_atan2f", "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVnN4vv_atan2f", "_ZGV_LLVM_N4vv") - -TLI_DEFINE_VECFUNC("atanhf", "_ZGVnN4v_atanhf", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("cbrtf", "_ZGVnN4v_cbrtf", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("cosf", "_ZGVnN4v_cosf", "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVnN4v_cosf", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("coshf", 
"_ZGVnN4v_coshf", "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("llvm.cosh.f32", "_ZGVnN4v_coshf", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("erff", "_ZGVnN4v_erff", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("erfcf", "_ZGVnN4v_erfcf", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("expf", "_ZGVnN4v_expf", "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVnN4v_expf", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("exp10f", "_ZGVnN4v_exp10f", "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVnN4v_exp10f", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("exp2f", "_ZGVnN4v_exp2f", "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVnN4v_exp2f", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("expm1f", "_ZGVnN4v_expm1f", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("hypotf", "_ZGVnN4vv_hypotf", "_ZGV_LLVM_N4vv") - -TLI_DEFINE_VECFUNC("logf", "_ZGVnN4v_logf", "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVnN4v_logf", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("log10f", "_ZGVnN4v_log10f", "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVnN4v_log10f", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("log1pf", "_ZGVnN4v_log1pf", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("log2f", "_ZGVnN4v_log2f", "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVnN4v_log2f", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("powf", "_ZGVnN4vv_powf", "_ZGV_LLVM_N4vv") -TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVnN4vv_powf", "_ZGV_LLVM_N4vv") - -TLI_DEFINE_VECFUNC("sinf", "_ZGVnN4v_sinf", "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVnN4v_sinf", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("sinhf", "_ZGVnN4v_sinhf", "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("llvm.sinh.f32", "_ZGVnN4v_sinhf", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("tanf", "_ZGVnN4v_tanf", "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVnN4v_tanf", "_ZGV_LLVM_N4v") - -TLI_DEFINE_VECFUNC("tanhf", "_ZGVnN4v_tanhf", "_ZGV_LLVM_N4v") -TLI_DEFINE_VECFUNC("llvm.tanh.f32", "_ZGVnN4v_tanhf", "_ZGV_LLVM_N4v") - -#elif 
defined(TLI_DEFINE_LIBMVEC_AARCH64_SCALABLE_VECFUNCS) - -TLI_DEFINE_VECFUNC("acos", "_ZGVsMxv_acos", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("acosf", "_ZGVsMxv_acosf", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.acos.f64", "_ZGVsMxv_acos", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.acos.f32", "_ZGVsMxv_acosf", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("acosh", "_ZGVsMxv_acosh", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("acoshf", "_ZGVsMxv_acoshf", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("asin", "_ZGVsMxv_asin", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("asinf", "_ZGVsMxv_asinf", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.asin.f64", "_ZGVsMxv_asin", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.asin.f32", "_ZGVsMxv_asinf", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("asinh", "_ZGVsMxv_asinh", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("asinhf", "_ZGVsMxv_asinhf", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("atan", "_ZGVsMxv_atan", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("atanf", "_ZGVsMxv_atanf", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.atan.f64", "_ZGVsMxv_atan", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVsMxv_atanf", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("atan2", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED, "_ZGVsMxvv") -TLI_DEFINE_VECFUNC("atan2f", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv") -TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED, "_ZGVsMxvv") -TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv") - -TLI_DEFINE_VECFUNC("atanh", "_ZGVsMxv_atanh", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("atanhf", "_ZGVsMxv_atanhf", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("cbrt", "_ZGVsMxv_cbrt", SCALABLE(2), MASKED, "_ZGVsMxv") 
-TLI_DEFINE_VECFUNC("cbrtf", "_ZGVsMxv_cbrtf", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("cos", "_ZGVsMxv_cos", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("cosf", "_ZGVsMxv_cosf", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVsMxv_cos", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVsMxv_cosf", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("cosh", "_ZGVsMxv_cosh", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("coshf", "_ZGVsMxv_coshf", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.cosh.f64", "_ZGVsMxv_cosh", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.cosh.f32", "_ZGVsMxv_coshf", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("erf", "_ZGVsMxv_erf", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("erff", "_ZGVsMxv_erff", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("erfc", "_ZGVsMxv_erfc", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("erfcf", "_ZGVsMxv_erfcf", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("exp", "_ZGVsMxv_exp", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("expf", "_ZGVsMxv_expf", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVsMxv_exp", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVsMxv_expf", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("exp10", "_ZGVsMxv_exp10", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("exp10f", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.exp10.f64", "_ZGVsMxv_exp10", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("exp2", "_ZGVsMxv_exp2", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("exp2f", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVsMxv_exp2", SCALABLE(2), MASKED, "_ZGVsMxv") 
-TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("expm1", "_ZGVsMxv_expm1", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("expm1f", "_ZGVsMxv_expm1f", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("hypot", "_ZGVsMxvv_hypot", SCALABLE(2), MASKED, "_ZGVsMxvv") -TLI_DEFINE_VECFUNC("hypotf", "_ZGVsMxvv_hypotf", SCALABLE(4), MASKED, "_ZGVsMxvv") - -TLI_DEFINE_VECFUNC("log", "_ZGVsMxv_log", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("logf", "_ZGVsMxv_logf", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVsMxv_log", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVsMxv_logf", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("log10", "_ZGVsMxv_log10", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("log10f", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVsMxv_log10", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("log1p", "_ZGVsMxv_log1p", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("log1pf", "_ZGVsMxv_log1pf", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("log2", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("log2f", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("pow", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv") -TLI_DEFINE_VECFUNC("powf", "_ZGVsMxvv_powf", SCALABLE(4), MASKED, "_ZGVsMxvv") -TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv") -TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVsMxvv_powf", SCALABLE(4), MASKED, "_ZGVsMxvv") - -TLI_DEFINE_VECFUNC("sin", "_ZGVsMxv_sin", SCALABLE(2), MASKED, 
"_ZGVsMxv") -TLI_DEFINE_VECFUNC("sinf", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVsMxv_sin", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("sinh", "_ZGVsMxv_sinh", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("sinhf", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.sinh.f64", "_ZGVsMxv_sinh", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.sinh.f32", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("tan", "_ZGVsMxv_tan", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("tanf", "_ZGVsMxv_tanf", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVsMxv_tan", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVsMxv_tanf", SCALABLE(4), MASKED, "_ZGVsMxv") - -TLI_DEFINE_VECFUNC("tanh", "_ZGVsMxv_tanh", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("tanhf", "_ZGVsMxv_tanhf", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.tanh.f64", "_ZGVsMxv_tanh", SCALABLE(2), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("llvm.tanh.f32", "_ZGVsMxv_tanhf", SCALABLE(4), MASKED, "_ZGVsMxv") +#elif defined(TLI_DEFINE_LIBMVEC_AARCH64_VECFUNCS) + +TLI_DEFINE_VECFUNC("acos", "_ZGVnN2v_acos", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("acosf", "_ZGVnN2v_acosf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("acosf", "_ZGVnN4v_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("acos", "_ZGVsMxv_acos", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("acosf", "_ZGVsMxv_acosf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("llvm.acos.f64", "_ZGVnN2v_acos", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.acos.f32", 
"_ZGVnN2v_acosf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.acos.f32", "_ZGVnN4v_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.acos.f64", "_ZGVsMxv_acos", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("llvm.acos.f32", "_ZGVsMxv_acosf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("acosh", "_ZGVnN2v_acosh", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("acoshf", "_ZGVnN2v_acoshf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("acoshf", "_ZGVnN4v_acoshf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("acosh", "_ZGVsMxv_acosh", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("acoshf", "_ZGVsMxv_acoshf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("asin", "_ZGVnN2v_asin", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("asinf", "_ZGVnN2v_asinf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("asinf", "_ZGVnN4v_asinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("asin", "_ZGVsMxv_asin", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("asinf", "_ZGVsMxv_asinf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("llvm.asin.f64", "_ZGVnN2v_asin", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.asin.f32", "_ZGVnN2v_asinf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.asin.f32", "_ZGVnN4v_asinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.asin.f64", "_ZGVsMxv_asin", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("llvm.asin.f32", "_ZGVsMxv_asinf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + 
+TLI_DEFINE_VECFUNC("asinh", "_ZGVnN2v_asinh", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("asinhf", "_ZGVnN2v_asinhf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("asinhf", "_ZGVnN4v_asinhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("asinh", "_ZGVsMxv_asinh", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("asinhf", "_ZGVsMxv_asinhf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("atan", "_ZGVnN2v_atan", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("atanf", "_ZGVnN2v_atanf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("atanf", "_ZGVnN4v_atanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("atan", "_ZGVsMxv_atan", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("atanf", "_ZGVsMxv_atanf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("llvm.atan.f64", "_ZGVnN2v_atan", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVnN2v_atanf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVnN4v_atanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.atan.f64", "_ZGVsMxv_atan", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVsMxv_atanf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("atan2", "_ZGVnN2vv_atan2", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN2vv_atan2f", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN4vv_atan2f", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("atan2", "_ZGVsMxvv_atan2", 
SCALABLE(2), MASKED, "_ZGVsMxvv", NOCC) +TLI_DEFINE_VECFUNC("atan2f", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv", NOCC) + +TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_ZGVnN2vv_atan2", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVnN2vv_atan2f", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVnN4vv_atan2f", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED, "_ZGVsMxvv", NOCC) +TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv", NOCC) + +TLI_DEFINE_VECFUNC("atanh", "_ZGVnN2v_atanh", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("atanhf", "_ZGVnN2v_atanhf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("atanhf", "_ZGVnN4v_atanhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("atanh", "_ZGVsMxv_atanh", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("atanhf", "_ZGVsMxv_atanhf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("cbrt", "_ZGVnN2v_cbrt", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("cbrtf", "_ZGVnN2v_cbrtf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("cbrtf", "_ZGVnN4v_cbrtf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("cbrt", "_ZGVsMxv_cbrt", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("cbrtf", "_ZGVsMxv_cbrtf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("cos", "_ZGVnN2v_cos", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("cosf", "_ZGVnN2v_cos", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("cosf", 
"_ZGVnN4v_cosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("cos", "_ZGVsMxv_cos", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("cosf", "_ZGVsMxv_cosf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVnN2v_cos", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVnN2v_cosf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVnN4v_cosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVsMxv_cos", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVsMxv_cosf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("cosh", "_ZGVnN2v_cosh", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("coshf", "_ZGVnN2v_coshf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("coshf", "_ZGVnN4v_coshf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("cosh", "_ZGVsMxv_cosh", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("coshf", "_ZGVsMxv_coshf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("llvm.cosh.f64", "_ZGVnN2v_cosh", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.cosh.f32", "_ZGVnN2v_coshf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.cosh.f32", "_ZGVnN4v_coshf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.cosh.f64", "_ZGVsMxv_cosh", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("llvm.cosh.f32", "_ZGVsMxv_coshf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("erf", "_ZGVnN2v_erf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) 
+TLI_DEFINE_VECFUNC("erff", "_ZGVnN2v_erff", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("erff", "_ZGVnN4v_erff", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("erf", "_ZGVsMxv_erf", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("erff", "_ZGVsMxv_erff", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("erfc", "_ZGVnN2v_erfc", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("erfcf", "_ZGVnN2v_erfcf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("erfcf", "_ZGVnN4v_erfcf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("erfc", "_ZGVsMxv_erfc", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("erfcf", "_ZGVsMxv_erfcf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("exp", "_ZGVnN2v_exp", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("expf", "_ZGVnN2v_expf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("expf", "_ZGVnN4v_expf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("exp", "_ZGVsMxv_exp", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("expf", "_ZGVsMxv_expf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVnN2v_exp", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVnN2v_expf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVnN4v_expf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVsMxv_exp", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVsMxv_expf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("exp10", 
"_ZGVnN2v_exp10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("exp10f", "_ZGVnN2v_exp10f", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("exp10f", "_ZGVnN4v_exp10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("exp10", "_ZGVsMxv_exp10", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("exp10f", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("llvm.exp10.f64", "_ZGVnN2v_exp10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVnN2v_exp10f", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVnN4v_exp10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.exp10.f64", "_ZGVsMxv_exp10", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("exp2", "_ZGVnN2v_exp2", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("exp2f", "_ZGVnN2v_exp2f", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("exp2f", "_ZGVnN4v_exp2f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("exp2", "_ZGVsMxv_exp2", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("exp2f", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVnN2v_exp2", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVnN2v_exp2f", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVnN4v_exp2f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVsMxv_exp2", 
SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("expm1", "_ZGVnN2v_expm1", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("expm1f", "_ZGVnN2v_expm1f", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("expm1f", "_ZGVnN4v_expm1f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("expm1", "_ZGVsMxv_expm1", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("expm1f", "_ZGVsMxv_expm1f", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("hypot", "_ZGVnN2vv_hypot", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("hypotf", "_ZGVnN2vv_hypotf", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("hypotf", "_ZGVnN4vv_hypotf", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("hypot", "_ZGVsMxvv_hypot", SCALABLE(2), MASKED, "_ZGVsMxvv", NOCC) +TLI_DEFINE_VECFUNC("hypotf", "_ZGVsMxvv_hypotf", SCALABLE(4), MASKED, "_ZGVsMxvv", NOCC) + +TLI_DEFINE_VECFUNC("log", "_ZGVnN2v_log", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("logf", "_ZGVnN2v_logf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("logf", "_ZGVnN4v_logf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("log", "_ZGVsMxv_log", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("logf", "_ZGVsMxv_logf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVnN2v_log", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVnN2v_logf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVnN4v_logf", 
FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVsMxv_log", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVsMxv_logf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("log10", "_ZGVnN2v_log10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("log10f", "_ZGVnN2v_log10f", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("log10f", "_ZGVnN4v_log10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("log10", "_ZGVsMxv_log10", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("log10f", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVnN2v_log10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVnN2v_log10f", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVnN4v_log10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVsMxv_log10", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("log1p", "_ZGVnN2v_log1p", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("log1pf", "_ZGVnN2v_log1pf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("log1pf", "_ZGVnN4v_log1pf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("log1p", "_ZGVsMxv_log1p", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("log1pf", "_ZGVsMxv_log1pf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("log2", "_ZGVnN2v_log2", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) 
+TLI_DEFINE_VECFUNC("log2f", "_ZGVnN2v_log2f", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("log2f", "_ZGVnN4v_log2f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("log2", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("log2f", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVnN2v_log2", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVnN2v_log2f", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVnN4v_log2f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("pow", "_ZGVnN2vv_pow", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("powf", "_ZGVnN2vv_powf", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("powf", "_ZGVnN4vv_powf", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("pow", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv", NOCC) +TLI_DEFINE_VECFUNC("powf", "_ZGVsMxvv_powf", SCALABLE(4), MASKED, "_ZGVsMxvv", NOCC) + +TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVnN2vv_powf", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv", NOCC) +TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVsMxvv_powf", 
SCALABLE(4), MASKED, "_ZGVsMxvv", NOCC) + +TLI_DEFINE_VECFUNC("sin", "_ZGVnN2v_sin", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("sinf", "_ZGVnN2v_sinf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("sinf", "_ZGVnN4v_sinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("sin", "_ZGVsMxv_sin", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("sinf", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVnN2v_sin", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVnN2v_sinf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVnN4v_sinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVsMxv_sin", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("sinh", "_ZGVnN2v_sinh", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("sinhf", "_ZGVnN2v_sinhf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("sinhf", "_ZGVnN4v_sinhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("sinh", "_ZGVsMxv_sinh", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("sinhf", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("llvm.sinh.f64", "_ZGVnN2v_sinh", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.sinh.f32", "_ZGVnN2v_sinhf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.sinh.f32", "_ZGVnN4v_sinhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) 
+TLI_DEFINE_VECFUNC("llvm.sinh.f64", "_ZGVsMxv_sinh", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("llvm.sinh.f32", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("tan", "_ZGVnN2v_tan", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("tanf", "_ZGVnN2v_tanf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("tanf", "_ZGVnN4v_tanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("tan", "_ZGVsMxv_tan", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("tanf", "_ZGVsMxv_tanf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVnN2v_tan", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVnN2v_tanf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVnN4v_tanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVsMxv_tan", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVsMxv_tanf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("tanh", "_ZGVnN2v_tanh", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("tanhf", "_ZGVnN2v_tanhf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("tanhf", "_ZGVnN4v_tanhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("tanh", "_ZGVsMxv_tanh", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("tanhf", "_ZGVsMxv_tanhf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) + +TLI_DEFINE_VECFUNC("llvm.tanh.f64", "_ZGVnN2v_tanh", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.tanh.f32", "_ZGVnN2v_tanhf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", 
CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.tanh.f32", "_ZGVnN4v_tanhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("llvm.tanh.f64", "_ZGVsMxv_tanh", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) +TLI_DEFINE_VECFUNC("llvm.tanh.f32", "_ZGVsMxv_tanhf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) #elif defined(TLI_DEFINE_MASSV_VECFUNCS) // IBM MASS library's vector Functions diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index 46efa682e77b2..a3ed093134390 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -1299,28 +1299,12 @@ static const VecDesc VecFuncs_LIBMVEC_X86[] = { #undef TLI_DEFINE_LIBMVEC_X86_VECFUNCS }; -static const VecDesc VecFuncs_LIBMVEC_AARCH64_VF2[] = { -#define TLI_DEFINE_LIBMVEC_AARCH64_VF2_VECFUNCS -#define TLI_DEFINE_VECFUNC(SCAL, VEC, VABI_PREFIX) \ - {SCAL, VEC, /* VF = */ FIXED(2), /* MASK = */ false, VABI_PREFIX, \ - /* CC = */ CallingConv::AArch64_VectorCall}, -#include "llvm/Analysis/VecFuncs.def" -#undef TLI_DEFINE_LIBMVEC_AARCH64_VF2_VECFUNCS -}; -static const VecDesc VecFuncs_LIBMVEC_AARCH64_VF4[] = { -#define TLI_DEFINE_LIBMVEC_AARCH64_VF4_VECFUNCS -#define TLI_DEFINE_VECFUNC(SCAL, VEC, VABI_PREFIX) \ - {SCAL, VEC, /* VF = */ FIXED(4), /* MASK = */ false, VABI_PREFIX, \ - /* CC = */ CallingConv::AArch64_VectorCall}, -#include "llvm/Analysis/VecFuncs.def" -#undef TLI_DEFINE_LIBMVEC_AARCH64_VF4_VECFUNCS -}; -static const VecDesc VecFuncs_LIBMVEC_AARCH64_VFScalable[] = { -#define TLI_DEFINE_LIBMVEC_AARCH64_SCALABLE_VECFUNCS -#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK, VABI_PREFIX) \ - {SCAL, VEC, VF, MASK, VABI_PREFIX, /* CC = */ std::nullopt}, +static const VecDesc VecFuncs_LIBMVEC_AARCH64[] = { +#define TLI_DEFINE_LIBMVEC_AARCH64_VECFUNCS +#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK, VABI_PREFIX, CC) \ + {SCAL, VEC, VF, MASK, VABI_PREFIX, CC}, #include "llvm/Analysis/VecFuncs.def" 
-#undef TLI_DEFINE_LIBMVEC_AARCH64_SCALABLE_VECFUNCS +#undef TLI_DEFINE_LIBMVEC_AARCH64_VECFUNCS }; static const VecDesc VecFuncs_MASSV[] = { @@ -1402,9 +1386,7 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( break; case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: - addVectorizableFunctions(VecFuncs_LIBMVEC_AARCH64_VF2); - addVectorizableFunctions(VecFuncs_LIBMVEC_AARCH64_VF4); - addVectorizableFunctions(VecFuncs_LIBMVEC_AARCH64_VFScalable); + addVectorizableFunctions(VecFuncs_LIBMVEC_AARCH64); break; } break; From 82647978a80cca04ffb07dad67fbb13e88cc492a Mon Sep 17 00:00:00 2001 From: Mary Kassayova Date: Mon, 16 Jun 2025 10:49:50 +0000 Subject: [PATCH 3/3] Added test cases for <2 x float> vectors, fixed typos in function mapping Change-Id: Ia863b7dee5585d782cd049d99758b7aba04b5c2c --- llvm/include/llvm/Analysis/VecFuncs.def | 6 +- .../AArch64/veclib-function-calls.ll | 345 ++++++++++++++++++ .../AArch64/veclib-intrinsic-calls.ll | 233 ++++++++++++ 3 files changed, 581 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index fd235b54bb8c2..4015df990729f 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -258,7 +258,7 @@ TLI_DEFINE_VECFUNC("acosh", "_ZGVsMxv_acosh", SCALABLE(2), MASKED, "_ZGVsMxv", TLI_DEFINE_VECFUNC("acoshf", "_ZGVsMxv_acoshf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) TLI_DEFINE_VECFUNC("asin", "_ZGVnN2v_asin", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) -TLI_DEFINE_VECFUNC("asinf", "_ZGVnN2v_asin", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("asinf", "_ZGVnN2v_asinf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) TLI_DEFINE_VECFUNC("asinf", "_ZGVnN4v_asinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) TLI_DEFINE_VECFUNC("asin", "_ZGVsMxv_asin", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) 
TLI_DEFINE_VECFUNC("asinf", "_ZGVsMxv_asinf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) @@ -288,7 +288,7 @@ TLI_DEFINE_VECFUNC("llvm.atan.f64", "_ZGVsMxv_atan", SCALABLE(2), MASKED, "_ZGVs TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVsMxv_atanf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) TLI_DEFINE_VECFUNC("atan2", "_ZGVnN2vv_atan2", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall) -TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN2vv_atan2", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN2vv_atan2f", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv", CallingConv::AArch64_VectorCall) TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN4vv_atan2f", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv", CallingConv::AArch64_VectorCall) TLI_DEFINE_VECFUNC("atan2", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED, "_ZGVsMxvv", NOCC) TLI_DEFINE_VECFUNC("atan2f", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv", NOCC) @@ -312,7 +312,7 @@ TLI_DEFINE_VECFUNC("cbrt", "_ZGVsMxv_cbrt", SCALABLE(2), MASKED, "_ZGVsMxv", NO TLI_DEFINE_VECFUNC("cbrtf", "_ZGVsMxv_cbrtf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) TLI_DEFINE_VECFUNC("cos", "_ZGVnN2v_cos", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) -TLI_DEFINE_VECFUNC("cosf", "_ZGVnN2v_cos", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) +TLI_DEFINE_VECFUNC("cosf", "_ZGVnN2v_cosf", FIXED(2), NOMASK, "_ZGV_LLVM_N2v", CallingConv::AArch64_VectorCall) TLI_DEFINE_VECFUNC("cosf", "_ZGVnN4v_cosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v", CallingConv::AArch64_VectorCall) TLI_DEFINE_VECFUNC("cos", "_ZGVsMxv_cos", SCALABLE(2), MASKED, "_ZGVsMxv", NOCC) TLI_DEFINE_VECFUNC("cosf", "_ZGVsMxv_cosf", SCALABLE(4), MASKED, "_ZGVsMxv", NOCC) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll index d87b161371c4e..670b08987c81e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll +++ 
b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --filter "call.*(cos|sin|tan|cbrt|erf|exp[^e]|gamma|log|sqrt|copysign|dim|min|mod|hypot|nextafter|pow|fma)" --version 2 ; RUN: opt -mattr=+neon -vector-library=LIBMVEC -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s -check-prefix=LIBMVEC-NEON +; RUN: opt -mattr=+neon -vector-library=LIBMVEC -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -force-vector-width=2 -S < %s | FileCheck %s -check-prefix=LIBMVEC-NEON-WIDTH-2 ; RUN: opt -mattr=+sve -vector-library=LIBMVEC -passes=inject-tli-mappings,loop-vectorize,simplifycfg -mcpu=neoverse-v1 -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s -check-prefix=LIBMVEC-SVE ; RUN: opt -mattr=+neon -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s -check-prefix=SLEEF-NEON ; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -mcpu=neoverse-v1 -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s -check-prefix=SLEEF-SVE @@ -25,6 +26,10 @@ define void @acos_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0:[0-9]+]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_acos(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @acos_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0:[0-9]+]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_acos(<2 x double> [[WIDE_LOAD:%.*]]) +; ; 
LIBMVEC-SVE-LABEL: define void @acos_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0:[0-9]+]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_acos( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -78,6 +83,10 @@ define void @acos_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_acosf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @acos_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_acosf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @acos_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_acosf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -134,6 +143,10 @@ define void @acosh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_acosh(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @acosh_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_acosh(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @acosh_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_acosh( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -187,6 +200,10 @@ define void @acosh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr 
noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_acoshf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @acosh_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_acoshf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @acosh_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_acoshf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -243,6 +260,10 @@ define void @asin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_asin(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @asin_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_asin(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @asin_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_asin( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -296,6 +317,10 @@ define void @asin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_asinf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @asin_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: 
[[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_asinf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @asin_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_asinf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -352,6 +377,10 @@ define void @asinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_asinh(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @asinh_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_asinh(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @asinh_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_asinh( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -405,6 +434,10 @@ define void @asinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_asinhf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @asinh_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_asinhf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @asinh_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_asinhf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -461,6 
+494,10 @@ define void @atan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_atan(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @atan_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_atan(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @atan_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_atan( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -514,6 +551,10 @@ define void @atan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_atanf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @atan_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_atanf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @atan_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_atanf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -570,6 +611,10 @@ define void @atan2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void 
@atan2_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; ; LIBMVEC-SVE-LABEL: define void @atan2_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxvv_atan2( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) @@ -623,6 +668,10 @@ define void @atan2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @atan2_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2vv_atan2f(<2 x float> [[WIDE_LOAD:%.*]], <2 x float> [[WIDE_LOAD]]) +; ; LIBMVEC-SVE-LABEL: define void @atan2_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxvv_atan2f( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) @@ -679,6 +728,10 @@ define void @atanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_atanh(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @atanh_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_atanh(<2 x double> 
[[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @atanh_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_atanh( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -732,6 +785,10 @@ define void @atanh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_atanhf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @atanh_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_atanhf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @atanh_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_atanhf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -788,6 +845,10 @@ define void @cbrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cbrt(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cbrt_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cbrt(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @cbrt_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_cbrt( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -841,6 +902,10 @@ define void @cbrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; 
LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_cbrtf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cbrt_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_cbrtf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @cbrt_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_cbrtf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -897,6 +962,10 @@ define void @copysign_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @copysign_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]]) +; ; LIBMVEC-SVE-LABEL: define void @copysign_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @copysign(double [[IN:%.*]], double [[IN]]) @@ -950,6 +1019,10 @@ define void @copysign_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @copysign_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call float 
@copysignf(float [[IN:%.*]], float [[IN]]) +; ; LIBMVEC-SVE-LABEL: define void @copysign_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @copysignf(float [[IN:%.*]], float [[IN]]) @@ -1006,6 +1079,10 @@ define void @cos_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cos(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cos_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cos(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @cos_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_cos( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -1059,6 +1136,10 @@ define void @cos_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_cosf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cos_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_cosf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @cos_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_cosf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -1115,6 +1196,10 @@ define void @cosh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; 
LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cosh(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cosh_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cosh(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @cosh_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_cosh( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -1168,6 +1253,10 @@ define void @cosh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_coshf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cosh_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_coshf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @cosh_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_coshf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -1224,6 +1313,10 @@ define void @cospi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @cospi(double [[IN:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cospi_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = 
tail call double @cospi(double [[IN:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @cospi_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @cospi(double [[IN:%.*]]) @@ -1277,6 +1370,10 @@ define void @cospi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @cospif(float [[IN:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cospi_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call float @cospif(float [[IN:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @cospi_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @cospif(float [[IN:%.*]]) @@ -1333,6 +1430,10 @@ define void @erf_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_erf(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @erf_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_erf(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @erf_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_erf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -1386,6 +1487,10 @@ define void @erf_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call 
aarch64_vector_pcs <4 x float> @_ZGVnN4v_erff(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @erf_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_erff(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @erf_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_erff( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -1442,6 +1547,10 @@ define void @erfc_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_erfc(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @erfc_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_erfc(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @erfc_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_erfc( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -1495,6 +1604,10 @@ define void @erfc_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_erfcf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @erfc_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_erfcf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; 
LIBMVEC-SVE-LABEL: define void @erfc_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_erfcf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -1551,6 +1664,10 @@ define void @exp_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @exp_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_exp( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -1604,6 +1721,10 @@ define void @exp_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_expf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_expf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @exp_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_expf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -1660,6 +1781,10 @@ define void @exp10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias 
[[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp10_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @exp10_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_exp10( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -1713,6 +1838,10 @@ define void @exp10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp10_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_exp10f(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @exp10_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_exp10f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -1769,6 +1898,10 @@ define void @exp2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp2_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: 
[[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @exp2_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_exp2( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -1822,6 +1955,10 @@ define void @exp2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp2_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_exp2f(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @exp2_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_exp2f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -1878,6 +2015,10 @@ define void @expm1_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_expm1(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @expm1_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_expm1(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @expm1_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_expm1( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -1931,6 
+2072,10 @@ define void @expm1_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_expm1f(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @expm1_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_expm1f(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @expm1_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_expm1f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -1987,6 +2132,10 @@ define void @fdim_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fdim_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]]) +; ; LIBMVEC-SVE-LABEL: define void @fdim_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @fdim(double [[IN:%.*]], double [[IN]]) @@ -2040,6 +2189,10 @@ define void @fdim_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fdim_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; 
LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]]) +; ; LIBMVEC-SVE-LABEL: define void @fdim_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @fdimf(float [[IN:%.*]], float [[IN]]) @@ -2096,6 +2249,10 @@ define void @fma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fma_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]]) +; ; LIBMVEC-SVE-LABEL: define void @fma_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @fma(double [[IN:%.*]], double [[IN]], double [[IN]]) @@ -2149,6 +2306,10 @@ define void @fma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fma_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]]) +; ; LIBMVEC-SVE-LABEL: define void @fma_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @fmaf(float [[IN:%.*]], float [[IN]], float [[IN]]) @@ -2205,6 +2366,10 @@ define void @fmax_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr 
noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @fmax(double [[IN:%.*]], double [[IN]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fmax_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call double @fmax(double [[IN:%.*]], double [[IN]]) +; ; LIBMVEC-SVE-LABEL: define void @fmax_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @fmax(double [[IN:%.*]], double [[IN]]) @@ -2258,6 +2423,10 @@ define void @fmax_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @fmaxf(float [[IN:%.*]], float [[IN]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fmax_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call float @fmaxf(float [[IN:%.*]], float [[IN]]) +; ; LIBMVEC-SVE-LABEL: define void @fmax_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @fmaxf(float [[IN:%.*]], float [[IN]]) @@ -2314,6 +2483,10 @@ define void @fmin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fmin_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]]) +; ; LIBMVEC-SVE-LABEL: define void @fmin_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr 
noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @fmin(double [[IN:%.*]], double [[IN]]) @@ -2367,6 +2540,10 @@ define void @fmin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fmin_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]]) +; ; LIBMVEC-SVE-LABEL: define void @fmin_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @fminf(float [[IN:%.*]], float [[IN]]) @@ -2423,6 +2600,10 @@ define void @fmod_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @fmod(double [[IN:%.*]], double [[IN]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fmod_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call double @fmod(double [[IN:%.*]], double [[IN]]) +; ; LIBMVEC-SVE-LABEL: define void @fmod_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @fmod(double [[IN:%.*]], double [[IN]]) @@ -2476,6 +2657,10 @@ define void @fmod_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @fmodf(float [[IN:%.*]], float [[IN]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fmod_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr 
noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call float @fmodf(float [[IN:%.*]], float [[IN]]) +; ; LIBMVEC-SVE-LABEL: define void @fmod_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @fmodf(float [[IN:%.*]], float [[IN]]) @@ -2532,6 +2717,10 @@ define void @hypot_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_hypot(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @hypot_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_hypot(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; ; LIBMVEC-SVE-LABEL: define void @hypot_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxvv_hypot( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) @@ -2585,6 +2774,10 @@ define void @hypot_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4vv_hypotf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @hypot_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2vv_hypotf(<2 x float> [[WIDE_LOAD:%.*]], <2 x float> [[WIDE_LOAD]]) +; ; LIBMVEC-SVE-LABEL: define void @hypot_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr 
noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxvv_hypotf( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) @@ -2641,6 +2834,10 @@ define void @ilogb_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call i32 @ilogb(double [[IN:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @ilogb_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call i32 @ilogb(double [[IN:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @ilogb_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call i32 @ilogb(double [[IN:%.*]]) @@ -2694,6 +2891,10 @@ define void @ilogb_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call i32 @ilogbf(float [[IN:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @ilogb_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call i32 @ilogbf(float [[IN:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @ilogb_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call i32 @ilogbf(float [[IN:%.*]]) @@ -2750,6 +2951,10 @@ define void @ldexp_f64(ptr noalias %in1.ptr, ptr noalias %in2.ptr, ptr noalias % ; LIBMVEC-NEON-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @ldexp(double [[IN1:%.*]], i32 [[IN2:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @ldexp_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN1_PTR:%.*]], 
ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call double @ldexp(double [[IN1:%.*]], i32 [[IN2:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @ldexp_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @ldexp(double [[IN1:%.*]], i32 [[IN2:%.*]]) @@ -2805,6 +3010,10 @@ define void @ldexp_f32(ptr noalias %in1.ptr, ptr noalias %in2.ptr, ptr noalias % ; LIBMVEC-NEON-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @ldexpf(float [[IN1:%.*]], i32 [[IN2:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @ldexp_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call float @ldexpf(float [[IN1:%.*]], i32 [[IN2:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @ldexp_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN1_PTR:%.*]], ptr noalias [[IN2_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @ldexpf(float [[IN1:%.*]], i32 [[IN2:%.*]]) @@ -2863,6 +3072,10 @@ define void @lgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @lgamma(double [[IN:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @lgamma_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call double @lgamma(double [[IN:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @lgamma_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @lgamma(double 
[[IN:%.*]]) @@ -2916,6 +3129,10 @@ define void @lgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @lgammaf(float [[IN:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @lgamma_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call float @lgammaf(float [[IN:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @lgamma_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @lgammaf(float [[IN:%.*]]) @@ -2972,6 +3189,10 @@ define void @log_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @log_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -3025,6 +3246,10 @@ define void @log_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_logf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; 
LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_logf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @log_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_logf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -3081,6 +3306,10 @@ define void @log10_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log10_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @log10_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log10( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -3134,6 +3363,10 @@ define void @log10_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_log10f(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log10_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_log10f(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @log10_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log10f( [[WIDE_MASKED_LOAD:%.*]], 
[[ACTIVE_LANE_MASK:%.*]]) @@ -3190,6 +3423,10 @@ define void @log1p_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log1p(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log1p_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log1p(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @log1p_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log1p( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -3243,6 +3480,10 @@ define void @log1p_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_log1pf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log1p_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_log1pf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @log1p_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log1pf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -3299,6 +3540,10 @@ define void @log2_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]]) ; +; 
LIBMVEC-NEON-WIDTH-2-LABEL: define void @log2_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @log2_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log2( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -3352,6 +3597,10 @@ define void @log2_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_log2f(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log2_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_log2f(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @log2_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_log2f( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -3411,6 +3660,10 @@ define void @modf_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; LIBMVEC-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @modf_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @modf_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr 
noalias [[C:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) @@ -3463,6 +3716,10 @@ define void @modf_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; LIBMVEC-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @modf_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @modf_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) @@ -3518,6 +3775,10 @@ define void @nextafter_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @nextafter_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]]) +; ; LIBMVEC-SVE-LABEL: define void @nextafter_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]]) @@ -3571,6 +3832,10 @@ define void @nextafter_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void 
@nextafter_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]]) +; ; LIBMVEC-SVE-LABEL: define void @nextafter_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]]) @@ -3627,6 +3892,10 @@ define void @pow_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @pow_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; ; LIBMVEC-SVE-LABEL: define void @pow_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxvv_pow( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) @@ -3680,6 +3949,10 @@ define void @pow_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4vv_powf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @pow_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2vv_powf(<2 x float> [[WIDE_LOAD:%.*]], <2 x float> [[WIDE_LOAD]]) +; ; LIBMVEC-SVE-LABEL: define 
void @pow_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxvv_powf( [[WIDE_MASKED_LOAD:%.*]], [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK:%.*]]) @@ -3736,6 +4009,10 @@ define void @sin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sin_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @sin_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_sin( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -3789,6 +4066,10 @@ define void @sin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_sinf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sin_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_sinf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @sin_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_sinf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -3848,6 +4129,10 @@ define void @sincos_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; LIBMVEC-NEON-SAME: (ptr noalias [[A:%.*]], 
ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sincos_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @sincos_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) @@ -3899,6 +4184,10 @@ define void @sincos_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; LIBMVEC-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sincos_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @sincos_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) @@ -3956,6 +4245,10 @@ define void @sincospi_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; LIBMVEC-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sincospi_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: call void @sincospi(double [[NUM:%.*]], ptr 
[[GEPB:%.*]], ptr [[GEPC:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @sincospi_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) @@ -4007,6 +4300,10 @@ define void @sincospi_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; LIBMVEC-NEON-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sincospi_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @sincospi_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) @@ -4061,6 +4358,10 @@ define void @sinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_sinh(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sinh_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_sinh(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @sinh_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_sinh( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -4114,6 +4415,10 @@ define void @sinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; 
LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sinh_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_sinhf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @sinh_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_sinhf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -4170,6 +4475,10 @@ define void @sinpi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sinpi_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @sinpi_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]]) @@ -4223,6 +4532,10 @@ define void @sinpi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sinpi_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @sinpi_f32 ; LIBMVEC-SVE-SAME: (ptr 
noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]]) @@ -4279,6 +4592,10 @@ define void @sqrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @sqrt(double [[IN:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sqrt_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call double @sqrt(double [[IN:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @sqrt_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @sqrt(double [[IN:%.*]]) @@ -4332,6 +4649,10 @@ define void @sqrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @sqrtf(float [[IN:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sqrt_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call float @sqrtf(float [[IN:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @sqrt_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @sqrtf(float [[IN:%.*]]) @@ -4388,6 +4709,10 @@ define void @tan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_tan(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @tan_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; 
LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_tan(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @tan_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_tan( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -4441,6 +4766,10 @@ define void @tan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_tanf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @tan_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_tanf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @tan_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_tanf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -4497,6 +4826,10 @@ define void @tanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_tanh(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @tanh_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_tanh(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @tanh_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_tanh( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -4550,6 
+4883,10 @@ define void @tanh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @tanh_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_tanhf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @tanh_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[TMP12:%.*]] = call @_ZGVsMxv_tanhf( [[WIDE_MASKED_LOAD:%.*]], [[ACTIVE_LANE_MASK:%.*]]) @@ -4606,6 +4943,10 @@ define void @tgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call double @tgamma(double [[IN:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @tgamma_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call double @tgamma(double [[IN:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @tgamma_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call double @tgamma(double [[IN:%.*]]) @@ -4659,6 +5000,10 @@ define void @tgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-NEON: [[CALL:%.*]] = tail call float @tgammaf(float [[IN:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @tgamma_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { +; LIBMVEC-NEON-WIDTH-2: [[CALL:%.*]] = tail call float 
@tgammaf(float [[IN:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @tgamma_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; LIBMVEC-SVE: [[CALL:%.*]] = tail call float @tgammaf(float [[IN:%.*]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll index 792f45ac57079..f6f2e39594dd8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --filter "call.*(acos|asin|atan|atan2|cos|cosh|exp|log|sin|sinh|pow|ceil|copysign|fabs|floor|fma|m..num|nearbyint|rint|round|sqrt|tan|tanh|trunc)" --version 2 ; RUN: opt -mattr=+neon -vector-library=LIBMVEC -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=LIBMVEC-NEON +; RUN: opt -mattr=+neon -vector-library=LIBMVEC -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -force-vector-width=2 -S < %s | FileCheck %s --check-prefix=LIBMVEC-NEON-WIDTH-2 ; RUN: opt -mattr=+sve -vector-library=LIBMVEC -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=LIBMVEC-SVE ; RUN: opt -mattr=+neon -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=SLEEF-NEON ; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s --check-prefix=SLEEF-SVE @@ -22,6 +23,10 @@ define void @acos_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr 
[[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_acos(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @acos_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_acos(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @acos_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_acos( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -66,6 +71,10 @@ define void @acos_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_acosf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @acos_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_acosf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @acos_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_acosf( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -113,6 +122,10 @@ define void @asin_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_asin(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @asin_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_asin(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define 
void @asin_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_asin( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -157,6 +170,10 @@ define void @asin_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_asinf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @asin_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_asinf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @asin_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_asinf( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -204,6 +221,10 @@ define void @atan_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_atan(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @atan_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_atan(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @atan_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_atan( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -248,6 +269,10 @@ define void @atan_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_atanf(<4 x float> [[WIDE_LOAD:%.*]]) ; 
+; LIBMVEC-NEON-WIDTH-2-LABEL: define void @atan_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_atanf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @atan_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_atanf( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -295,6 +320,10 @@ define void @atan2_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @atan2_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; ; LIBMVEC-SVE-LABEL: define void @atan2_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxvv_atan2( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]], splat (i1 true)) @@ -339,6 +368,10 @@ define void @atan2_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @atan2_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2vv_atan2f(<2 x float> [[WIDE_LOAD:%.*]], <2 x float> [[WIDE_LOAD]]) +; ; LIBMVEC-SVE-LABEL: define void @atan2_f32 ; 
LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxvv_atan2f( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]], splat (i1 true)) @@ -386,6 +419,10 @@ define void @ceil_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @ceil_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @ceil_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) @@ -429,6 +466,10 @@ define void @ceil_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @ceil_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @ceil_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) @@ -475,6 +516,10 @@ define void @copysign_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: 
define void @copysign_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; ; LIBMVEC-SVE-LABEL: define void @copysign_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @llvm.copysign.nxv2f64( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) @@ -519,6 +564,10 @@ define void @copysign_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @copysign_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> [[WIDE_LOAD:%.*]], <2 x float> [[WIDE_LOAD]]) +; ; LIBMVEC-SVE-LABEL: define void @copysign_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @llvm.copysign.nxv4f32( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]]) @@ -566,6 +615,10 @@ define void @cos_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cos(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cos_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cos(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @cos_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = 
call @_ZGVsMxv_cos( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -610,6 +663,10 @@ define void @cos_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_cosf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cos_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_cosf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @cos_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_cosf( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -657,6 +714,10 @@ define void @cosh_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cosh(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cosh_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_cosh(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @cosh_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_cosh( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -701,6 +762,10 @@ define void @cosh_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_coshf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @cosh_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) 
#[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_coshf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @cosh_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_coshf( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -748,6 +813,10 @@ define void @exp_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @exp_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_exp( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -792,6 +861,10 @@ define void @exp_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_expf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_expf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @exp_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_expf( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -839,6 +912,10 @@ define void @exp10_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias 
[[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp10_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @exp10_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_exp10( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -883,6 +960,10 @@ define void @exp10_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp10_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_exp10f(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @exp10_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_exp10f( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -930,6 +1011,10 @@ define void @exp2_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp2_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]]) +; ; 
LIBMVEC-SVE-LABEL: define void @exp2_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_exp2( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -974,6 +1059,10 @@ define void @exp2_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @exp2_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_exp2f(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @exp2_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_exp2f( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -1021,6 +1110,10 @@ define void @fabs_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fabs_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @fabs_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) @@ -1064,6 +1157,10 @@ define void @fabs_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) ; +; 
LIBMVEC-NEON-WIDTH-2-LABEL: define void @fabs_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @fabs_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) @@ -1110,6 +1207,10 @@ define void @floor_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @floor_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @floor_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) @@ -1153,6 +1254,10 @@ define void @floor_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @floor_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @floor_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) @@ -1199,6 +1304,10 @@ define 
void @fma_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fma_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]]) +; ; LIBMVEC-SVE-LABEL: define void @fma_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]]) @@ -1242,6 +1351,10 @@ define void @fma_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @fma_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[WIDE_LOAD:%.*]], <2 x float> [[WIDE_LOAD]], <2 x float> [[WIDE_LOAD]]) +; ; LIBMVEC-SVE-LABEL: define void @fma_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]]) @@ -1288,6 +1401,10 @@ define void @log_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x 
double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @log_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_log( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -1332,6 +1449,10 @@ define void @log_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_logf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_logf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @log_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_logf( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -1379,6 +1500,10 @@ define void @log10_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log10_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @log10_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; 
LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_log10( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -1423,6 +1548,10 @@ define void @log10_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_log10f(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log10_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_log10f(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @log10_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_log10f( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -1470,6 +1599,10 @@ define void @log2_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log2_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @log2_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_log2( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -1514,6 +1647,10 @@ define void @log2_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_log2f(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @log2_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr 
noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_log2f(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @log2_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxv_log2f( [[WIDE_LOAD:%.*]], splat (i1 true)) @@ -1561,6 +1698,10 @@ define void @maxnum_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @maxnum_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; ; LIBMVEC-SVE-LABEL: define void @maxnum_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) @@ -1604,6 +1745,10 @@ define void @maxnum_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @maxnum_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x float> @llvm.maxnum.v2f32(<2 x float> [[WIDE_LOAD:%.*]], <2 x float> [[WIDE_LOAD]]) +; ; LIBMVEC-SVE-LABEL: define void @maxnum_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> 
@llvm.maxnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) @@ -1650,6 +1795,10 @@ define void @minnum_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @minnum_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; ; LIBMVEC-SVE-LABEL: define void @minnum_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) @@ -1693,6 +1842,10 @@ define void @minnum_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @minnum_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x float> @llvm.minnum.v2f32(<2 x float> [[WIDE_LOAD:%.*]], <2 x float> [[WIDE_LOAD]]) +; ; LIBMVEC-SVE-LABEL: define void @minnum_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) @@ -1739,6 +1892,10 @@ define void @nearbyint_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x 
double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @nearbyint_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @nearbyint_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) @@ -1782,6 +1939,10 @@ define void @nearbyint_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @nearbyint_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @nearbyint_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) @@ -1828,6 +1989,10 @@ define void @pow_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @pow_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]]) +; ; LIBMVEC-SVE-LABEL: define void @pow_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], 
ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxvv_pow( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]], splat (i1 true)) @@ -1872,6 +2037,10 @@ define void @pow_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4vv_powf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @pow_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2vv_powf(<2 x float> [[WIDE_LOAD:%.*]], <2 x float> [[WIDE_LOAD]]) +; ; LIBMVEC-SVE-LABEL: define void @pow_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call @_ZGVsMxvv_powf( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]], splat (i1 true)) @@ -1919,6 +2088,10 @@ define void @rint_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @rint_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @rint_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) @@ -1962,6 +2135,10 @@ define void @rint_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) ; +; 
LIBMVEC-NEON-WIDTH-2-LABEL: define void @rint_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x float> @llvm.rint.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @rint_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) @@ -2008,6 +2185,10 @@ define void @round_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @round_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @round_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) @@ -2051,6 +2232,10 @@ define void @round_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @round_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x float> @llvm.round.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @round_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) @@ -2097,6 +2282,10 @@ define 
void @sin_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sin_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @sin_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) @@ -2141,6 +2330,10 @@ define void @sin_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_sinf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sin_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_sinf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @sin_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) @@ -2188,6 +2381,10 @@ define void @sinh_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_sinh(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sinh_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x 
double> @_ZGVnN2v_sinh(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @sinh_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sinh(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) @@ -2232,6 +2429,10 @@ define void @sinh_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sinh_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_sinhf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @sinh_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinhf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) @@ -2279,6 +2480,10 @@ define void @sqrt_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sqrt_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @sqrt_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) @@ -2322,6 +2527,10 @@ define void @sqrt_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> 
@llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @sqrt_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @sqrt_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) @@ -2368,6 +2577,10 @@ define void @tan_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_tan(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @tan_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_tan(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @tan_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tan(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) @@ -2412,6 +2625,10 @@ define void @tan_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_tanf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @tan_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_tanf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @tan_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call 
<vscale x 4 x float> @_ZGVsMxv_tanf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) @@ -2459,6 +2676,10 @@ define void @tanh_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_tanh(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @tanh_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x double> @_ZGVnN2v_tanh(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @tanh_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tanh(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true)) @@ -2503,6 +2724,10 @@ define void @tanh_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call aarch64_vector_pcs <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @tanh_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call aarch64_vector_pcs <2 x float> @_ZGVnN2v_tanhf(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @tanh_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP8:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanhf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true)) @@ -2550,6 +2775,10 @@ define void @trunc_f64(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @trunc_f64 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) 
#[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @trunc_f64 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[WIDE_LOAD:%.*]]) @@ -2593,6 +2822,10 @@ define void @trunc_f32(ptr noalias %in.ptr, ptr %out.ptr) { ; LIBMVEC-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-NEON: [[TMP2:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[WIDE_LOAD:%.*]]) ; +; LIBMVEC-NEON-WIDTH-2-LABEL: define void @trunc_f32 +; LIBMVEC-NEON-WIDTH-2-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { +; LIBMVEC-NEON-WIDTH-2: [[TMP2:%.*]] = call <2 x float> @llvm.trunc.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) +; ; LIBMVEC-SVE-LABEL: define void @trunc_f32 ; LIBMVEC-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] { ; LIBMVEC-SVE: [[TMP2:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[WIDE_LOAD:%.*]])