Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 20 additions & 5 deletions llvm/lib/CodeGen/ReplaceWithVeclib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,20 +100,34 @@ static void replaceWithTLIFunction(IntrinsicInst *II, VFInfo &Info,
static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
IntrinsicInst *II) {
assert(II != nullptr && "Intrinsic cannot be null");
Intrinsic::ID IID = II->getIntrinsicID();
Type *RetTy = II->getType();
Type *ScalarRetTy = RetTy->getScalarType();
// At the moment VFABI assumes the return type is always widened unless it is
// a void type.
auto *VTy = dyn_cast<VectorType>(II->getType());
auto *VTy = dyn_cast<VectorType>(RetTy);
ElementCount EC(VTy ? VTy->getElementCount() : ElementCount::getFixed(0));

// OloadTys collects types used in scalar intrinsic overload name.
SmallVector<Type *, 3> OloadTys;
if (!RetTy->isVoidTy() && isVectorIntrinsicWithOverloadTypeAtArg(IID, -1))
OloadTys.push_back(ScalarRetTy);

// Compute the argument types of the corresponding scalar call and check that
// all vector operands match the previously found EC.
SmallVector<Type *, 8> ScalarArgTypes;
Intrinsic::ID IID = II->getIntrinsicID();
for (auto Arg : enumerate(II->args())) {
auto *ArgTy = Arg.value()->getType();
bool IsOloadTy = isVectorIntrinsicWithOverloadTypeAtArg(IID, Arg.index());
if (isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index())) {
ScalarArgTypes.push_back(ArgTy);
if (IsOloadTy)
OloadTys.push_back(ArgTy);
} else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) {
ScalarArgTypes.push_back(VectorArgTy->getElementType());
auto *ScalarArgTy = VectorArgTy->getElementType();
ScalarArgTypes.push_back(ScalarArgTy);
if (IsOloadTy)
OloadTys.push_back(ScalarArgTy);
// When return type is void, set EC to the first vector argument, and
// disallow vector arguments with different ECs.
if (EC.isZero())
Expand All @@ -129,7 +143,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
// using scalar argument types.
std::string ScalarName =
Intrinsic::isOverloaded(IID)
? Intrinsic::getName(IID, ScalarArgTypes, II->getModule())
? Intrinsic::getName(IID, OloadTys, II->getModule())
: Intrinsic::getName(IID).str();

// Try to find the mapping for the scalar version of this intrinsic and the
Expand All @@ -146,7 +160,6 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,

// Replace the call to the intrinsic with a call to the vector library
// function.
Type *ScalarRetTy = II->getType()->getScalarType();
FunctionType *ScalarFTy =
FunctionType::get(ScalarRetTy, ScalarArgTypes, /*isVarArg*/ false);
const std::string MangledName = VD->getVectorFunctionABIVariantString();
Expand Down Expand Up @@ -193,6 +206,8 @@ static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
for (auto &I : instructions(F)) {
// Process only intrinsic calls that return void or a vector.
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
if (II->getIntrinsicID() == Intrinsic::not_intrinsic)
continue;
if (!II->getType()->isVectorTy() && !II->getType()->isVoidTy())
continue;

Expand Down
17 changes: 5 additions & 12 deletions llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)

;.
; CHECK: @llvm.compiler.used = appending global [60 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vtanq_f64, ptr @armpl_vtanq_f32, ptr @armpl_svtan_f64_x, ptr @armpl_svtan_f32_x, ptr @armpl_vacosq_f64, ptr @armpl_vacosq_f32, ptr @armpl_svacos_f64_x, ptr @armpl_svacos_f32_x, ptr @armpl_vasinq_f64, ptr @armpl_vasinq_f32, ptr @armpl_svasin_f64_x, ptr @armpl_svasin_f32_x, ptr @armpl_vatanq_f64, ptr @armpl_vatanq_f32, ptr @armpl_svatan_f64_x, ptr @armpl_svatan_f32_x, ptr @armpl_vcoshq_f64, ptr @armpl_vcoshq_f32, ptr @armpl_svcosh_f64_x, ptr @armpl_svcosh_f32_x, ptr @armpl_vsinhq_f64, ptr @armpl_vsinhq_f32, ptr @armpl_svsinh_f64_x, ptr @armpl_svsinh_f32_x, ptr @armpl_vtanhq_f64, ptr @armpl_vtanhq_f32, ptr @armpl_svtanh_f64_x, ptr @armpl_svtanh_f32_x], section "llvm.metadata"
; CHECK: @llvm.compiler.used = appending global [64 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vpowq_f64, ptr @armpl_vpowq_f32, ptr @armpl_svpow_f64_x, ptr @armpl_svpow_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vtanq_f64, ptr @armpl_vtanq_f32, ptr @armpl_svtan_f64_x, ptr @armpl_svtan_f32_x, ptr @armpl_vacosq_f64, ptr @armpl_vacosq_f32, ptr @armpl_svacos_f64_x, ptr @armpl_svacos_f32_x, ptr @armpl_vasinq_f64, ptr @armpl_vasinq_f32, ptr @armpl_svasin_f64_x, ptr @armpl_svasin_f32_x, ptr @armpl_vatanq_f64, ptr @armpl_vatanq_f32, ptr @armpl_svatan_f64_x, ptr @armpl_svatan_f32_x, ptr @armpl_vcoshq_f64, ptr @armpl_vcoshq_f32, ptr @armpl_svcosh_f64_x, ptr @armpl_svcosh_f32_x, ptr @armpl_vsinhq_f64, ptr @armpl_vsinhq_f32, ptr @armpl_svsinh_f64_x, ptr @armpl_svsinh_f32_x, ptr @armpl_vtanhq_f64, ptr @armpl_vtanhq_f32, ptr @armpl_svtanh_f64_x, ptr @armpl_svtanh_f32_x], section "llvm.metadata"

;.
define <2 x double> @llvm_cos_f64(<2 x double> %in) {
Expand Down Expand Up @@ -333,17 +333,10 @@ declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
declare <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)

;
; There is a bug in the replace-with-veclib pass, and for intrinsics which take
; more than one arguments, but has just one overloaded type, it incorrectly
; reconstructs the scalar name, for pow specifically it is searching for:
; llvm.pow.f64.f64 and llvm.pow.f32.f32
;

define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %power) {
; CHECK-LABEL: define <2 x double> @llvm_pow_f64
; CHECK-SAME: (<2 x double> [[IN:%.*]], <2 x double> [[POWER:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.pow.v2f64(<2 x double> [[IN]], <2 x double> [[POWER]])
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vpowq_f64(<2 x double> [[IN]], <2 x double> [[POWER]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %in, <2 x double> %power)
Expand All @@ -353,7 +346,7 @@ define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %power) {
define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %power) {
; CHECK-LABEL: define <4 x float> @llvm_pow_f32
; CHECK-SAME: (<4 x float> [[IN:%.*]], <4 x float> [[POWER:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.pow.v4f32(<4 x float> [[IN]], <4 x float> [[POWER]])
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vpowq_f32(<4 x float> [[IN]], <4 x float> [[POWER]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %in, <4 x float> %power)
Expand All @@ -363,7 +356,7 @@ define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %power) {
define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vscale x 2 x double> %power) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @llvm_pow_vscale_f64
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> [[POWER:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> [[IN]], <vscale x 2 x double> [[POWER]])
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svpow_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x double> [[POWER]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> %in, <vscale x 2 x double> %power)
Expand All @@ -373,7 +366,7 @@ define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vs
define <vscale x 4 x float> @llvm_pow_vscale_f32(<vscale x 4 x float> %in, <vscale x 4 x float> %power) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @llvm_pow_vscale_f32
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> [[POWER:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> [[IN]], <vscale x 4 x float> [[POWER]])
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svpow_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x float> [[POWER]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %in, <vscale x 4 x float> %power)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
target triple = "aarch64-unknown-linux-gnu"

;.
; CHECK: @llvm.compiler.used = appending global [30 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxv_tan, ptr @_ZGVsMxv_tanf, ptr @_ZGVsMxv_acos, ptr @_ZGVsMxv_acosf, ptr @_ZGVsMxv_asin, ptr @_ZGVsMxv_asinf, ptr @_ZGVsMxv_atan, ptr @_ZGVsMxv_atanf, ptr @_ZGVsMxv_cosh, ptr @_ZGVsMxv_coshf, ptr @_ZGVsMxv_sinh, ptr @_ZGVsMxv_sinhf, ptr @_ZGVsMxv_tanh, ptr @_ZGVsMxv_tanhf], section "llvm.metadata"
; CHECK: @llvm.compiler.used = appending global [32 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxvv_pow, ptr @_ZGVsMxvv_powf, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxv_tan, ptr @_ZGVsMxv_tanf, ptr @_ZGVsMxv_acos, ptr @_ZGVsMxv_acosf, ptr @_ZGVsMxv_asin, ptr @_ZGVsMxv_asinf, ptr @_ZGVsMxv_atan, ptr @_ZGVsMxv_atanf, ptr @_ZGVsMxv_cosh, ptr @_ZGVsMxv_coshf, ptr @_ZGVsMxv_sinh, ptr @_ZGVsMxv_sinhf, ptr @_ZGVsMxv_tanh, ptr @_ZGVsMxv_tanhf], section "llvm.metadata"
;.
define <vscale x 2 x double> @llvm_ceil_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_ceil_vscale_f64(
Expand Down Expand Up @@ -278,7 +278,7 @@ define <vscale x 4 x float> @llvm_nearbyint_vscale_f32(<vscale x 4 x float> %in)

define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vscale x 2 x double> %pow) {
; CHECK-LABEL: @llvm_pow_vscale_f64(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> [[POW:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> [[POW:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> %in, <vscale x 2 x double> %pow)
Expand All @@ -287,7 +287,7 @@ define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vs

define <vscale x 4 x float> @llvm_pow_vscale_f32(<vscale x 4 x float> %in, <vscale x 4 x float> %pow) {
; CHECK-LABEL: @llvm_pow_vscale_f32(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> [[POW:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> [[POW:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %in, <vscale x 4 x float> %pow)
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
target triple = "aarch64-unknown-linux-gnu"

;.
; CHECK: @llvm.compiler.used = appending global [30 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2v_tan, ptr @_ZGVnN4v_tanf, ptr @_ZGVnN2v_acos, ptr @_ZGVnN4v_acosf, ptr @_ZGVnN2v_asin, ptr @_ZGVnN4v_asinf, ptr @_ZGVnN2v_atan, ptr @_ZGVnN4v_atanf, ptr @_ZGVnN2v_cosh, ptr @_ZGVnN4v_coshf, ptr @_ZGVnN2v_sinh, ptr @_ZGVnN4v_sinhf, ptr @_ZGVnN2v_tanh, ptr @_ZGVnN4v_tanhf], section "llvm.metadata"
; CHECK: @llvm.compiler.used = appending global [32 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2vv_pow, ptr @_ZGVnN4vv_powf, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2v_tan, ptr @_ZGVnN4v_tanf, ptr @_ZGVnN2v_acos, ptr @_ZGVnN4v_acosf, ptr @_ZGVnN2v_asin, ptr @_ZGVnN4v_asinf, ptr @_ZGVnN2v_atan, ptr @_ZGVnN4v_atanf, ptr @_ZGVnN2v_cosh, ptr @_ZGVnN4v_coshf, ptr @_ZGVnN2v_sinh, ptr @_ZGVnN4v_sinhf, ptr @_ZGVnN2v_tanh, ptr @_ZGVnN4v_tanhf], section "llvm.metadata"
;.
define <2 x double> @llvm_ceil_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_ceil_f64(
Expand Down Expand Up @@ -278,7 +278,7 @@ define <4 x float> @llvm_nearbyint_f32(<4 x float> %in) {

define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %pow) {
; CHECK-LABEL: @llvm_pow_f64(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.pow.v2f64(<2 x double> [[IN:%.*]], <2 x double> [[POW:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2vv_pow(<2 x double> [[IN:%.*]], <2 x double> [[POW:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %in, <2 x double> %pow)
Expand All @@ -287,7 +287,7 @@ define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %pow) {

define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %pow) {
; CHECK-LABEL: @llvm_pow_f32(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.pow.v4f32(<4 x float> [[IN:%.*]], <4 x float> [[POW:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4vv_powf(<4 x float> [[IN:%.*]], <4 x float> [[POW:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %in, <4 x float> %pow)
Expand Down
Loading