diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index b3b4c37475eef..425ea311d653a 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -81,6 +81,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
   case Intrinsic::exp:
   case Intrinsic::exp10:
   case Intrinsic::exp2:
+  case Intrinsic::ldexp:
   case Intrinsic::log:
   case Intrinsic::log10:
   case Intrinsic::log2:
@@ -108,6 +109,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
   case Intrinsic::canonicalize:
   case Intrinsic::fptosi_sat:
   case Intrinsic::fptoui_sat:
+  case Intrinsic::lround:
+  case Intrinsic::llround:
   case Intrinsic::lrint:
   case Intrinsic::llrint:
   case Intrinsic::ucmp:
@@ -189,6 +192,8 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
   switch (ID) {
   case Intrinsic::fptosi_sat:
   case Intrinsic::fptoui_sat:
+  case Intrinsic::lround:
+  case Intrinsic::llround:
   case Intrinsic::lrint:
   case Intrinsic::llrint:
   case Intrinsic::vp_lrint:
@@ -203,6 +208,7 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
   case Intrinsic::vp_is_fpclass:
     return OpdIdx == 0;
   case Intrinsic::powi:
+  case Intrinsic::ldexp:
     return OpdIdx == -1 || OpdIdx == 1;
   default:
     return OpdIdx == -1;
diff --git a/llvm/test/Transforms/LoopVectorize/intrinsic.ll b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
index 9c910d70807a1..10d83a456d0e2 100644
--- a/llvm/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
@@ -324,6 +324,56 @@ for.end: ; preds = %for.body, %entry
 
 declare double @llvm.exp2.f64(double)
 
+define void @ldexp_f32i32(i32 %n, ptr %y, ptr %x, i32 %exp) {
+; CHECK-LABEL: @ldexp_f32i32(
+; CHECK: llvm.ldexp.v4f32.v4i32
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body: ; preds = %entry, %for.body
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %y, i32 %iv
+  %0 = load float, ptr %arrayidx, align 4
+  %call = tail call float @llvm.ldexp.f32.i32(float %0, i32 %exp)
+  %arrayidx2 = getelementptr inbounds float, ptr %x, i32 %iv
+  store float %call, ptr %arrayidx2, align 4
+  %iv.next = add i32 %iv, 1
+  %exitcond = icmp eq i32 %iv.next, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.ldexp.f32.i32(float, i32)
+
+define void @ldexp_f64i32(i32 %n, ptr %y, ptr %x, i32 %exp) {
+; CHECK-LABEL: @ldexp_f64i32(
+; CHECK: llvm.ldexp.v4f64.v4i32
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body: ; preds = %entry, %for.body
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds double, ptr %y, i32 %iv
+  %0 = load double, ptr %arrayidx, align 8
+  %call = tail call double @llvm.ldexp.f64.i32(double %0, i32 %exp)
+  %arrayidx2 = getelementptr inbounds double, ptr %x, i32 %iv
+  store double %call, ptr %arrayidx2, align 8
+  %iv.next = add i32 %iv, 1
+  %exitcond = icmp eq i32 %iv.next, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.ldexp.f64.i32(double, i32)
+
 define void @log_f32(i32 %n, ptr %y, ptr %x) {
 ; CHECK-LABEL: @log_f32(
 ; CHECK: llvm.log.v4f32
@@ -976,6 +1026,157 @@ for.end: ; preds = %for.body, %entry
 
 declare double @llvm.roundeven.f64(double)
 
+
+define void @lround_i32f32(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @lround_i32f32(
+; CHECK: llvm.lround.v4i32.v4f32
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body: ; preds = %entry, %for.body
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %y, i32 %iv
+  %0 = load float, ptr %arrayidx, align 4
+  %call = tail call i32 @llvm.lround.i32.f32(float %0)
+  %arrayidx2 = getelementptr inbounds i32, ptr %x, i32 %iv
+  store i32 %call, ptr %arrayidx2, align 4
+  %iv.next = add i32 %iv, 1
+  %exitcond = icmp eq i32 %iv.next, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  ret void
+}
+
+declare i32 @llvm.lround.i32.f32(float)
+
+define void @lround_i32f64(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @lround_i32f64(
+; CHECK: llvm.lround.v4i32.v4f64
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body: ; preds = %entry, %for.body
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds double, ptr %y, i32 %iv
+  %0 = load double, ptr %arrayidx, align 8
+  %call = tail call i32 @llvm.lround.i32.f64(double %0)
+  %arrayidx2 = getelementptr inbounds i32, ptr %x, i32 %iv
+  store i32 %call, ptr %arrayidx2, align 8
+  %iv.next = add i32 %iv, 1
+  %exitcond = icmp eq i32 %iv.next, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  ret void
+}
+
+declare i32 @llvm.lround.i32.f64(double)
+
+define void @lround_i64f32(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @lround_i64f32(
+; CHECK: llvm.lround.v4i64.v4f32
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body: ; preds = %entry, %for.body
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %y, i32 %iv
+  %0 = load float, ptr %arrayidx, align 4
+  %call = tail call i64 @llvm.lround.i64.f32(float %0)
+  %arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv
+  store i64 %call, ptr %arrayidx2, align 4
+  %iv.next = add i32 %iv, 1
+  %exitcond = icmp eq i32 %iv.next, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  ret void
+}
+
+declare i64 @llvm.lround.i64.f32(float)
+
+define void @lround_i64f64(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @lround_i64f64(
+; CHECK: llvm.lround.v4i64.v4f64
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body: ; preds = %entry, %for.body
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds double, ptr %y, i32 %iv
+  %0 = load double, ptr %arrayidx, align 8
+  %call = tail call i64 @llvm.lround.i64.f64(double %0)
+  %arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv
+  store i64 %call, ptr %arrayidx2, align 8
+  %iv.next = add i32 %iv, 1
+  %exitcond = icmp eq i32 %iv.next, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  ret void
+}
+
+declare i64 @llvm.lround.i64.f64(double)
+
+define void @llround_i64f32(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @llround_i64f32(
+; CHECK: llvm.llround.v4i64.v4f32
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body: ; preds = %entry, %for.body
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %y, i32 %iv
+  %0 = load float, ptr %arrayidx, align 4
+  %call = tail call i64 @llvm.llround.i64.f32(float %0)
+  %arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv
+  store i64 %call, ptr %arrayidx2, align 4
+  %iv.next = add i32 %iv, 1
+  %exitcond = icmp eq i32 %iv.next, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  ret void
+}
+
+declare i64 @llvm.llround.i64.f32(float)
+
+define void @llround_i64f64(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @llround_i64f64(
+; CHECK: llvm.llround.v4i64.v4f64
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body: ; preds = %entry, %for.body
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds double, ptr %y, i32 %iv
+  %0 = load double, ptr %arrayidx, align 8
+  %call = tail call i64 @llvm.llround.i64.f64(double %0)
+  %arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv
+  store i64 %call, ptr %arrayidx2, align 8
+  %iv.next = add i32 %iv, 1
+  %exitcond = icmp eq i32 %iv.next, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  ret void
+}
+
+declare i64 @llvm.llround.i64.f64(double)
+
 define void @fma_f32(i32 %n, ptr %y, ptr %x, ptr %z, ptr %w) {
 ; CHECK-LABEL: @fma_f32(
 ; CHECK: llvm.fma.v4f32
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/exp.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/exp.ll
new file mode 100644
index 0000000000000..301e5da28f0d8
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/exp.ll
@@ -0,0 +1,279 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64 < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+define void @ldexp_f32i32(ptr %x, ptr %y, i32 %exp) {
+; CHECK-LABEL: @ldexp_f32i32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call float @llvm.ldexp.f32.i32(float [[L0]], i32 [[EXP:%.*]])
+; CHECK-NEXT: [[L3:%.*]] = tail call float @llvm.ldexp.f32.i32(float [[L2]], i32 [[EXP]])
+; CHECK-NEXT: [[L5:%.*]] = tail call float @llvm.ldexp.f32.i32(float [[L4]], i32 [[EXP]])
+; CHECK-NEXT: [[L7:%.*]] = tail call float @llvm.ldexp.f32.i32(float [[L6]], i32 [[EXP]])
+; CHECK-NEXT: store float [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 1
+; CHECK-NEXT: store float [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 2
+; CHECK-NEXT: store float [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 3
+; CHECK-NEXT: store float [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+  %l0 = load float, ptr %x, align 4
+  %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
+  %l2 = load float, ptr %arrayidx.1, align 4
+  %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
+  %l4 = load float, ptr %arrayidx.2, align 4
+  %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
+  %l6 = load float, ptr %arrayidx.3, align 4
+  %l1 = tail call float @llvm.ldexp.f32.i32(float %l0, i32 %exp)
+  %l3 = tail call float @llvm.ldexp.f32.i32(float %l2, i32 %exp)
+  %l5 = tail call float @llvm.ldexp.f32.i32(float %l4, i32 %exp)
+  %l7 = tail call float @llvm.ldexp.f32.i32(float %l6, i32 %exp)
+  store float %l1, ptr %y, align 4
+  %arrayidx2.1 = getelementptr inbounds float, ptr %y, i64 1
+  store float %l3, ptr %arrayidx2.1, align 4
+  %arrayidx2.2 = getelementptr inbounds float, ptr %y, i64 2
+  store float %l5, ptr %arrayidx2.2, align 4
+  %arrayidx2.3 = getelementptr inbounds float, ptr %y, i64 3
+  store float %l7, ptr %arrayidx2.3, align 4
+  ret void
+}
+
+define void @ldexp_f64i32(ptr %x, ptr %y, i32 %exp) {
+; CHECK-LABEL: @ldexp_f64i32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load double, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load double, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load double, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call double @llvm.ldexp.f64.i32(double [[L0]], i32 [[EXP:%.*]])
+; CHECK-NEXT: [[L3:%.*]] = tail call double @llvm.ldexp.f64.i32(double [[L2]], i32 [[EXP]])
+; CHECK-NEXT: [[L5:%.*]] = tail call double @llvm.ldexp.f64.i32(double [[L4]], i32 [[EXP]])
+; CHECK-NEXT: [[L7:%.*]] = tail call double @llvm.ldexp.f64.i32(double [[L6]], i32 [[EXP]])
+; CHECK-NEXT: store double [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 1
+; CHECK-NEXT: store double [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 2
+; CHECK-NEXT: store double [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 3
+; CHECK-NEXT: store double [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+  %l0 = load double, ptr %x, align 4
+  %arrayidx.1 = getelementptr inbounds double, ptr %x, i64 1
+  %l2 = load double, ptr %arrayidx.1, align 4
+  %arrayidx.2 = getelementptr inbounds double, ptr %x, i64 2
+  %l4 = load double, ptr %arrayidx.2, align 4
+  %arrayidx.3 = getelementptr inbounds double, ptr %x, i64 3
+  %l6 = load double, ptr %arrayidx.3, align 4
+  %l1 = tail call double @llvm.ldexp.f64.i32(double %l0, i32 %exp)
+  %l3 = tail call double @llvm.ldexp.f64.i32(double %l2, i32 %exp)
+  %l5 = tail call double @llvm.ldexp.f64.i32(double %l4, i32 %exp)
+  %l7 = tail call double @llvm.ldexp.f64.i32(double %l6, i32 %exp)
+  store double %l1, ptr %y, align 4
+  %arrayidx2.1 = getelementptr inbounds double, ptr %y, i64 1
+  store double %l3, ptr %arrayidx2.1, align 4
+  %arrayidx2.2 = getelementptr inbounds double, ptr %y, i64 2
+  store double %l5, ptr %arrayidx2.2, align 4
+  %arrayidx2.3 = getelementptr inbounds double, ptr %y, i64 3
+  store double %l7, ptr %arrayidx2.3, align 4
+  ret void
+}
+
+define void @ldexp_f32i64(ptr %x, ptr %y, i64 %exp) {
+; CHECK-LABEL: @ldexp_f32i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call float @llvm.ldexp.f32.i64(float [[L0]], i64 [[EXP:%.*]])
+; CHECK-NEXT: [[L3:%.*]] = tail call float @llvm.ldexp.f32.i64(float [[L2]], i64 [[EXP]])
+; CHECK-NEXT: [[L5:%.*]] = tail call float @llvm.ldexp.f32.i64(float [[L4]], i64 [[EXP]])
+; CHECK-NEXT: [[L7:%.*]] = tail call float @llvm.ldexp.f32.i64(float [[L6]], i64 [[EXP]])
+; CHECK-NEXT: store float [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 1
+; CHECK-NEXT: store float [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 2
+; CHECK-NEXT: store float [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 3
+; CHECK-NEXT: store float [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+  %l0 = load float, ptr %x, align 4
+  %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
+  %l2 = load float, ptr %arrayidx.1, align 4
+  %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
+  %l4 = load float, ptr %arrayidx.2, align 4
+  %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
+  %l6 = load float, ptr %arrayidx.3, align 4
+  %l1 = tail call float @llvm.ldexp.f32.i64(float %l0, i64 %exp)
+  %l3 = tail call float @llvm.ldexp.f32.i64(float %l2, i64 %exp)
+  %l5 = tail call float @llvm.ldexp.f32.i64(float %l4, i64 %exp)
+  %l7 = tail call float @llvm.ldexp.f32.i64(float %l6, i64 %exp)
+  store float %l1, ptr %y, align 4
+  %arrayidx2.1 = getelementptr inbounds float, ptr %y, i64 1
+  store float %l3, ptr %arrayidx2.1, align 4
+  %arrayidx2.2 = getelementptr inbounds float, ptr %y, i64 2
+  store float %l5, ptr %arrayidx2.2, align 4
+  %arrayidx2.3 = getelementptr inbounds float, ptr %y, i64 3
+  store float %l7, ptr %arrayidx2.3, align 4
+  ret void
+}
+
+define void @ldexp_f64i64(ptr %x, ptr %y, i64 %exp) {
+; CHECK-LABEL: @ldexp_f64i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load double, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load double, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load double, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call double @llvm.ldexp.f64.i64(double [[L0]], i64 [[EXP:%.*]])
+; CHECK-NEXT: [[L3:%.*]] = tail call double @llvm.ldexp.f64.i64(double [[L2]], i64 [[EXP]])
+; CHECK-NEXT: [[L5:%.*]] = tail call double @llvm.ldexp.f64.i64(double [[L4]], i64 [[EXP]])
+; CHECK-NEXT: [[L7:%.*]] = tail call double @llvm.ldexp.f64.i64(double [[L6]], i64 [[EXP]])
+; CHECK-NEXT: store double [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 1
+; CHECK-NEXT: store double [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 2
+; CHECK-NEXT: store double [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 3
+; CHECK-NEXT: store double [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+  %l0 = load double, ptr %x, align 4
+  %arrayidx.1 = getelementptr inbounds double, ptr %x, i64 1
+  %l2 = load double, ptr %arrayidx.1, align 4
+  %arrayidx.2 = getelementptr inbounds double, ptr %x, i64 2
+  %l4 = load double, ptr %arrayidx.2, align 4
+  %arrayidx.3 = getelementptr inbounds double, ptr %x, i64 3
+  %l6 = load double, ptr %arrayidx.3, align 4
+  %l1 = tail call double @llvm.ldexp.f64.i64(double %l0, i64 %exp)
+  %l3 = tail call double @llvm.ldexp.f64.i64(double %l2, i64 %exp)
+  %l5 = tail call double @llvm.ldexp.f64.i64(double %l4, i64 %exp)
+  %l7 = tail call double @llvm.ldexp.f64.i64(double %l6, i64 %exp)
+  store double %l1, ptr %y, align 4
+  %arrayidx2.1 = getelementptr inbounds double, ptr %y, i64 1
+  store double %l3, ptr %arrayidx2.1, align 4
+  %arrayidx2.2 = getelementptr inbounds double, ptr %y, i64 2
+  store double %l5, ptr %arrayidx2.2, align 4
+  %arrayidx2.3 = getelementptr inbounds double, ptr %y, i64 3
+  store double %l7, ptr %arrayidx2.3, align 4
+  ret void
+}
+
+define void @ldexp_f32i32_i64(ptr %x, ptr %y, i32 %exp32, i64 %exp64) {
+; CHECK-LABEL: @ldexp_f32i32_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call float @llvm.ldexp.f32.i32(float [[L0]], i32 [[EXP32:%.*]])
+; CHECK-NEXT: [[L3:%.*]] = tail call float @llvm.ldexp.f32.i32(float [[L2]], i32 [[EXP32]])
+; CHECK-NEXT: [[L5:%.*]] = tail call float @llvm.ldexp.f32.i64(float [[L4]], i64 [[EXP64:%.*]])
+; CHECK-NEXT: [[L7:%.*]] = tail call float @llvm.ldexp.f32.i64(float [[L6]], i64 [[EXP64]])
+; CHECK-NEXT: store float [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 1
+; CHECK-NEXT: store float [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 2
+; CHECK-NEXT: store float [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 3
+; CHECK-NEXT: store float [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+  %l0 = load float, ptr %x, align 4
+  %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
+  %l2 = load float, ptr %arrayidx.1, align 4
+  %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
+  %l4 = load float, ptr %arrayidx.2, align 4
+  %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
+  %l6 = load float, ptr %arrayidx.3, align 4
+  %l1 = tail call float @llvm.ldexp.f32.i32(float %l0, i32 %exp32)
+  %l3 = tail call float @llvm.ldexp.f32.i32(float %l2, i32 %exp32)
+  %l5 = tail call float @llvm.ldexp.f32.i64(float %l4, i64 %exp64)
+  %l7 = tail call float @llvm.ldexp.f32.i64(float %l6, i64 %exp64)
+  store float %l1, ptr %y, align 4
+  %arrayidx2.1 = getelementptr inbounds float, ptr %y, i64 1
+  store float %l3, ptr %arrayidx2.1, align 4
+  %arrayidx2.2 = getelementptr inbounds float, ptr %y, i64 2
+  store float %l5, ptr %arrayidx2.2, align 4
+  %arrayidx2.3 = getelementptr inbounds float, ptr %y, i64 3
+  store float %l7, ptr %arrayidx2.3, align 4
+  ret void
+}
+
+define void @ldexp_f64_i32_i64(ptr %x, ptr %y, i32 %exp32, i64 %exp64) {
+; CHECK-LABEL: @ldexp_f64_i32_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load double, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load double, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load double, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call double @llvm.ldexp.f64.i32(double [[L0]], i32 [[EXP32:%.*]])
+; CHECK-NEXT: [[L3:%.*]] = tail call double @llvm.ldexp.f64.i32(double [[L2]], i32 [[EXP32]])
+; CHECK-NEXT: [[L5:%.*]] = tail call double @llvm.ldexp.f64.i64(double [[L4]], i64 [[EXP64:%.*]])
+; CHECK-NEXT: [[L7:%.*]] = tail call double @llvm.ldexp.f64.i64(double [[L6]], i64 [[EXP64]])
+; CHECK-NEXT: store double [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 1
+; CHECK-NEXT: store double [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 2
+; CHECK-NEXT: store double [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 3
+; CHECK-NEXT: store double [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+  %l0 = load double, ptr %x, align 4
+  %arrayidx.1 = getelementptr inbounds double, ptr %x, i64 1
+  %l2 = load double, ptr %arrayidx.1, align 4
+  %arrayidx.2 = getelementptr inbounds double, ptr %x, i64 2
+  %l4 = load double, ptr %arrayidx.2, align 4
+  %arrayidx.3 = getelementptr inbounds double, ptr %x, i64 3
+  %l6 = load double, ptr %arrayidx.3, align 4
+  %l1 = tail call double @llvm.ldexp.f64.i32(double %l0, i32 %exp32)
+  %l3 = tail call double @llvm.ldexp.f64.i32(double %l2, i32 %exp32)
+  %l5 = tail call double @llvm.ldexp.f64.i64(double %l4, i64 %exp64)
+  %l7 = tail call double @llvm.ldexp.f64.i64(double %l6, i64 %exp64)
+  store double %l1, ptr %y, align 4
+  %arrayidx2.1 = getelementptr inbounds double, ptr %y, i64 1
+  store double %l3, ptr %arrayidx2.1, align 4
+  %arrayidx2.2 = getelementptr inbounds double, ptr %y, i64 2
+  store double %l5, ptr %arrayidx2.2, align 4
+  %arrayidx2.3 = getelementptr inbounds double, ptr %y, i64 3
+  store double %l7, ptr %arrayidx2.3, align 4
+  ret void
+}
+
+declare float @llvm.ldexp.f32.i32(float, i32)
+declare double @llvm.ldexp.f64.i32(double, i32)
+declare float @llvm.ldexp.f32.i64(float, i64)
+declare double @llvm.ldexp.f64.i64(double, i64)
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/fround.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/fround.ll
new file mode 100644
index 0000000000000..07a3fe7d0bbc5
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/fround.ll
@@ -0,0 +1,280 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64 < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+define void @lround_i32f32(ptr %x, ptr %y, i32 %n) {
+; CHECK-LABEL: @lround_i32f32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call i32 @llvm.lround.i32.f32(float [[L0]])
+; CHECK-NEXT: [[L3:%.*]] = tail call i32 @llvm.lround.i32.f32(float [[L2]])
+; CHECK-NEXT: [[L5:%.*]] = tail call i32 @llvm.lround.i32.f32(float [[L4]])
+; CHECK-NEXT: [[L7:%.*]] = tail call i32 @llvm.lround.i32.f32(float [[L6]])
+; CHECK-NEXT: store i32 [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 1
+; CHECK-NEXT: store i32 [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 2
+; CHECK-NEXT: store i32 [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 3
+; CHECK-NEXT: store i32 [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+  %l0 = load float, ptr %x, align 4
+  %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
+  %l2 = load float, ptr %arrayidx.1, align 4
+  %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
+  %l4 = load float, ptr %arrayidx.2, align 4
+  %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
+  %l6 = load float, ptr %arrayidx.3, align 4
+  %l1 = tail call i32 @llvm.lround.i32.f32(float %l0)
+  %l3 = tail call i32 @llvm.lround.i32.f32(float %l2)
+  %l5 = tail call i32 @llvm.lround.i32.f32(float %l4)
+  %l7 = tail call i32 @llvm.lround.i32.f32(float %l6)
+  store i32 %l1, ptr %y, align 4
+  %arrayidx2.1 = getelementptr inbounds i32, ptr %y, i64 1
+  store i32 %l3, ptr %arrayidx2.1, align 4
+  %arrayidx2.2 = getelementptr inbounds i32, ptr %y, i64 2
+  store i32 %l5, ptr %arrayidx2.2, align 4
+  %arrayidx2.3 = getelementptr inbounds i32, ptr %y, i64 3
+  store i32 %l7, ptr %arrayidx2.3, align 4
+  ret void
+}
+
+define void @lround_i32f64(ptr %x, ptr %y, i32 %n) {
+; CHECK-LABEL: @lround_i32f64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load double, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load double, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load double, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call i32 @llvm.lround.i32.f64(double [[L0]])
+; CHECK-NEXT: [[L3:%.*]] = tail call i32 @llvm.lround.i32.f64(double [[L2]])
+; CHECK-NEXT: [[L5:%.*]] = tail call i32 @llvm.lround.i32.f64(double [[L4]])
+; CHECK-NEXT: [[L7:%.*]] = tail call i32 @llvm.lround.i32.f64(double [[L6]])
+; CHECK-NEXT: store i32 [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 1
+; CHECK-NEXT: store i32 [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 2
+; CHECK-NEXT: store i32 [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 3
+; CHECK-NEXT: store i32 [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+  %l0 = load double, ptr %x, align 4
+  %arrayidx.1 = getelementptr inbounds double, ptr %x, i64 1
+  %l2 = load double, ptr %arrayidx.1, align 4
+  %arrayidx.2 = getelementptr inbounds double, ptr %x, i64 2
+  %l4 = load double, ptr %arrayidx.2, align 4
+  %arrayidx.3 = getelementptr inbounds double, ptr %x, i64 3
+  %l6 = load double, ptr %arrayidx.3, align 4
+  %l1 = tail call i32 @llvm.lround.i32.f64(double %l0)
+  %l3 = tail call i32 @llvm.lround.i32.f64(double %l2)
+  %l5 = tail call i32 @llvm.lround.i32.f64(double %l4)
+  %l7 = tail call i32 @llvm.lround.i32.f64(double %l6)
+  store i32 %l1, ptr %y, align 4
+  %arrayidx2.1 = getelementptr inbounds i32, ptr %y, i64 1
+  store i32 %l3, ptr %arrayidx2.1, align 4
+  %arrayidx2.2 = getelementptr inbounds i32, ptr %y, i64 2
+  store i32 %l5, ptr %arrayidx2.2, align 4
+  %arrayidx2.3 = getelementptr inbounds i32, ptr %y, i64 3
+  store i32 %l7, ptr %arrayidx2.3, align 4
+  ret void
+}
+
+define void @lround_i64f32(ptr %x, ptr %y, i64 %n) {
+; CHECK-LABEL: @lround_i64f32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call i64 @llvm.lround.i64.f32(float [[L0]])
+; CHECK-NEXT: [[L3:%.*]] = tail call i64 @llvm.lround.i64.f32(float [[L2]])
+; CHECK-NEXT: [[L5:%.*]] = tail call i64 @llvm.lround.i64.f32(float [[L4]])
+; CHECK-NEXT: [[L7:%.*]] = tail call i64 @llvm.lround.i64.f32(float [[L6]])
+; CHECK-NEXT: store i64 [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 1
+; CHECK-NEXT: store i64 [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 2
+; CHECK-NEXT: store i64 [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 3
+; CHECK-NEXT: store i64 [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+  %l0 = load float, ptr %x, align 4
+  %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
+  %l2 = load float, ptr %arrayidx.1, align 4
+  %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
+  %l4 = load float, ptr %arrayidx.2, align 4
+  %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
+  %l6 = load float, ptr %arrayidx.3, align 4
+  %l1 = tail call i64 @llvm.lround.i64.f32(float %l0)
+  %l3 = tail call i64 @llvm.lround.i64.f32(float %l2)
+  %l5 = tail call i64 @llvm.lround.i64.f32(float %l4)
+  %l7 = tail call i64 @llvm.lround.i64.f32(float %l6)
+  store i64 %l1, ptr %y, align 4
+  %arrayidx2.1 = getelementptr inbounds i64, ptr %y, i64 1
+  store i64 %l3, ptr %arrayidx2.1, align 4
+  %arrayidx2.2 = getelementptr inbounds i64, ptr %y, i64 2
+  store i64 %l5, ptr %arrayidx2.2, align 4
+  %arrayidx2.3 = getelementptr inbounds i64, ptr %y, i64 3
+  store i64 %l7, ptr %arrayidx2.3, align 4
+  ret void
+}
+
+define void @lround_i64f64(ptr %x, ptr %y, i64 %n) {
+; CHECK-LABEL: @lround_i64f64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load double, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load double, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load double, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call i64 @llvm.lround.i64.f64(double [[L0]])
+; CHECK-NEXT: [[L3:%.*]] = tail call i64 @llvm.lround.i64.f64(double [[L2]])
+; CHECK-NEXT: [[L5:%.*]] = tail call i64 @llvm.lround.i64.f64(double [[L4]])
+; CHECK-NEXT: [[L7:%.*]] = tail call i64 @llvm.lround.i64.f64(double [[L6]])
+; CHECK-NEXT: store i64 [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 1
+; CHECK-NEXT: store i64 [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 2
+; CHECK-NEXT: store i64 [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 3
+; CHECK-NEXT: store i64 [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+  %l0 = load double, ptr %x, align 4
+  %arrayidx.1 = getelementptr inbounds double, ptr %x, i64 1
+  %l2 = load double, ptr %arrayidx.1, align 4
+  %arrayidx.2 = getelementptr inbounds double, ptr %x, i64 2
+  %l4 = load double, ptr %arrayidx.2, align 4
+  %arrayidx.3 = getelementptr inbounds double, ptr %x, i64 3
+  %l6 = load double, ptr %arrayidx.3, align 4
+  %l1 = tail call i64 @llvm.lround.i64.f64(double %l0)
+  %l3 = tail call i64 @llvm.lround.i64.f64(double %l2)
+  %l5 = tail call i64 @llvm.lround.i64.f64(double %l4)
+  %l7 = tail call i64 @llvm.lround.i64.f64(double %l6)
+  store i64 %l1, ptr %y, align 4
+  %arrayidx2.1 = getelementptr inbounds i64, ptr %y, i64 1
+  store i64 %l3, ptr %arrayidx2.1, align 4
+  %arrayidx2.2 = getelementptr inbounds i64, ptr %y, i64 2
+  store i64 %l5, ptr %arrayidx2.2, align 4
+  %arrayidx2.3 = getelementptr inbounds i64, ptr %y, i64 3
+  store i64 %l7, ptr %arrayidx2.3, align 4
+  ret void
+}
+
+define void @llround_i64f32(ptr %x, ptr %y, i64 %n) {
+; CHECK-LABEL: @llround_i64f32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call i64 @llvm.llround.i64.f32(float [[L0]])
+; CHECK-NEXT: [[L3:%.*]] = tail call i64 @llvm.llround.i64.f32(float [[L2]])
+; CHECK-NEXT: [[L5:%.*]] = tail call i64 @llvm.llround.i64.f32(float [[L4]])
+; CHECK-NEXT: [[L7:%.*]] = tail call i64 @llvm.llround.i64.f32(float [[L6]])
+; CHECK-NEXT: store i64 [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 1
+; CHECK-NEXT: store i64 [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 2
+; CHECK-NEXT: store i64 [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 3
+; CHECK-NEXT: store i64 [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+  %l0 = load float, ptr %x, align 4
+  %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
+  %l2 = load float, ptr %arrayidx.1, align 4
+  %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
+  %l4 = load float, ptr %arrayidx.2, align 4
+  %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
+  %l6 = load float, ptr %arrayidx.3, align 4
+  %l1 = tail call i64 @llvm.llround.i64.f32(float %l0)
+  %l3 = tail call i64 @llvm.llround.i64.f32(float %l2)
+  %l5 = tail call i64 @llvm.llround.i64.f32(float %l4)
+  %l7 = tail call i64 @llvm.llround.i64.f32(float %l6)
+  store i64 %l1, ptr %y, align 4
+  %arrayidx2.1 = getelementptr inbounds i64, ptr %y, i64 1
+  store i64 %l3, ptr %arrayidx2.1, align 4
+  %arrayidx2.2 = getelementptr inbounds i64, ptr %y, i64 2
+  store i64 %l5, ptr %arrayidx2.2, align 4
+  %arrayidx2.3 = getelementptr inbounds i64, ptr %y, i64 3
+  store i64 %l7, ptr %arrayidx2.3, align 4
+  ret void
+}
+
+define void @llround_i64f64(ptr %x, ptr %y, i64 %n) {
+; CHECK-LABEL: @llround_i64f64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load double, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load double, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load double, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call i64 @llvm.llround.i64.f64(double [[L0]])
+; CHECK-NEXT: [[L3:%.*]] = tail call i64 @llvm.llround.i64.f64(double [[L2]])
+; CHECK-NEXT: [[L5:%.*]] = tail call i64 @llvm.llround.i64.f64(double [[L4]])
+; CHECK-NEXT: [[L7:%.*]] = tail call i64 @llvm.llround.i64.f64(double [[L6]])
+; CHECK-NEXT: store i64 [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 1
+; CHECK-NEXT: store i64 [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 2
+; CHECK-NEXT: store i64 [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 3
+; CHECK-NEXT: store i64 [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+  %l0 = load double, ptr %x, align 4
+  %arrayidx.1 = getelementptr inbounds double, ptr %x, i64 1
+  %l2 = load double, ptr %arrayidx.1, align 4
+  %arrayidx.2 = getelementptr inbounds double, ptr %x, i64 2
+  %l4 = load double, ptr %arrayidx.2, align 4
+  %arrayidx.3 = getelementptr inbounds double, ptr %x, i64 3
+  %l6 = load double, ptr %arrayidx.3, align 4
+  %l1 = tail call i64 @llvm.llround.i64.f64(double %l0)
+  %l3 = tail call i64 @llvm.llround.i64.f64(double %l2)
+  %l5 = tail call i64 @llvm.llround.i64.f64(double %l4)
+  %l7 = tail call i64 @llvm.llround.i64.f64(double %l6)
+  store i64 %l1, ptr %y, align 4
+  %arrayidx2.1 = getelementptr inbounds i64, ptr %y, i64 1
+  store i64 %l3, ptr %arrayidx2.1, align 4
+  %arrayidx2.2 = getelementptr inbounds i64, ptr %y, i64 2
+  store i64 %l5, ptr %arrayidx2.2, align 4
+  %arrayidx2.3 = getelementptr inbounds i64, ptr %y, i64 3
+  store i64 %l7, ptr %arrayidx2.3, align 4
+  ret void
+}
+
+declare i32 @llvm.lround.i32.f32(float)
+declare i64 @llvm.lround.i64.f32(float)
+declare i64 @llvm.lround.i64.f64(double)
+declare i64 @llvm.llround.i64.f32(float)
+declare i64 @llvm.llround.i64.f64(double)
diff --git a/llvm/test/Transforms/Scalarizer/intrinsics.ll b/llvm/test/Transforms/Scalarizer/intrinsics.ll
index cee44ef434260..070c765294ba1 100644
--- a/llvm/test/Transforms/Scalarizer/intrinsics.ll
+++ b/llvm/test/Transforms/Scalarizer/intrinsics.ll
@@ -8,6 +8,7 @@ declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
 declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>)
 declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>)
 declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>)
+declare <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>)
 
 ; Ternary fp
 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
@@ -32,6 +33,8 @@ declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float>)
 ; Unary fp operand, int return type
 declare <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float>)
 declare <2 x i32> @llvm.llrint.v2i32.v2f32(<2 x float>)
+declare <2 x i32> @llvm.lround.v2i32.v2f32(<2 x float>)
+declare <2 x i32> @llvm.llround.v2i32.v2f32(<2 x float>)
 
 ; Bool return type, overloaded on fp operand type
 declare <2 x i1> @llvm.is.fpclass(<2 x float>, i32)
@@ -159,6 +162,22 @@ define <2 x float> @scalarize_powi_v2f32(<2 x float> %x, i32 %y) #0 {
   ret <2 x float> %powi
 }
 
+define <2 x float> @scalarize_ldexp_v2f32(<2 x float> %x, <2 x i32> %y) #0 {
+; CHECK-LABEL: @scalarize_ldexp_v2f32(
+; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
+; CHECK-NEXT: [[Y:%.*]] = extractelement <2 x i32> [[Y1:%.*]], i64 0
+; CHECK-NEXT: [[POWI_I0:%.*]] = call float @llvm.ldexp.f32.i32(float [[X_I0]], i32 [[Y]])
+; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
+; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x i32> [[Y1]], i64 1
+; CHECK-NEXT: [[POWI_I1:%.*]] = call float @llvm.ldexp.f32.i32(float [[X_I1]], i32 [[Y_I1]])
+; CHECK-NEXT: [[POWI_UPTO0:%.*]] = insertelement <2 x float> poison, float [[POWI_I0]], i64 0
+; CHECK-NEXT: [[POWI:%.*]] = insertelement <2 x float> [[POWI_UPTO0]], float [[POWI_I1]], i64 1
+; CHECK-NEXT: ret <2 x float> [[POWI]]
+;
+  %powi = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %x, <2 x i32> %y)
+  ret <2 x float> %powi
+}
+
 define <2 x i32> @scalarize_smul_fix_sat_v2i32(<2 x i32> %x) #0 {
 ; CHECK-LABEL: @scalarize_smul_fix_sat_v2i32(
 ; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0
@@ -243,6 +262,34 @@ define <2 x i32> @scalarize_llrint(<2 x float> %x) #0 {
   ret <2 x i32> %rnd
 }
 
+define <2 x i32> @scalarize_lround(<2 x float> %x) #0 {
+; CHECK-LABEL: @scalarize_lround(
+; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
+; CHECK-NEXT: [[RND_I0:%.*]] = call i32 @llvm.lround.i32.f32(float [[X_I0]])
+; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
+; CHECK-NEXT: [[RND_I1:%.*]] = call i32 @llvm.lround.i32.f32(float [[X_I1]])
+; CHECK-NEXT: [[RND_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RND_I0]], i64 0
+; CHECK-NEXT: [[RND:%.*]] = insertelement <2 x i32> [[RND_UPTO0]], i32 [[RND_I1]], i64 1
+; CHECK-NEXT: ret <2 x i32> [[RND]]
+;
+  %rnd = call <2 x i32> @llvm.lround.v2i32.v2f32(<2 x float> %x)
+  ret <2 x i32> %rnd
+}
+
+define <2 x i32> @scalarize_llround(<2 x float> %x) #0 {
+; CHECK-LABEL: @scalarize_llround(
+; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
+; CHECK-NEXT: [[RND_I0:%.*]] = call i32 @llvm.llround.i32.f32(float [[X_I0]])
+; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
+; CHECK-NEXT: [[RND_I1:%.*]] = call i32 @llvm.llround.i32.f32(float [[X_I1]])
+; CHECK-NEXT: [[RND_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RND_I0]], i64 0
+; CHECK-NEXT: [[RND:%.*]] = insertelement <2 x i32> [[RND_UPTO0]], i32 [[RND_I1]], i64 1
+; CHECK-NEXT: ret <2 x i32> [[RND]]
+;
+  %rnd = call <2 x i32> @llvm.llround.v2i32.v2f32(<2 x float> %x)
+  ret <2 x i32> %rnd
+}
+
 define <2 x i1> @scalarize_is_fpclass(<2 x float> %x) #0 {
 ; CHECK-LABEL: @scalarize_is_fpclass(
 ; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0