Reland [VectorUtils] Trivially vectorize ldexp, [l]lround #152476
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms

Author: Ramkumar Ramachandra (artagnon)

Changes: The original patch, landed as 1336675, was reverted due to a bug in LoopVectorize that resulted in a crash. The bug has since been fixed by 95c32bf ([VPlan] Return invalid cost if any skeleton block has invalid costs), and this reland is identical to the original patch.

Patch is 43.85 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/152476.diff

5 Files Affected:
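In essence, marking these intrinsics trivially vectorizable lets LoopVectorize and SLPVectorizer widen the scalar calls directly to their vector overloads. A minimal sketch of the mapping, mirroring the VF=4 forms checked in the tests below (value names are illustrative):

; ldexp overloads on operand 1 (like powi), so the i32 exponent is
; widened as well; a uniform exponent is simply splatted to <4 x i32>.
%r  = call float @llvm.ldexp.f32.i32(float %v, i32 %exp)
%vr = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> %vv, <4 x i32> %vexp)

; [l]lround overloads on the return type and operand 0 (like [l]lrint),
; permitting mixed widths such as i64 results from f32 inputs.
%s  = call i64 @llvm.lround.i64.f32(float %f)
%vs = call <4 x i64> @llvm.lround.v4i64.v4f32(<4 x float> %vf)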
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index b3b4c37475eef..425ea311d653a 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -81,6 +81,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::exp:
case Intrinsic::exp10:
case Intrinsic::exp2:
+ case Intrinsic::ldexp:
case Intrinsic::log:
case Intrinsic::log10:
case Intrinsic::log2:
@@ -108,6 +109,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::canonicalize:
case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat:
+ case Intrinsic::lround:
+ case Intrinsic::llround:
case Intrinsic::lrint:
case Intrinsic::llrint:
case Intrinsic::ucmp:
@@ -189,6 +192,8 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
switch (ID) {
case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat:
+ case Intrinsic::lround:
+ case Intrinsic::llround:
case Intrinsic::lrint:
case Intrinsic::llrint:
case Intrinsic::vp_lrint:
@@ -203,6 +208,7 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
case Intrinsic::vp_is_fpclass:
return OpdIdx == 0;
case Intrinsic::powi:
+ case Intrinsic::ldexp:
return OpdIdx == -1 || OpdIdx == 1;
default:
return OpdIdx == -1;
diff --git a/llvm/test/Transforms/LoopVectorize/intrinsic.ll b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
index 9c910d70807a1..10d83a456d0e2 100644
--- a/llvm/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
@@ -324,6 +324,56 @@ for.end: ; preds = %for.body, %entry
declare double @llvm.exp2.f64(double)
+define void @ldexp_f32i32(i32 %n, ptr %y, ptr %x, i32 %exp) {
+; CHECK-LABEL: @ldexp_f32i32(
+; CHECK: llvm.ldexp.v4f32.v4i32
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %y, i32 %iv
+ %0 = load float, ptr %arrayidx, align 4
+ %call = tail call float @llvm.ldexp.f32.i32(float %0, i32 %exp)
+ %arrayidx2 = getelementptr inbounds float, ptr %x, i32 %iv
+ store float %call, ptr %arrayidx2, align 4
+ %iv.next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %iv.next, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.ldexp.f32.i32(float, i32)
+
+define void @ldexp_f64i32(i32 %n, ptr %y, ptr %x, i32 %exp) {
+; CHECK-LABEL: @ldexp_f64i32(
+; CHECK: llvm.ldexp.v4f64.v4i32
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, ptr %y, i32 %iv
+ %0 = load double, ptr %arrayidx, align 8
+ %call = tail call double @llvm.ldexp.f64.i32(double %0, i32 %exp)
+ %arrayidx2 = getelementptr inbounds double, ptr %x, i32 %iv
+ store double %call, ptr %arrayidx2, align 8
+ %iv.next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %iv.next, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.ldexp.f64.i32(double, i32)
+
define void @log_f32(i32 %n, ptr %y, ptr %x) {
; CHECK-LABEL: @log_f32(
; CHECK: llvm.log.v4f32
@@ -976,6 +1026,157 @@ for.end: ; preds = %for.body, %entry
declare double @llvm.roundeven.f64(double)
+
+define void @lround_i32f32(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @lround_i32f32(
+; CHECK: llvm.lround.v4i32.v4f32
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %y, i32 %iv
+ %0 = load float, ptr %arrayidx, align 4
+ %call = tail call i32 @llvm.lround.i32.f32(float %0)
+ %arrayidx2 = getelementptr inbounds i32, ptr %x, i32 %iv
+ store i32 %call, ptr %arrayidx2, align 4
+ %iv.next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %iv.next, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare i32 @llvm.lround.i32.f32(float)
+
+define void @lround_i32f64(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @lround_i32f64(
+; CHECK: llvm.lround.v4i32.v4f64
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, ptr %y, i32 %iv
+ %0 = load double, ptr %arrayidx, align 8
+ %call = tail call i32 @llvm.lround.i32.f64(double %0)
+ %arrayidx2 = getelementptr inbounds i32, ptr %x, i32 %iv
+ store i32 %call, ptr %arrayidx2, align 8
+ %iv.next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %iv.next, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare i32 @llvm.lround.i32.f64(double)
+
+define void @lround_i64f32(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @lround_i64f32(
+; CHECK: llvm.lround.v4i64.v4f32
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %y, i32 %iv
+ %0 = load float, ptr %arrayidx, align 4
+ %call = tail call i64 @llvm.lround.i64.f32(float %0)
+ %arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv
+ store i64 %call, ptr %arrayidx2, align 4
+ %iv.next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %iv.next, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare i64 @llvm.lround.i64.f32(float)
+
+define void @lround_i64f64(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @lround_i64f64(
+; CHECK: llvm.lround.v4i64.v4f64
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, ptr %y, i32 %iv
+ %0 = load double, ptr %arrayidx, align 8
+ %call = tail call i64 @llvm.lround.i64.f64(double %0)
+ %arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv
+ store i64 %call, ptr %arrayidx2, align 8
+ %iv.next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %iv.next, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare i64 @llvm.lround.i64.f64(double)
+
+define void @llround_i64f32(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @llround_i64f32(
+; CHECK: llvm.llround.v4i64.v4f32
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %y, i32 %iv
+ %0 = load float, ptr %arrayidx, align 4
+ %call = tail call i64 @llvm.llround.i64.f32(float %0)
+ %arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv
+ store i64 %call, ptr %arrayidx2, align 4
+ %iv.next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %iv.next, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare i64 @llvm.llround.i64.f32(float)
+
+define void @llround_i64f64(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @llround_i64f64(
+; CHECK: llvm.llround.v4i64.v4f64
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, ptr %y, i32 %iv
+ %0 = load double, ptr %arrayidx, align 8
+ %call = tail call i64 @llvm.llround.i64.f64(double %0)
+ %arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv
+ store i64 %call, ptr %arrayidx2, align 8
+ %iv.next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %iv.next, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare i64 @llvm.llround.i64.f64(double)
+
define void @fma_f32(i32 %n, ptr %y, ptr %x, ptr %z, ptr %w) {
; CHECK-LABEL: @fma_f32(
; CHECK: llvm.fma.v4f32
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/exp.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/exp.ll
new file mode 100644
index 0000000000000..301e5da28f0d8
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/exp.ll
@@ -0,0 +1,279 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64 < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+define void @ldexp_f32i32(ptr %x, ptr %y, i32 %exp) {
+; CHECK-LABEL: @ldexp_f32i32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call float @llvm.ldexp.f32.i32(float [[L0]], i32 [[EXP:%.*]])
+; CHECK-NEXT: [[L3:%.*]] = tail call float @llvm.ldexp.f32.i32(float [[L2]], i32 [[EXP]])
+; CHECK-NEXT: [[L5:%.*]] = tail call float @llvm.ldexp.f32.i32(float [[L4]], i32 [[EXP]])
+; CHECK-NEXT: [[L7:%.*]] = tail call float @llvm.ldexp.f32.i32(float [[L6]], i32 [[EXP]])
+; CHECK-NEXT: store float [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 1
+; CHECK-NEXT: store float [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 2
+; CHECK-NEXT: store float [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 3
+; CHECK-NEXT: store float [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %l0 = load float, ptr %x, align 4
+ %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
+ %l2 = load float, ptr %arrayidx.1, align 4
+ %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
+ %l4 = load float, ptr %arrayidx.2, align 4
+ %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
+ %l6 = load float, ptr %arrayidx.3, align 4
+ %l1 = tail call float @llvm.ldexp.f32.i32(float %l0, i32 %exp)
+ %l3 = tail call float @llvm.ldexp.f32.i32(float %l2, i32 %exp)
+ %l5 = tail call float @llvm.ldexp.f32.i32(float %l4, i32 %exp)
+ %l7 = tail call float @llvm.ldexp.f32.i32(float %l6, i32 %exp)
+ store float %l1, ptr %y, align 4
+ %arrayidx2.1 = getelementptr inbounds float, ptr %y, i64 1
+ store float %l3, ptr %arrayidx2.1, align 4
+ %arrayidx2.2 = getelementptr inbounds float, ptr %y, i64 2
+ store float %l5, ptr %arrayidx2.2, align 4
+ %arrayidx2.3 = getelementptr inbounds float, ptr %y, i64 3
+ store float %l7, ptr %arrayidx2.3, align 4
+ ret void
+}
+
+define void @ldexp_f64i32(ptr %x, ptr %y, i32 %exp) {
+; CHECK-LABEL: @ldexp_f64i32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load double, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load double, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load double, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call double @llvm.ldexp.f64.i32(double [[L0]], i32 [[EXP:%.*]])
+; CHECK-NEXT: [[L3:%.*]] = tail call double @llvm.ldexp.f64.i32(double [[L2]], i32 [[EXP]])
+; CHECK-NEXT: [[L5:%.*]] = tail call double @llvm.ldexp.f64.i32(double [[L4]], i32 [[EXP]])
+; CHECK-NEXT: [[L7:%.*]] = tail call double @llvm.ldexp.f64.i32(double [[L6]], i32 [[EXP]])
+; CHECK-NEXT: store double [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 1
+; CHECK-NEXT: store double [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 2
+; CHECK-NEXT: store double [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 3
+; CHECK-NEXT: store double [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %l0 = load double, ptr %x, align 4
+ %arrayidx.1 = getelementptr inbounds double, ptr %x, i64 1
+ %l2 = load double, ptr %arrayidx.1, align 4
+ %arrayidx.2 = getelementptr inbounds double, ptr %x, i64 2
+ %l4 = load double, ptr %arrayidx.2, align 4
+ %arrayidx.3 = getelementptr inbounds double, ptr %x, i64 3
+ %l6 = load double, ptr %arrayidx.3, align 4
+ %l1 = tail call double @llvm.ldexp.f64.i32(double %l0, i32 %exp)
+ %l3 = tail call double @llvm.ldexp.f64.i32(double %l2, i32 %exp)
+ %l5 = tail call double @llvm.ldexp.f64.i32(double %l4, i32 %exp)
+ %l7 = tail call double @llvm.ldexp.f64.i32(double %l6, i32 %exp)
+ store double %l1, ptr %y, align 4
+ %arrayidx2.1 = getelementptr inbounds double, ptr %y, i64 1
+ store double %l3, ptr %arrayidx2.1, align 4
+ %arrayidx2.2 = getelementptr inbounds double, ptr %y, i64 2
+ store double %l5, ptr %arrayidx2.2, align 4
+ %arrayidx2.3 = getelementptr inbounds double, ptr %y, i64 3
+ store double %l7, ptr %arrayidx2.3, align 4
+ ret void
+}
+
+define void @ldexp_f32i64(ptr %x, ptr %y, i64 %exp) {
+; CHECK-LABEL: @ldexp_f32i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call float @llvm.ldexp.f32.i64(float [[L0]], i64 [[EXP:%.*]])
+; CHECK-NEXT: [[L3:%.*]] = tail call float @llvm.ldexp.f32.i64(float [[L2]], i64 [[EXP]])
+; CHECK-NEXT: [[L5:%.*]] = tail call float @llvm.ldexp.f32.i64(float [[L4]], i64 [[EXP]])
+; CHECK-NEXT: [[L7:%.*]] = tail call float @llvm.ldexp.f32.i64(float [[L6]], i64 [[EXP]])
+; CHECK-NEXT: store float [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 1
+; CHECK-NEXT: store float [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 2
+; CHECK-NEXT: store float [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 3
+; CHECK-NEXT: store float [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %l0 = load float, ptr %x, align 4
+ %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
+ %l2 = load float, ptr %arrayidx.1, align 4
+ %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
+ %l4 = load float, ptr %arrayidx.2, align 4
+ %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
+ %l6 = load float, ptr %arrayidx.3, align 4
+ %l1 = tail call float @llvm.ldexp.f32.i64(float %l0, i64 %exp)
+ %l3 = tail call float @llvm.ldexp.f32.i64(float %l2, i64 %exp)
+ %l5 = tail call float @llvm.ldexp.f32.i64(float %l4, i64 %exp)
+ %l7 = tail call float @llvm.ldexp.f32.i64(float %l6, i64 %exp)
+ store float %l1, ptr %y, align 4
+ %arrayidx2.1 = getelementptr inbounds float, ptr %y, i64 1
+ store float %l3, ptr %arrayidx2.1, align 4
+ %arrayidx2.2 = getelementptr inbounds float, ptr %y, i64 2
+ store float %l5, ptr %arrayidx2.2, align 4
+ %arrayidx2.3 = getelementptr inbounds float, ptr %y, i64 3
+ store float %l7, ptr %arrayidx2.3, align 4
+ ret void
+}
+
+define void @ldexp_f64i64(ptr %x, ptr %y, i64 %exp) {
+; CHECK-LABEL: @ldexp_f64i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load double, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load double, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load double, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call double @llvm.ldexp.f64.i64(double [[L0]], i64 [[EXP:%.*]])
+; CHECK-NEXT: [[L3:%.*]] = tail call double @llvm.ldexp.f64.i64(double [[L2]], i64 [[EXP]])
+; CHECK-NEXT: [[L5:%.*]] = tail call double @llvm.ldexp.f64.i64(double [[L4]], i64 [[EXP]])
+; CHECK-NEXT: [[L7:%.*]] = tail call double @llvm.ldexp.f64.i64(double [[L6]], i64 [[EXP]])
+; CHECK-NEXT: store double [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 1
+; CHECK-NEXT: store double [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 2
+; CHECK-NEXT: store double [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 3
+; CHECK-NEXT: store double [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %l0 = load double, ptr %x, align 4
+ %arrayidx.1 = getelementptr inbounds double, ptr %x, i64 1
+ %l2 = load double, ptr %arrayidx.1, align 4
+ %arrayidx.2 = getelementptr inbounds double, ptr %x, i64 2
+ %l4 = load double, ptr %arrayidx.2, align 4
+ %arrayidx.3 = getelementptr inbounds double, ptr %x, i64 3
+ %l6 = load double, ptr %arrayidx.3, align 4
+ %l1 = tail call double @llvm.ldexp.f64.i64(double %l0, i64 %exp)
+ %l3 = tail call double @llvm.ldexp.f64.i64(double %l2, i64 %exp)
+ %l5 = tail call double @llvm.ldexp.f64.i64(double %l4, i64 %exp)
+ %l7 = tail call double @llvm.ldexp.f64.i64(double %l6, i64 %exp)
+ store double %l1, ptr %y, align 4
+ %arrayidx2.1 = getelementptr inbounds double, ptr %y, i64 1
+ store double %l3, ptr %arrayidx2.1, align 4
+ %arrayidx2.2 = getelementptr inbounds double, ptr %y, i64 2
+ store double %l5, ptr %arrayidx2.2, align 4
+ %arrayidx2.3 = getelementptr inbounds double, ptr %y, i64 3
+ store double %l7, ptr %arrayidx2.3, align 4
+ ret void
+}
+
+define void @ldexp_f32i32_i64(ptr %x, ptr %y, i32 %exp32, i64 %exp64) {
+; CHECK-LABEL: @ldexp_f32i32_i64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call float @llvm.l...
[truncated]
LGTM, thanks. The issue that caused the revert has been fixed, so this should be good to go now.