diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 3a761bc4e8119..d429fe96f9bec 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -1451,7 +1451,7 @@ Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) { // Expand the code for this SCEV. Value *V = expand(SH); - if (Ty) { + if (Ty && Ty != V->getType()) { assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) && "non-trivial casts should be done with the SCEVs directly!"); V = InsertNoopCastOfTo(V, Ty); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 2679ed6b26b5d..6b746a48c46ec 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3324,7 +3324,7 @@ void VPExpandSCEVRecipe::execute(VPTransformState &State) { } const DataLayout &DL = State.CFG.PrevBB->getDataLayout(); - SCEVExpander Exp(SE, DL, "induction"); + SCEVExpander Exp(SE, DL, "induction", /*PreserveLCSSA=*/true); Value *Res = Exp.expandCodeFor(Expr, Expr->getType(), &*State.Builder.GetInsertPoint()); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index e40af3e2e3d30..1a7322ec0aff6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -30,11 +30,18 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, VPValue *Expanded = nullptr; if (auto *E = dyn_cast(Expr)) Expanded = Plan.getOrAddLiveIn(E->getValue()); - else if (auto *E = dyn_cast(Expr)) - Expanded = Plan.getOrAddLiveIn(E->getValue()); else { - Expanded = new VPExpandSCEVRecipe(Expr, SE); - Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe()); + auto *U = dyn_cast(Expr); + // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction + // value. Otherwise the value may be defined in a loop and using it directly + // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA + // form. + if (U && !isa(U->getValue())) { + Expanded = Plan.getOrAddLiveIn(U->getValue()); + } else { + Expanded = new VPExpandSCEVRecipe(Expr, SE); + Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe()); + } } Plan.addSCEVExpansion(Expr, Expanded); return Expanded; diff --git a/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll b/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll new file mode 100644 index 0000000000000..868e07aa0490e --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll @@ -0,0 +1,110 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p loop-vectorize -vectorizer-min-trip-count=8 -mcpu=skylake-avx512 -S %s | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +; Test case for https://github.com/llvm/llvm-project/issues/121518. Make sure +; that we preserve LCSSA form when using %iv.1 from loop.1 in the trip count +; expression when vectorizing loop.2 +define void @value_defined_in_loop1_used_for_trip_counts(i32 %start, i1 %c, ptr %dst) { +; CHECK-LABEL: define void @value_defined_in_loop1_used_for_trip_counts( +; CHECK-SAME: i32 [[START:%.*]], i1 [[C:%.*]], ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[C]], i32 0, i32 7 +; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[SELECT]] to i64 +; CHECK-NEXT: br label %[[LOOP_1:.*]] +; CHECK: [[LOOP_1]]: +; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[ZEXT]], %[[LOOP_1]] ] +; CHECK-NEXT: br i1 false, label %[[LOOP_1_EXIT:.*]], label %[[LOOP_1]] +; CHECK: [[LOOP_1_EXIT]]: +; CHECK-NEXT: [[IV_1_LCSSA2:%.*]] = phi i64 [ [[IV_1]], %[[LOOP_1]] ] +; CHECK-NEXT: [[IV_1_LCSSA:%.*]] = phi i64 [ [[IV_1]], %[[LOOP_1]] ] +; CHECK-NEXT: br i1 [[C]], label %[[LOOP_2_PREHEADER:.*]], label %[[LOOP_3_PREHEADER:.*]] +; CHECK: [[LOOP_3_PREHEADER]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[IV_1_LCSSA2]], 15 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[IV_1_LCSSA2]], 1 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <16 x i64> , [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DST]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> zeroinitializer, ptr [[TMP2]], i32 1, <16 x i1> [[TMP0]]) +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[EXIT_1_LOOPEXIT1:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_3_PREHEADER]] ] +; CHECK-NEXT: br label %[[LOOP_3:.*]] +; CHECK: [[LOOP_2_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP_2:.*]] +; CHECK: [[LOOP_2]]: +; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[IV_2_NEXT:%.*]], %[[LOOP_2]] ], [ 0, %[[LOOP_2_PREHEADER]] ] +; CHECK-NEXT: [[IV_3:%.*]] = phi i32 [ [[IV_3_NEXT:%.*]], %[[LOOP_2]] ], [ [[START]], %[[LOOP_2_PREHEADER]] ] +; CHECK-NEXT: [[IV_3_NEXT]] = add i32 [[IV_3]], 1 +; CHECK-NEXT: [[IV_2_NEXT]] = add i64 [[IV_2]], 1 +; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[IV_3]], 1 +; CHECK-NEXT: [[ZEXT8:%.*]] = zext i32 [[SHL]] to i64 +; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[ZEXT8]] +; CHECK-NEXT: store i16 0, ptr [[GEP_DST]], align 2 +; CHECK-NEXT: [[EC_2:%.*]] = icmp ult i64 [[IV_2]], [[IV_1_LCSSA]] +; CHECK-NEXT: br i1 [[EC_2]], label %[[LOOP_2]], label %[[EXIT_1_LOOPEXIT:.*]] +; CHECK: [[LOOP_3]]: +; CHECK-NEXT: [[IV_4:%.*]] = phi i64 [ [[IV_4_NEXT:%.*]], %[[LOOP_3]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[GEP_DST_2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV_4]] +; CHECK-NEXT: store i8 0, ptr [[GEP_DST_2]], align 1 +; CHECK-NEXT: [[IV_4_NEXT]] = add i64 [[IV_4]], 1 +; CHECK-NEXT: [[EC_3:%.*]] = icmp ult i64 [[IV_4_NEXT]], [[IV_1_LCSSA]] +; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP_3]], label %[[EXIT_1_LOOPEXIT1]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[EXIT_1_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT_1:.*]] +; CHECK: [[EXIT_1_LOOPEXIT1]]: +; CHECK-NEXT: br label %[[EXIT_1]] +; CHECK: [[EXIT_1]]: +; CHECK-NEXT: ret void +; +entry: + %select = select i1 %c, i32 0, i32 7 + %zext = zext i32 %select to i64 + br label %loop.1 + +loop.1: + %iv.1 = phi i64 [ 0, %entry ], [ %zext, %loop.1 ] + br i1 false, label %loop.1.exit, label %loop.1 + +loop.1.exit: + br i1 %c, label %loop.2, label %loop.3 + +loop.2: + %iv.2 = phi i64 [ 0, %loop.1.exit ], [ %iv.2.next, %loop.2 ] + %iv.3 = phi i32 [ %start, %loop.1.exit ], [ %iv.3.next, %loop.2 ] + %iv.3.next = add i32 %iv.3, 1 + %iv.2.next = add i64 %iv.2, 1 + %shl = shl i32 %iv.3, 1 + %zext8 = zext i32 %shl to i64 + %gep.dst = getelementptr i8, ptr %dst, i64 %zext8 + store i16 0, ptr %gep.dst, align 2 + %ec.2 = icmp ult i64 %iv.2, %iv.1 + br i1 %ec.2, label %loop.2, label %exit.1 + +loop.3: + %iv.4 = phi i64 [ 0, %loop.1.exit ], [ %iv.4.next, %loop.3 ] + %gep.dst.2 = getelementptr i8, ptr %dst, i64 %iv.4 + store i8 0, ptr %gep.dst.2, align 1 + %iv.4.next = add i64 %iv.4, 1 + %ec.3 = icmp ult i64 %iv.4.next, %iv.1 + br i1 %ec.3, label %loop.3, label %exit.1 + +exit.1: + ret void +} +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[META2]] = !{!"llvm.loop.isvectorized", i32 1} +;. diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll index 601c475df56cd..29b29c500c46e 100644 --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -842,14 +842,16 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]] ; VEC4_INTERL2-NEXT: [[IND_END3:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]] ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 -; VEC4_INTERL2-NEXT: [[BROADCAST:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer -; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = fmul fast <4 x float> [[BROADCAST]], splat (float 4.000000e+00) +; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fmul fast <4 x float> [[DOTSPLATINSERT2]], +; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[TMP19]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 ; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 ; VEC4_INTERL2-NEXT: [[DOTSPLAT7:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT6]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[DOTSPLAT7]], ; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], [[TMP4]] +; VEC4_INTERL2-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 +; VEC4_INTERL2-NEXT: [[BROADCAST:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT7]], <4 x float> poison, <4 x i32> zeroinitializer ; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC4_INTERL2: vector.body: ; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]