diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 907839711a39c..32846cad8ac42 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8887,15 +8887,18 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( // Update wide induction increments to use the same step as the corresponding // wide induction. This enables detecting induction increments directly in // VPlan and removes redundant splats. + SmallDenseMap MapIVs; for (const auto &[Phi, ID] : Legal->getInductionVars()) { auto *IVInc = cast( Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch())); - if (IVInc->getOperand(0) != Phi || IVInc->getOpcode() != Instruction::Add) - continue; VPWidenInductionRecipe *WideIV = cast(RecipeBuilder.getRecipe(Phi)); - VPRecipeBase *R = RecipeBuilder.getRecipe(IVInc); - R->setOperand(1, WideIV->getStepValue()); + VPValue *V = RecipeBuilder.getVPValueOrAddLiveIn(IVInc); + if (!isa(IVInc)) + MapIVs[V] = WideIV; + if (IVInc->getOperand(0) != Phi || IVInc->getOpcode() != Instruction::Add) + continue; + V->getDefiningRecipe()->setOperand(1, WideIV->getStepValue()); } DenseMap IVEndValues; @@ -8992,7 +8995,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( VPlanTransforms::addActiveLaneMask(*Plan, ForControlFlow, WithoutRuntimeCheck); } - VPlanTransforms::optimizeInductionExitUsers(*Plan, IVEndValues); + VPlanTransforms::optimizeInductionExitUsers(*Plan, IVEndValues, MapIVs); assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid"); return Plan; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 8e05b0138eeed..b92b84105ac2f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -823,10 +823,10 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan, /// Attempts to optimize the induction variable exit values for users in the /// exit block coming from the latch in the original scalar loop. -static VPValue * -optimizeLatchExitInductionUser(VPlan &Plan, VPTypeAnalysis &TypeInfo, - VPBlockBase *PredVPBB, VPValue *Op, - DenseMap &EndValues) { +static VPValue *optimizeLatchExitInductionUser( + VPlan &Plan, VPTypeAnalysis &TypeInfo, VPBlockBase *PredVPBB, VPValue *Op, + DenseMap &EndValues, + SmallDenseMap &MapIVs) { using namespace VPlanPatternMatch; VPValue *Incoming; @@ -834,7 +834,13 @@ optimizeLatchExitInductionUser(VPlan &Plan, VPTypeAnalysis &TypeInfo, m_VPValue(Incoming)))) return nullptr; - auto *WideIV = getOptimizableIVOf(Incoming); + // Truncated IV is not handled here. + auto *IntOrFpIV = dyn_cast(Incoming); + if (IntOrFpIV && IntOrFpIV->getTruncInst()) + return nullptr; + auto *WideIV = dyn_cast(Incoming); + if (!WideIV) + WideIV = MapIVs.lookup(Incoming); if (!WideIV) return nullptr; @@ -873,7 +879,8 @@ optimizeLatchExitInductionUser(VPlan &Plan, VPTypeAnalysis &TypeInfo, } void VPlanTransforms::optimizeInductionExitUsers( - VPlan &Plan, DenseMap &EndValues) { + VPlan &Plan, DenseMap &EndValues, + SmallDenseMap &MapIVs) { VPBlockBase *MiddleVPBB = Plan.getMiddleBlock(); VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType()); for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) { @@ -883,8 +890,9 @@ void VPlanTransforms::optimizeInductionExitUsers( for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) { VPValue *Escape = nullptr; if (PredVPBB == MiddleVPBB) - Escape = optimizeLatchExitInductionUser( - Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx), EndValues); + Escape = optimizeLatchExitInductionUser(Plan, TypeInfo, PredVPBB, + ExitIRI->getOperand(Idx), + EndValues, MapIVs); else Escape = optimizeEarlyExitInductionUser(Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx)); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 8d2eded45da22..55d4a9e6a42e4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -211,9 +211,9 @@ struct VPlanTransforms { /// If there's a single exit block, optimize its phi recipes that use exiting /// IV values by feeding them precomputed end values instead, possibly taken /// one step backwards. - static void - optimizeInductionExitUsers(VPlan &Plan, - DenseMap &EndValues); + static void optimizeInductionExitUsers( + VPlan &Plan, DenseMap &EndValues, + SmallDenseMap &MapIVs); /// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors. static void materializeBroadcasts(VPlan &Plan); diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll index 196c7552d0852..5f7b630560ab3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll @@ -113,7 +113,6 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP10]], i32 3 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -130,7 +129,7 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: exit: -; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i16 [ [[SUB]], [[LOOP]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i16 [ [[SUB]], [[LOOP]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i16 [[SUB_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/induction-step.ll b/llvm/test/Transforms/LoopVectorize/induction-step.ll index 036d5f5886234..5163b9dfc7423 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-step.ll @@ -368,11 +368,11 @@ define void @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i16 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i16 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[IV_2:%.*]] = phi i16 [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_2:%.*]] = phi i16 [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[ADD]] = add i16 [[IV_2]], [[O_1]] ; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[IV]] ; CHECK-NEXT: store i16 [[ADD]], ptr [[GEP_DST]], align 2 @@ -439,11 +439,11 @@ define void @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i16 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i16 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[IV_2:%.*]] = phi i16 [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[SUB:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_2:%.*]] = phi i16 [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ [[SUB:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[SUB]] = sub i16 [[IV_2]], [[O_1]] ; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[IV]] ; CHECK-NEXT: store i16 [[SUB]], ptr [[GEP_DST]], align 2 diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index 7b7735434e67d..0ec0241ea2515 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -1095,14 +1095,11 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) { ; VEC: [[VECTOR_PH]]: ; VEC-NEXT: br label %[[VECTOR_BODY:.*]] ; VEC: [[VECTOR_BODY]]: -; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VEC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; VEC-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1 +; VEC-NEXT: [[TMP0:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]] ; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0 ; VEC-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP2]], align 2 -; VEC-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP6]] -; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2 ; VEC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8 ; VEC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; VEC: [[MIDDLE_BLOCK]]: @@ -1118,7 +1115,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) { ; VEC-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8 ; VEC-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}} ; VEC: [[E_EXIT]]: -; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] +; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ 8, %[[MIDDLE_BLOCK]] ] ; VEC-NEXT: ret i32 [[RES]] ; ; INTERLEAVE-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification( @@ -1136,7 +1133,6 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) { ; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]] ; INTERLEAVE-NEXT: store i16 0, ptr [[TMP2]], align 2 ; INTERLEAVE-NEXT: store i16 0, ptr [[TMP3]], align 2 -; INTERLEAVE-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP1]] ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8 ; INTERLEAVE-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} @@ -1153,7 +1149,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) { ; INTERLEAVE-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8 ; INTERLEAVE-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}} ; INTERLEAVE: [[E_EXIT]]: -; INTERLEAVE-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] +; INTERLEAVE-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ 8, %[[MIDDLE_BLOCK]] ] ; INTERLEAVE-NEXT: ret i32 [[RES]] ; entry: @@ -1182,12 +1178,9 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) { ; VEC-NEXT: [[STEP_2:%.*]] = add nsw i32 [[STEP_1]], 1 ; VEC-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VEC: [[VECTOR_PH]]: -; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP_2]], i64 0 -; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; VEC-NEXT: br label %[[VECTOR_BODY:.*]] ; VEC: [[VECTOR_BODY]]: ; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VEC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VEC-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], 2 ; VEC-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; VEC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 2 @@ -1195,14 +1188,10 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) { ; VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]] ; VEC-NEXT: store i16 0, ptr [[TMP2]], align 2 ; VEC-NEXT: store i16 0, ptr [[TMP3]], align 2 -; VEC-NEXT: [[TMP4:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 1) -; VEC-NEXT: [[TMP5:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[TMP4]] ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; VEC-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4 ; VEC-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; VEC: [[MIDDLE_BLOCK]]: -; VEC-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1 ; VEC-NEXT: br i1 true, label %[[E_EXIT:.*]], label %[[SCALAR_PH]] ; VEC: [[SCALAR_PH]]: ; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 8, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] @@ -1216,7 +1205,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) { ; VEC-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8 ; VEC-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}} ; VEC: [[E_EXIT]]: -; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] +; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ 8, %[[MIDDLE_BLOCK]] ] ; VEC-NEXT: ret i32 [[RES]] ; ; INTERLEAVE-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification_2( @@ -1235,8 +1224,6 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) { ; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]] ; INTERLEAVE-NEXT: store i16 0, ptr [[TMP2]], align 2 ; INTERLEAVE-NEXT: store i16 0, ptr [[TMP3]], align 2 -; INTERLEAVE-NEXT: [[TMP4:%.*]] = add i32 [[TMP1]], 1 -; INTERLEAVE-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP4]] ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4 ; INTERLEAVE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} @@ -1254,7 +1241,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) { ; INTERLEAVE-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8 ; INTERLEAVE-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}} ; INTERLEAVE: [[E_EXIT]]: -; INTERLEAVE-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] +; INTERLEAVE-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ 8, %[[MIDDLE_BLOCK]] ] ; INTERLEAVE-NEXT: ret i32 [[RES]] ; entry: @@ -1286,16 +1273,11 @@ define i32 @iv_ext_used_outside( ptr %dst) { ; VEC-NEXT: br label %[[VECTOR_BODY:.*]] ; VEC: [[VECTOR_BODY]]: ; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VEC-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VEC-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 ; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i16 [[OFFSET_IDX]] ; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 0 ; VEC-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP2]], align 4 -; VEC-NEXT: [[TMP5:%.*]] = add nuw nsw <2 x i16> [[VEC_IND]], splat (i16 1) -; VEC-NEXT: [[TMP8:%.*]] = extractelement <2 x i16> [[TMP5]], i32 1 -; VEC-NEXT: [[TMP7:%.*]] = zext nneg i16 [[TMP8]] to i32 ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; VEC-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 ; VEC-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; VEC: [[MIDDLE_BLOCK]]: @@ -1314,7 +1296,7 @@ define i32 @iv_ext_used_outside( ptr %dst) { ; VEC-NEXT: [[EC:%.*]] = icmp samesign ult i16 [[IV_1]], 128 ; VEC-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT]], {{!llvm.loop ![0-9]+}} ; VEC: [[EXIT]]: -; VEC-NEXT: [[IV_1_EXT_LCSSA:%.*]] = phi i32 [ [[IV_1_EXT]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] +; VEC-NEXT: [[IV_1_EXT_LCSSA:%.*]] = phi i32 [ [[IV_1_EXT]], %[[LOOP]] ], [ 128, %[[MIDDLE_BLOCK]] ] ; VEC-NEXT: ret i32 [[IV_1_EXT_LCSSA]] ; ; INTERLEAVE-LABEL: define i32 @iv_ext_used_outside( @@ -1331,8 +1313,6 @@ define i32 @iv_ext_used_outside( ptr %dst) { ; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i16 [[TMP1]] ; INTERLEAVE-NEXT: store i32 0, ptr [[TMP2]], align 4 ; INTERLEAVE-NEXT: store i32 0, ptr [[TMP3]], align 4 -; INTERLEAVE-NEXT: [[TMP4:%.*]] = add nuw nsw i16 [[TMP1]], 1 -; INTERLEAVE-NEXT: [[TMP5:%.*]] = zext nneg i16 [[TMP4]] to i32 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 ; INTERLEAVE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} @@ -1352,7 +1332,7 @@ define i32 @iv_ext_used_outside( ptr %dst) { ; INTERLEAVE-NEXT: [[EC:%.*]] = icmp samesign ult i16 [[IV_1]], 128 ; INTERLEAVE-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT]], {{!llvm.loop ![0-9]+}} ; INTERLEAVE: [[EXIT]]: -; INTERLEAVE-NEXT: [[IV_1_EXT_LCSSA:%.*]] = phi i32 [ [[IV_1_EXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] +; INTERLEAVE-NEXT: [[IV_1_EXT_LCSSA:%.*]] = phi i32 [ [[IV_1_EXT]], %[[LOOP]] ], [ 128, %[[MIDDLE_BLOCK]] ] ; INTERLEAVE-NEXT: ret i32 [[IV_1_EXT_LCSSA]] ; entry: @@ -1386,8 +1366,6 @@ define i64 @test_iv_increment_incremented(ptr %dst) { ; VEC-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[TMP0]], i32 0 ; VEC-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP1]], i32 -1 ; VEC-NEXT: store <2 x i16> splat (i16 1), ptr [[TMP2]], align 2 -; VEC-NEXT: [[TMP5:%.*]] = add i64 1, -1 -; VEC-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 1 ; VEC-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VEC: [[MIDDLE_BLOCK]]: ; VEC-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] @@ -1405,7 +1383,7 @@ define i64 @test_iv_increment_incremented(ptr %dst) { ; VEC-NEXT: [[IV_1_NEXT]] = add i64 [[IV_2_NEXT]], 1 ; VEC-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], {{!llvm.loop ![0-9]+}} ; VEC: [[EXIT]]: -; VEC-NEXT: [[IV_1_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_1_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ] +; VEC-NEXT: [[IV_1_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_1_NEXT]], %[[LOOP]] ], [ 1, %[[MIDDLE_BLOCK]] ] ; VEC-NEXT: ret i64 [[IV_1_NEXT_LCSSA]] ; ; INTERLEAVE-LABEL: define i64 @test_iv_increment_incremented( @@ -1419,8 +1397,6 @@ define i64 @test_iv_increment_incremented(ptr %dst) { ; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[DST]], i64 2 ; INTERLEAVE-NEXT: store i16 1, ptr [[TMP0]], align 2 ; INTERLEAVE-NEXT: store i16 1, ptr [[TMP1]], align 2 -; INTERLEAVE-NEXT: [[TMP2:%.*]] = add i64 1, -1 -; INTERLEAVE-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 1 ; INTERLEAVE-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; INTERLEAVE: [[MIDDLE_BLOCK]]: ; INTERLEAVE-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] @@ -1438,7 +1414,7 @@ define i64 @test_iv_increment_incremented(ptr %dst) { ; INTERLEAVE-NEXT: [[IV_1_NEXT]] = add i64 [[IV_2_NEXT]], 1 ; INTERLEAVE-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], {{!llvm.loop ![0-9]+}} ; INTERLEAVE: [[EXIT]]: -; INTERLEAVE-NEXT: [[IV_1_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_1_NEXT]], %[[LOOP]] ], [ [[TMP3]], %[[MIDDLE_BLOCK]] ] +; INTERLEAVE-NEXT: [[IV_1_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_1_NEXT]], %[[LOOP]] ], [ 1, %[[MIDDLE_BLOCK]] ] ; INTERLEAVE-NEXT: ret i64 [[IV_1_NEXT_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll index b0029a4e0d069..2df8645b5305a 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll @@ -17,20 +17,10 @@ define i32 @iv_live_out_wide(ptr %dst) { ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 2000, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 2000, [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 2 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[STEP_2]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i32() -; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], splat (i32 1) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP4]], 4 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() @@ -38,16 +28,10 @@ define i32 @iv_live_out_wide(ptr %dst) { ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP13]] ; CHECK-NEXT: store zeroinitializer, ptr [[TMP11]], align 2 ; CHECK-NEXT: store zeroinitializer, ptr [[TMP14]], align 2 -; CHECK-NEXT: [[TMP15:%.*]] = add [[BROADCAST_SPLAT]], [[STEP_ADD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 2 -; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 -; CHECK-NEXT: [[TMP20:%.*]] = extractelement [[TMP15]], i32 [[TMP19]] ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 2000, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[E_EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: @@ -61,7 +45,7 @@ define i32 @iv_live_out_wide(ptr %dst) { ; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 2000 ; CHECK-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[E_EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP20]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RES]] ; entry: