-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[VPlan] Remove PtrIV::IsScalarAfterVectorization, use VPlan analysis. #168289
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-vectorizers Author: Florian Hahn (fhahn) ChangesRemove Full diff: https://github.com/llvm/llvm-project/pull/168289.diff 8 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index cbfbc29360b0b..66b6ddcdb545c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7655,7 +7655,7 @@ createWidenInductionRecipes(VPInstruction *PhiR,
}
VPHeaderPHIRecipe *
-VPRecipeBuilder::tryToOptimizeInductionPHI(VPInstruction *VPI, VFRange &Range) {
+VPRecipeBuilder::tryToOptimizeInductionPHI(VPInstruction *VPI) {
auto *Phi = cast<PHINode>(VPI->getUnderlyingInstr());
// Check if this is an integer or fp induction. If so, build the recipe that
@@ -7668,11 +7668,6 @@ VPRecipeBuilder::tryToOptimizeInductionPHI(VPInstruction *VPI, VFRange &Range) {
VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep());
return new VPWidenPointerInductionRecipe(
Phi, VPI->getOperand(0), Step, &Plan.getVFxUF(), *II,
- LoopVectorizationPlanner::getDecisionAndClampRange(
- [&](ElementCount VF) {
- return CM.isScalarAfterVectorization(Phi, VF);
- },
- Range),
VPI->getDebugLoc());
}
return nullptr;
@@ -8162,7 +8157,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
"Non-header phis should have been handled during predication");
auto *Phi = cast<PHINode>(R->getUnderlyingInstr());
assert(R->getNumOperands() == 2 && "Must have 2 operands for header phis");
- if ((Recipe = tryToOptimizeInductionPHI(PhiR, Range)))
+ if ((Recipe = tryToOptimizeInductionPHI(PhiR)))
return Recipe;
VPHeaderPHIRecipe *PhiRecipe = nullptr;
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index a7000aff06379..c6019e733fe77 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -100,8 +100,7 @@ class VPRecipeBuilder {
/// Check if an induction recipe should be constructed for \p VPI. If so build
/// and return it. If not, return null.
- VPHeaderPHIRecipe *tryToOptimizeInductionPHI(VPInstruction *VPI,
- VFRange &Range);
+ VPHeaderPHIRecipe *tryToOptimizeInductionPHI(VPInstruction *VPI);
/// Optimize the special case where the operand of \p VPI is a constant
/// integer induction variable.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index ea88eaa42d945..3fd6ec1f24255 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2211,19 +2211,15 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
};
class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe {
- bool IsScalarAfterVectorization;
-
public:
/// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
/// Start and the number of elements unrolled \p NumUnrolledElems, typically
/// VF*UF.
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
VPValue *NumUnrolledElems,
- const InductionDescriptor &IndDesc,
- bool IsScalarAfterVectorization, DebugLoc DL)
+ const InductionDescriptor &IndDesc, DebugLoc DL)
: VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
- Step, IndDesc, DL),
- IsScalarAfterVectorization(IsScalarAfterVectorization) {
+ Step, IndDesc, DL) {
addOperand(NumUnrolledElems);
}
@@ -2232,8 +2228,7 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe {
VPWidenPointerInductionRecipe *clone() override {
return new VPWidenPointerInductionRecipe(
cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
- getOperand(2), getInductionDescriptor(), IsScalarAfterVectorization,
- getDebugLoc());
+ getOperand(2), getInductionDescriptor(), getDebugLoc());
}
VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index aa85bd435ee9e..5071941eb1413 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -4310,7 +4310,7 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
#endif
bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
- return IsScalarAfterVectorization &&
+ return vputils::onlyScalarValuesUsed(this) &&
(!IsScalable || vputils::onlyFirstLaneUsed(this));
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
index d23e3c29b59e5..28b2d30b03fd6 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
@@ -11,21 +11,14 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR_START_1:%.*]], i64 10000
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START_1:%.*]], i64 [[TMP0]]
-; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x ptr> [[TMP4]], ptr [[NEXT_GEP1]], i32 1
-; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP2]]
-; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP2]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 1
+; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR_START_1]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <2 x i64> <i64 0, i64 1>
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, <2 x ptr> [[TMP5]], <2 x i64> splat (i64 2)
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <2 x ptr> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
@@ -40,25 +33,24 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[NEXT_GEP]], align 1
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP15]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 4
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
; CHECK: middle.block:
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[IND_END4:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
-; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[TMP0]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX6]], 0
-; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX6]], 1
-; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP17]]
-; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP18]]
-; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP7]], i32 0
-; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x ptr> [[TMP19]], ptr [[NEXT_GEP8]], i32 1
+; CHECK-NEXT: [[POINTER_PHI2:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], [[VEC_EPILOG_PH]] ], [ [[PTR_IND5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[POINTER_PHI2]], <2 x i64> <i64 0, i64 1>
+; CHECK-NEXT: [[NEXT_GEP7:%.*]] = extractelement <2 x ptr> [[TMP20]], i32 0
; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <2 x ptr> [[TMP20]], zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP21]], i32 0
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP21]], i32 1
@@ -66,6 +58,7 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP23]])
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[NEXT_GEP7]], align 1
; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2
+; CHECK-NEXT: [[PTR_IND5]] = getelementptr i8, ptr [[POINTER_PHI2]], i64 2
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT9]], 10000
; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
; CHECK: vec.epilog.middle.block:
@@ -136,7 +129,7 @@ define void @test_widen_induction(ptr %A, i64 %N) {
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 2
@@ -221,7 +214,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[IND_END5:%.*]] = add i64 [[START]], [[N_VEC]]
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
@@ -304,7 +297,7 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) {
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 2
@@ -416,7 +409,7 @@ define void @test_widen_truncated_induction(ptr %A) {
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
-; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[VEC_EPILOG_RESUME_VAL]] to i8
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
index f223786a07cdf..456c03824106a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -112,15 +112,11 @@ define void @pointer_induction(ptr noalias %start, i64 %N) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
-; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP12]]
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 2 x ptr> [[VECTOR_GEP]], i32 0
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX2]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP15]], align 1
; CHECK-NEXT: [[TMP17:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], splat (i8 1)
; CHECK-NEXT: store <vscale x 2 x i8> [[TMP17]], ptr [[TMP15]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP4]]
-; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll
index 3a07bcca523ce..13da121fe2dc2 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll
@@ -25,17 +25,14 @@ define void @test_pr55375_interleave_opaque_ptr(ptr %start, ptr %end) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 16
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]]
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP7]], i32 0
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> [[TMP9]], ptr [[TMP8]], i32 1
+; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <2 x i64> <i64 0, i64 16>
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x ptr> [[TMP10]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x ptr> zeroinitializer, <2 x ptr> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[TMP12]], <4 x ptr> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; CHECK-NEXT: store <4 x ptr> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 32
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll
index cebf90ae9f8ce..94392f856c386 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll
@@ -15,16 +15,20 @@ define void @wide_ptr_induction_index_width_smaller_than_iv_width(ptr noalias %s
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[SRC]], %[[VECTOR_PH]] ], [ [[PTR_IND:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 8, i32 16, i32 24>
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 0
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 1
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 2
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 3
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[DOTCAST]], 8
+; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], 8
+; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], 16
+; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[OFFSET_IDX]], 24
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP11]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP6]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP16]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP17]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP5]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP2]]
@@ -36,7 +40,6 @@ define void @wide_ptr_induction_index_width_smaller_than_iv_width(ptr noalias %s
; CHECK-NEXT: store ptr [[TMP13]], ptr [[TMP9]], align 8
; CHECK-NEXT: store ptr [[TMP14]], ptr [[TMP10]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 32
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
artagnon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, with formatting fixed, thanks!
Remove
VPWidenPointerInductionRecipe::IsScalarAfterVectorizationand replace it withonlyScalarValuesUsed. This removes the need to carry state from the legacy cost model through VPlan, and the VPlan-based analysis gives more accurate results, avoding a number of extracts.