Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 11 additions & 13 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9629,6 +9629,16 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
// vectorizing the epilogue loop.
for (VPRecipeBase &R : Header->phis()) {
if (auto *IV = dyn_cast<VPCanonicalIVPHIRecipe>(&R)) {
// If we didn't find any PHIs, due to a simplification where all incoming
// values were equal (and necessarily zero), it means that the vector trip
// count is zero.
// TODO: We should not choose VF * UF so the main vector loop is known to
// be dead.
if (L->getLoopPreheader()->phis().empty()) {
EPI.VectorTripCount = ConstantInt::get(IV->getScalarType(), 0);
continue;
Comment on lines +9637 to +9639
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we asser that the scalar preheader in VPlan is unreachable?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried assert(!Plan.getScalarPreheader()->hasPredecessors()), but it failed. What am I missing?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think in all these test diffs the resume value has both incoming values as zero because the scalar preheader is never taken, e.g. it's always br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]].

I think #154510 will make the scalar.ph block unreachable once it lands.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, it looks like @fhahn's patch isn't sufficient to insert the assert: the epilogue scalar.ph is still not removed. This is the test that fails:

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
index b83d3af3a0d6..8586e126b75b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
@@ -374,10 +374,9 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) {
 ; CHECK:       vec.epilog.iter.check:
 ; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
 ; CHECK:       vec.epilog.ph:
-; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD7:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1
 ; CHECK-NEXT:    [[TMP12:%.*]] = add <16 x i8> [[WIDE_LOAD7]], splat (i8 10)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the folding logic does not yet apply to epilogue vectorization, until the skeleton there is fully modeled in VPlan, still working on it

}

// When vectorizing the epilogue loop, the canonical induction start
// value needs to be changed from zero to the value after the main
// vector loop. Find the resume value created during execution of the main
Expand All @@ -9644,19 +9654,7 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
"Must only have a single non-zero incoming value");
EPI.VectorTripCount = Inc;
}
// If we didn't find a non-zero vector trip count, all incoming values
// must be zero, which also means the vector trip count is zero. Pick the
// first zero as vector trip count.
// TODO: We should not choose VF * UF so the main vector loop is known to
// be dead.
if (!EPI.VectorTripCount) {
assert(
EPResumeVal->getNumIncomingValues() > 0 &&
all_of(EPResumeVal->incoming_values(),
[](Value *Inc) { return match(Inc, m_SpecificInt(0)); }) &&
"all incoming values must be 0");
EPI.VectorTripCount = EPResumeVal->getOperand(0);
}
assert(EPI.VectorTripCount && "Must have an epilog vector trip-count");
VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal);
assert(all_of(IV->users(),
[](const VPUser *U) {
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1225,8 +1225,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
}

if (auto *Phi = dyn_cast<VPPhi>(Def)) {
if (Phi->getNumOperands() == 1)
Phi->replaceAllUsesWith(Phi->getOperand(0));
if (all_equal(Phi->incoming_values()))
Phi->replaceAllUsesWith(Phi->getIncomingValue(0));
return;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -374,10 +374,9 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) {
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1
; CHECK-NEXT: [[TMP12:%.*]] = add <16 x i8> [[WIDE_LOAD7]], splat (i8 10)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,10 +268,9 @@ define void @test_predicated_load_cast_hint(ptr %dst.1, ptr %dst.2, ptr %src, i8
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[SRC]], align 1
; CHECK-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i64
; CHECK-NEXT: [[ADD:%.*]] = or i64 [[L_EXT]], 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,9 @@ define void @predicated_uniform_load(ptr %src, i32 %n, ptr %dst, i1 %cond) {
; CHECK: middle.block:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: br i1 [[COND]], label [[LOOP_THEN:%.*]], label [[LOOP_ELSE:%.*]]
; CHECK: loop.then:
; CHECK-NEXT: br label [[LOOP_LATCH]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,6 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; RV64: [[MIDDLE_BLOCK]]:
; RV64-NEXT: br label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]]
; RV64: [[SCALAR_PH]]:
; RV64-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ]
; RV64-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[N]], %[[VECTOR_SCEVCHECK]] ], [ [[N]], %[[VECTOR_MEMCHECK]] ]
; RV64-NEXT: br label %[[FOR_BODY:.*]]
; RV64: [[FOR_COND_CLEANUP_LOOPEXIT]]:
; RV64-NEXT: br label %[[FOR_COND_CLEANUP]]
Expand Down Expand Up @@ -493,8 +491,6 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; RV64: [[MIDDLE_BLOCK]]:
; RV64-NEXT: br label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]]
; RV64: [[SCALAR_PH]]:
; RV64-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ]
; RV64-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[N]], %[[VECTOR_SCEVCHECK]] ], [ [[N]], %[[VECTOR_MEMCHECK]] ]
; RV64-NEXT: br label %[[FOR_BODY:.*]]
; RV64: [[FOR_COND_CLEANUP_LOOPEXIT]]:
; RV64-NEXT: br label %[[FOR_COND_CLEANUP]]
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
Original file line number Diff line number Diff line change
Expand Up @@ -684,10 +684,9 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; NOSTRIDED: middle.block:
; NOSTRIDED-NEXT: br label [[EXIT:%.*]]
; NOSTRIDED: scalar.ph:
; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; NOSTRIDED-NEXT: br label [[LOOP:%.*]]
; NOSTRIDED: loop:
; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; NOSTRIDED-NEXT: [[OFFSET:%.*]] = mul nuw nsw i64 [[I]], [[STRIDE]]
; NOSTRIDED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
; NOSTRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
Expand Down Expand Up @@ -842,10 +841,9 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; STRIDED: middle.block:
; STRIDED-NEXT: br label [[EXIT:%.*]]
; STRIDED: scalar.ph:
; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK1]] ]
; STRIDED-NEXT: br label [[LOOP:%.*]]
; STRIDED: loop:
; STRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; STRIDED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; STRIDED-NEXT: [[OFFSET:%.*]] = mul nuw nsw i64 [[I]], [[STRIDE]]
; STRIDED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
; STRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4
Expand Down