-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[VPlan] Support VPWidenPointerInductionRecipes with EVL tail folding #152110
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
0e19e9e
cd4a7be
4b31963
4613265
cf41972
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2177,8 +2177,18 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { | |
assert(all_of(Plan.getVF().users(), | ||
IsaPred<VPVectorEndPointerRecipe, VPScalarIVStepsRecipe, | ||
VPWidenIntOrFpInductionRecipe>) && | ||
all_of(Plan.getVFxUF().users(), | ||
[&Plan](VPUser *U) { | ||
return match(U, m_c_Binary<Instruction::Add>( | ||
m_Specific(Plan.getCanonicalIV()), | ||
m_Specific(&Plan.getVFxUF()))) || | ||
isa<VPWidenPointerInductionRecipe>(U); | ||
}) && | ||
"User of VF that we can't transform to EVL."); | ||
Plan.getVF().replaceAllUsesWith(&EVL); | ||
Plan.getVFxUF().replaceUsesWithIf(&EVL, [](VPUser &U, unsigned Idx) { | ||
return isa<VPWidenPointerInductionRecipe>(U); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should have an explanation and possibly an assert. The only other allowed users is the canonical IV increment? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, the assertion is handled in the existing assert above. Will add a comment |
||
}); | ||
|
||
// Defer erasing recipes till the end so that we don't invalidate the | ||
// VPTypeAnalysis cache. | ||
|
@@ -2315,16 +2325,9 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { | |
/// %NextAVL = sub IVSize nuw %AVL, %OpEVL | ||
/// ... | ||
/// | ||
bool VPlanTransforms::tryAddExplicitVectorLength( | ||
void VPlanTransforms::addExplicitVectorLength( | ||
VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) { | ||
VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock(); | ||
// The transform updates all users of inductions to work based on EVL, instead | ||
// of the VF directly. At the moment, widened pointer inductions cannot be | ||
// updated, so bail out if the plan contains any. | ||
bool ContainsWidenPointerInductions = | ||
any_of(Header->phis(), IsaPred<VPWidenPointerInductionRecipe>); | ||
if (ContainsWidenPointerInductions) | ||
return false; | ||
|
||
auto *CanonicalIVPHI = Plan.getCanonicalIV(); | ||
auto *CanIVTy = CanonicalIVPHI->getScalarType(); | ||
|
@@ -2379,7 +2382,6 @@ bool VPlanTransforms::tryAddExplicitVectorLength( | |
CanonicalIVIncrement->setOperand(0, CanonicalIVPHI); | ||
// TODO: support unroll factor > 1. | ||
Plan.setUF(1); | ||
return true; | ||
} | ||
|
||
void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) { | ||
|
@@ -2803,13 +2805,12 @@ static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R, | |
R->replaceAllUsesWith(PtrAdd); | ||
|
||
// Create the backedge value for the scalar pointer phi. | ||
Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi()); | ||
VPBasicBlock *ExitingBB = Plan->getVectorLoopRegion()->getExitingBasicBlock(); | ||
Builder.setInsertPoint(ExitingBB, ExitingBB->getTerminator()->getIterator()); | ||
Comment on lines
+2813
to
+2814
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How is this related to the EVL change? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Previously the backedge value was created after the first non PHI, which uses VF. However now that the VF passed might be the defined in the vector loop body (i.e. the VPInstruction::ExplicitVectorLength) this inserts it at the end of the region where it should dominate. |
||
VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF), | ||
DL); | ||
VPValue *Inc = Builder.createNaryOp(Instruction::Mul, {Step, VF}); | ||
|
||
VPBasicBlock *ExitingBB = Plan->getVectorLoopRegion()->getExitingBasicBlock(); | ||
Builder.setInsertPoint(ExitingBB, ExitingBB->getTerminator()->getIterator()); | ||
VPValue *InductionGEP = | ||
Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind"); | ||
ScalarPtrPhi->addOperand(InductionGEP); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,29 +1,36 @@ | ||
; RUN: opt -passes=loop-vectorize \ | ||
; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ | ||
; RUN: -mtriple=riscv64 -mattr=+v -S -debug %s 2>&1 | FileCheck %s | ||
; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -S -debug %s 2>&1 | FileCheck %s | ||
|
||
; REQUIRES: asserts | ||
|
||
; Make sure we do not vectorize a loop with a widened pointer induction. | ||
define void @test_wide_pointer_induction(ptr noalias %a, i64 %N) { | ||
; For %for.1, we are fine initially, because the previous value %for.1.next dominates the | ||
; user of %for.1. But for %for.2, we have to sink the user (%for.1.next) past the previous | ||
; value %for.2.next. This however breaks the condition we have for %for.1. We cannot fix | ||
; both first order recurrences and cannot vectorize the loop. | ||
; | ||
; Make sure we don't compute costs if there are no vector VPlans. | ||
|
||
; CHECK-NOT: LV: Vector loop of width {{.+}} costs: | ||
; | ||
; CHECK: define void @test_wide_pointer_induction( | ||
; CHECK: define i32 @test( | ||
; CHECK-NOT: vector.body | ||
; | ||
define i32 @test(i32 %N) { | ||
entry: | ||
br label %loop | ||
br label %for.body | ||
|
||
loop: | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] | ||
%iv.ptr = phi ptr [ %a, %entry ], [ %iv.ptr.next, %loop ] | ||
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv | ||
store ptr %iv.ptr, ptr %arrayidx, align 8 | ||
%iv.next = add nuw nsw i64 %iv, 1 | ||
%iv.ptr.next = getelementptr i64, ptr %iv.ptr, i32 1 | ||
%exitcond.not = icmp eq i64 %iv.next, %N | ||
br i1 %exitcond.not, label %exit, label %loop | ||
for.body: ; preds = %for.body.preheader, %for.body | ||
%iv = phi i32 [ %inc, %for.body ], [ 10, %entry ] | ||
%for.1 = phi i32 [ %for.1.next, %for.body ], [ 20, %entry ] | ||
%for.2 = phi i32 [ %for.2.next, %for.body ], [ 11, %entry ] | ||
%for.1.next = add nsw i32 %for.2, 1 | ||
%for.2.next = shl i32 %for.1, 24 | ||
%inc = add nsw i32 %iv, 1 | ||
%exitcond = icmp eq i32 %inc, %N | ||
br i1 %exitcond, label %for.cond1.for.end_crit_edge, label %for.body | ||
|
||
exit: | ||
ret void | ||
for.cond1.for.end_crit_edge: ; preds = %for.body | ||
%add.lcssa = phi i32 [ %for.1.next, %for.body ] | ||
%sext.lcssa = phi i32 [ %for.2.next, %for.body ] | ||
%res = add i32 %add.lcssa, %sext.lcssa | ||
ret i32 %res | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
probably best to split up into 2 asserts with separate messages
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done in cf41972