-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[VPlan] Dispatch to multiple exit blocks via middle blocks. #112138
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
245b56a
47258de
9265fb1
3831acb
e64888a
64db0ee
3259e66
0f8aedf
9212f96
5cb0851
e849195
7b98d34
c53eca6
43a8ef7
e26af8e
06c3d39
552bd91
2042a43
00dea4a
7b8866d
4d5608f
b9ee739
43d5590
cba7dce
95f4276
c3d3b39
a875249
65d0288
8d04383
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| digraph VPlan { | ||
| graph [labelloc=t, fontsize=30; label=""] | ||
| node [shape=rect, fontname=Courier, fontsize=30] | ||
| edge [fontname=Courier, fontsize=30] | ||
| compound=true | ||
| N1 [label = | ||
| "vector.ph" | ||
| ] | ||
| N1 -> N2 [ label="" lhead=cluster_N3] | ||
| subgraph cluster_N3 { | ||
| fontname=Courier | ||
| label="\<x1\> vector loop" | ||
| N2 [label = | ||
| "vector.body" | ||
| ] | ||
| } | ||
| N2 -> N4 [ label="" ltail=cluster_N3] | ||
| N4 [label = | ||
| "middle.split" | ||
| ] | ||
| N4 -> N5 [ label=""] | ||
| N4 -> N6 [ label=""] | ||
| N5 [label = | ||
| "early.exit" | ||
| ] | ||
| N6 [label = | ||
| "middle.block" | ||
| ] | ||
| N6 -> N9 [ label=""] | ||
| N6 -> N7 [ label=""] | ||
| N7 [label = | ||
| "scalar.ph" | ||
| ] | ||
| N7 -> N8 [ label=""] | ||
| N8 [label = | ||
| "loop.header" | ||
| ] | ||
| N9 [label = | ||
| "latch.exit" | ||
| ] | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -861,14 +861,10 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, | |
| auto Plan = std::make_unique<VPlan>(Entry, VecPreheader, ScalarHeader); | ||
|
|
||
| // Create SCEV and VPValue for the trip count. | ||
|
|
||
| // Currently only loops with countable exits are vectorized, but calling | ||
| // getSymbolicMaxBackedgeTakenCount allows enablement work for loops with | ||
| // uncountable exits whilst also ensuring the symbolic maximum and known | ||
| // back-edge taken count remain identical for loops with countable exits. | ||
| // We use the symbolic max backedge-taken-count, which works also when | ||
| // vectorizing loops with uncountable early exits. | ||
| const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount(); | ||
| assert((!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) && | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it still worth at least having: ?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, added back, thanks |
||
| BackedgeTakenCountSCEV == PSE.getBackedgeTakenCount()) && | ||
| assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) && | ||
| "Invalid loop count"); | ||
| ScalarEvolution &SE = *PSE.getSE(); | ||
| const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV, | ||
|
|
@@ -903,7 +899,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, | |
| // 2) If we require a scalar epilogue, there is no conditional branch as | ||
| // we unconditionally branch to the scalar preheader. Do nothing. | ||
| // 3) Otherwise, construct a runtime check. | ||
| BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock(); | ||
| BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock(); | ||
| auto *VPExitBlock = VPIRBasicBlock::fromBasicBlock(IRExitBlock); | ||
| // The connection order corresponds to the operands of the conditional branch. | ||
| VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -621,6 +621,14 @@ class VPBlockBase { | |
| /// Remove all the successors of this block. | ||
| void clearSuccessors() { Successors.clear(); } | ||
|
|
||
| /// Swap successors of the block. The block must have exactly 2 successors. | ||
| // TODO: This should be part of introducing conditional branch recipes rather | ||
| // than being independent. | ||
| void swapSuccessors() { | ||
| assert(Successors.size() == 2 && "must have 2 successors to swap"); | ||
| std::swap(Successors[0], Successors[1]); | ||
| } | ||
|
|
||
| /// The method which generates the output IR that correspond to this | ||
| /// VPBlockBase, thereby "executing" the VPlan. | ||
| virtual void execute(VPTransformState *State) = 0; | ||
|
|
@@ -1232,6 +1240,9 @@ class VPInstruction : public VPRecipeWithIRFlags, | |
| // operand). Only generates scalar values (either for the first lane only or | ||
| // for all lanes, depending on its uses). | ||
| PtrAdd, | ||
| // Returns a scalar boolean value, which is true if any lane of its single | ||
| // operand is true. | ||
| AnyOf, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps worth adding a simple comment here? Something along the lines of:
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added, thanks
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Some explanation how AnyOf relates (or should relate) to ComputeReductionResult?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think AnyOf also needs adding to the switch statement in VPRecipeBase::mayWriteToMemory and return false? |
||
| }; | ||
|
|
||
| private: | ||
|
|
@@ -3884,10 +3895,10 @@ class VPlan { | |
| /// whether to execute the scalar tail loop or the exit block from the loop | ||
| /// latch. | ||
| const VPBasicBlock *getMiddleBlock() const { | ||
| return cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor()); | ||
| return cast<VPBasicBlock>(getScalarPreheader()->getSinglePredecessor()); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This restricts to use of getMiddleBlock() to before bypassing guards are introduced?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it doesn't really restrict the use of getMiddleBlock, it just updates the anchor point we use to identify it; the scalar preheader (and single predecessor ) can be more easily identified and works automatically with the changes to the skeleton.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Works as long as the scalar preheader has a single predecessor, i.e., until runtime guards are introduced as additional predecessors.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, this will need some extra work for #114292, which I plan to land after this PR. |
||
| } | ||
| VPBasicBlock *getMiddleBlock() { | ||
| return cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor()); | ||
| return cast<VPBasicBlock>(getScalarPreheader()->getSinglePredecessor()); | ||
| } | ||
|
|
||
| /// Return the VPBasicBlock for the preheader of the scalar loop. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Better have
collectUsersInExitBlocks()indicate failure, thanaddUsersInExitBlocks(), which followsaddExitUsersForFirstOrderRecurrences()?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That would require
collectUsersInExitBlocksto check ifaddUsersInExitBlockswill be able to handle the user. Left as is for now