-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[VPlan] Connect Entry to scalar preheader during initial construction. #140132
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
d481278
76212e6
a324d27
2ad3e76
f4a28d4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -490,7 +490,7 @@ class InnerLoopVectorizer { | |||||||||
| MinProfitableTripCount(MinProfitableTripCount), UF(UnrollFactor), | ||||||||||
| Builder(PSE.getSE()->getContext()), Cost(CM), BFI(BFI), PSI(PSI), | ||||||||||
| RTChecks(RTChecks), Plan(Plan), | ||||||||||
| VectorPHVPB(Plan.getEntry()->getSingleSuccessor()) {} | ||||||||||
| VectorPHVPB(Plan.getVectorLoopRegion()->getSinglePredecessor()) {} | ||||||||||
|
|
||||||||||
| virtual ~InnerLoopVectorizer() = default; | ||||||||||
|
|
||||||||||
|
|
@@ -2368,14 +2368,11 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) { | |||||||||
| void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) { | ||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Clarify that CheckBlock now excludes the initial trip-count check, which is expected to be already introduced before calling
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a note, thanks |
||||||||||
| VPBlockBase *ScalarPH = Plan.getScalarPreheader(); | ||||||||||
| VPBlockBase *PreVectorPH = VectorPHVPB->getSinglePredecessor(); | ||||||||||
| if (PreVectorPH->getNumSuccessors() != 1) { | ||||||||||
| assert(PreVectorPH->getNumSuccessors() == 2 && "Expected 2 successors"); | ||||||||||
| assert(PreVectorPH->getSuccessors()[0] == ScalarPH && | ||||||||||
| "Unexpected successor"); | ||||||||||
| VPIRBasicBlock *CheckVPIRBB = Plan.createVPIRBasicBlock(CheckIRBB); | ||||||||||
| VPBlockUtils::insertOnEdge(PreVectorPH, VectorPHVPB, CheckVPIRBB); | ||||||||||
| PreVectorPH = CheckVPIRBB; | ||||||||||
| } | ||||||||||
| assert(PreVectorPH->getNumSuccessors() == 2 && "Expected 2 successors"); | ||||||||||
| assert(PreVectorPH->getSuccessors()[0] == ScalarPH && "Unexpected successor"); | ||||||||||
| VPIRBasicBlock *CheckVPIRBB = Plan.createVPIRBasicBlock(CheckIRBB); | ||||||||||
| VPBlockUtils::insertOnEdge(PreVectorPH, VectorPHVPB, CheckVPIRBB); | ||||||||||
| PreVectorPH = CheckVPIRBB; | ||||||||||
| VPBlockUtils::connectBlocks(PreVectorPH, ScalarPH); | ||||||||||
| PreVectorPH->swapSuccessors(); | ||||||||||
|
|
||||||||||
|
|
@@ -2466,9 +2463,6 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) { | |||||||||
| setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false); | ||||||||||
| ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI); | ||||||||||
| LoopBypassBlocks.push_back(TCCheckBlock); | ||||||||||
|
|
||||||||||
| // TODO: Wrap LoopVectorPreHeader in VPIRBasicBlock here. | ||||||||||
| introduceCheckBlockInVPlan(TCCheckBlock); | ||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Connecting entry VPBB to scalar preheader VPBB is done here as part of introducing the conditional branch at end of entry IRBB?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TCCheckBlock is the same block as the VPlan entry block. Before the change, we needed special logic to not create a new VPIRBB for this call.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Worth an assert to clarify?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added thanks |
||||||||||
| } | ||||||||||
|
|
||||||||||
| BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) { | ||||||||||
|
|
@@ -7643,7 +7637,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan( | |||||||||
|
|
||||||||||
| // 1. Set up the skeleton for vectorization, including vector pre-header and | ||||||||||
| // middle block. The vector loop is created during VPlan execution. | ||||||||||
| VPBasicBlock *VectorPH = cast<VPBasicBlock>(Entry->getSingleSuccessor()); | ||||||||||
| VPBasicBlock *VectorPH = cast<VPBasicBlock>(Entry->getSuccessors()[1]); | ||||||||||
| State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton(); | ||||||||||
| if (VectorizingEpilogue) | ||||||||||
| VPlanTransforms::removeDeadRecipes(BestVPlan); | ||||||||||
|
|
@@ -7876,7 +7870,10 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass, | |||||||||
| setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false); | ||||||||||
| ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI); | ||||||||||
|
|
||||||||||
| introduceCheckBlockInVPlan(TCCheckBlock); | ||||||||||
| // Only generate add a new block for the trip-count check for the main loop. | ||||||||||
| // The epilogue will use the already existing VPlan entry block. | ||||||||||
|
||||||||||
| // Only generate add a new block for the trip-count check for the main loop. | |
| // The epilogue will use the already existing VPlan entry block. | |
| // When vectorizing the main loop, its trip-count check is placed in a new block, whereas the overall trip-count check is placed in the VPlan entry block. | |
| // When vectorizing the epilogue loop, its trip-count check is placed in the VPlan entry block. |
sounds clearer? Referring to https://llvm.org/docs/Vectorizers.html#epilogue-vectorization
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
updated thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| if (!ForEpilogue) | |
| // Worth explaining? | |
| if (!ForEpilogue) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suggesting a getMiddleBlock(Plan) utility?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds good, will check where this could be useful separately.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -540,6 +540,9 @@ void VPlanTransforms::prepareForVectorization( | |
| if (auto *LatchExitVPB = MiddleVPBB->getSingleSuccessor()) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Independent: worth clarifying that not requiring a scalar epilog check means the scalar epilog is (always) required, i.e., case 1. |
||
| VPBlockUtils::disconnectBlocks(MiddleVPBB, LatchExitVPB); | ||
| VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); | ||
| VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH); | ||
| Plan.getEntry()->swapSuccessors(); | ||
|
|
||
| // The exit blocks are unreachable, remove their recipes to make sure no | ||
| // users remain that may pessimize transforms. | ||
| for (auto *EB : Plan.getExitBlocks()) { | ||
|
|
@@ -552,6 +555,9 @@ void VPlanTransforms::prepareForVectorization( | |
| // The connection order corresponds to the operands of the conditional branch, | ||
| // with the middle block already connected to the exit block. | ||
| VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); | ||
| // Also connect the entry block to the scalar preheader. | ||
| VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The CFG is less consistent with VPlan's recipes, connecting entry VPBB to scalarPH VPBB now, when it is already connected to vectorPH, w/o introducing a conditional branch recipe at the end of entry - the branch instruction is generated outside VPlan's execute.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, we still need to allow terminator-less VPIRBBs for various parts of the skeleton. One of the next steps after adding it early here is adding a branch recipe.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Temporary inconsistency is worth clarifying in a TODO.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added thanks |
||
| Plan.getEntry()->swapSuccessors(); | ||
|
|
||
| auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator(); | ||
| // Here we use the same DebugLoc as the scalar loop latch terminator instead | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1846,7 +1846,7 @@ static void removeBranchOnCondTrue(VPlan &Plan) { | |
| using namespace llvm::VPlanPatternMatch; | ||
| for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>( | ||
| vp_depth_first_shallow(Plan.getEntry()))) { | ||
| if (VPBB->getNumSuccessors() != 2 || | ||
| if (VPBB->getNumSuccessors() != 2 || isa<VPIRBasicBlock>(VPBB) || | ||
|
||
| !match(&VPBB->back(), m_BranchOnCond(m_True()))) | ||
| continue; | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note that ILV now depends on HCFG here, and elsewhere. Maybe better to retrieve the first hierarchical predecessor of first header block, to relax this dependence, possibly as follow up.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds good!