-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[VPlan] Build initial VPlan 0 using HCFGBuilder for inner loops. (NFC) #124432
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
85721ea
b8ef9b4
d2bdde2
953eeff
6a872d7
efc4b83
7a9e7ab
f8fa5c0
83e933a
73e2beb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -9238,6 +9238,7 @@ static void addExitUsersForFirstOrderRecurrences( | |||||||||||||
| VPlanPtr | ||||||||||||||
| LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { | ||||||||||||||
|
|
||||||||||||||
| using namespace llvm::VPlanPatternMatch; | ||||||||||||||
| SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups; | ||||||||||||||
|
|
||||||||||||||
| // --------------------------------------------------------------------------- | ||||||||||||||
|
|
@@ -9261,6 +9262,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { | |||||||||||||
| PSE, RequiresScalarEpilogueCheck, | ||||||||||||||
| CM.foldTailByMasking(), OrigLoop); | ||||||||||||||
|
|
||||||||||||||
| // Build hierarchical CFG. | ||||||||||||||
| VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); | ||||||||||||||
| HCFGBuilder.buildHierarchicalCFG(); | ||||||||||||||
|
|
||||||||||||||
| // Don't use getDecisionAndClampRange here, because we don't know the UF | ||||||||||||||
| // so this function is better to be conservative, rather than to split | ||||||||||||||
| // it up into different VPlans. | ||||||||||||||
|
|
@@ -9311,12 +9316,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { | |||||||||||||
| // Construct recipes for the instructions in the loop | ||||||||||||||
| // --------------------------------------------------------------------------- | ||||||||||||||
|
|
||||||||||||||
| // Scan the body of the loop in a topological order to visit each basic block | ||||||||||||||
| // after having visited its predecessor basic blocks. | ||||||||||||||
| LoopBlocksDFS DFS(OrigLoop); | ||||||||||||||
| DFS.perform(LI); | ||||||||||||||
|
|
||||||||||||||
| VPBasicBlock *HeaderVPBB = Plan->getVectorLoopRegion()->getEntryBasicBlock(); | ||||||||||||||
| VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion(); | ||||||||||||||
| VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock(); | ||||||||||||||
| VPBasicBlock *VPBB = HeaderVPBB; | ||||||||||||||
| BasicBlock *HeaderBB = OrigLoop->getHeader(); | ||||||||||||||
| bool NeedsMasks = | ||||||||||||||
|
|
@@ -9329,23 +9330,58 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { | |||||||||||||
| RecipeBuilder.collectScaledReductions(Range); | ||||||||||||||
|
|
||||||||||||||
| auto *MiddleVPBB = Plan->getMiddleBlock(); | ||||||||||||||
|
|
||||||||||||||
| // Scan the body of the loop in a topological order to visit each basic block | ||||||||||||||
| // after having visited its predecessor basic blocks. | ||||||||||||||
| ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT( | ||||||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Move the above comment over here:
Suggested change
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done thanks |
||||||||||||||
| HeaderVPBB); | ||||||||||||||
|
|
||||||||||||||
| VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi(); | ||||||||||||||
| for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) { | ||||||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also remove above which become dead.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done thanks |
||||||||||||||
| // Relevant instructions from basic block BB will be grouped into VPRecipe | ||||||||||||||
| // ingredients and fill a new VPBasicBlock. | ||||||||||||||
| if (VPBB != HeaderVPBB) | ||||||||||||||
| VPBB->setName(BB->getName()); | ||||||||||||||
| Builder.setInsertPoint(VPBB); | ||||||||||||||
| VPBlockBase *PrevVPBB = nullptr; | ||||||||||||||
| for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) { | ||||||||||||||
| // Skip VPBBs not corresponding to any input IR basic blocks. | ||||||||||||||
|
||||||||||||||
| // Skip VPBBs not corresponding to any input IR basic blocks. | |
| // Handle VPBBs down to the latch. |
Latch also conceptually has a corresponding IRBB, with underlying Instructions?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks. Initially the latch in the IR is not the latch/exiting block in the VPlan; the exiting block is created as part of the skeleton.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| if (!HCFGBuilder.getIRBBForVPB(VPBB)) { | |
| assert(VPBB == LoopRegion->getExiting() && | |
| "only the latch block shouldn't have a corresponding IRBB"); | |
| if (VPBB == LoopRegion->getExiting()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we set the insert point of Builder once, here, as done now, possibly to first non-Phi of VPBB?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unfortunately that doesn't play nice with remove the recipes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: setting insert point here to first non phi of VPBB instead of beginning of VPBB doesn't play well with recipe removal?
| Builder.setInsertPoint(VPBB, VPBB->begin()); | |
| Builder.setInsertPoint(VPBB, VPBB->getFirstNonPhi()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need to insert at the begining here, as the blocks other than the header may contain VPWidenPhis that need to be converted to VPBlend, and the mask needs to be inserted at the beginning of the block for those blends.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So non-phi recipes that compute a block's mask must be introduced before the phi recipes of the block, because the latter is eventually replaced by blends that use the former? This calls for a FIXME note.
One alternative may be to sink the blends when they replace the phis.
Another is to introduce masking recipes along with replacing phi recipes with blends, as done currently in tryToBuildVPlanWithRecipes().
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added a FIXME for now. We should be able to sink the blends at the right place.
The whole mask creation logic will be replaced when rewriting to perform predication on VPlan, cleaning up the logic here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should the header mask also be created only if NeedsMasks? (Seems independent)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Currently createHeaderMask itself checks if tail folding is enabled, and only then creates the mask
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Time to move edge masks and block masks to be cached according to VPBB's rather than IRBB's?
Rather than hacking VPlanHCFGBuilder and PlainCFGBuilder to record and retrieve IRBB for VPBB.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it is somewhat a chicken-and-egg problem. Without this patch, we cannot do that, because we only ever create the flattened CFG in VPlan. I might be missing another option, but I think once this patch lands as first step we can move the mask creation to be based on the VPBlocks. WDYT?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I gave this a try, but it doesn't work out with the flattening of the CFG here, because we might already removed edges earlier before querying a mask on that edge.
We could first compute all masks in a separate loop, but there also are quite a few places that need updating, include BasicBlock -> VPBasicBlock, various places that get the mask for the parent of an instruction, looking for BranchOnCond instead of BranchInst, so that would also increase the diff quite a bit unfortunately. It would probably be better to tackle that separately, also to make testing easier.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, another suggestion: getIRBBForVPB() is needed only here - for non-header (non-latch) (non-empty?) VPBB, whose recipes are all expected to have underlying Instructions, which know their parental BasicBlock? Can the latter be retrieved as in below rather than hacking HCFGBuilder to record and expose this VRBB-to-IRBB mapping?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unfrotunately we don't create recipes for branches, so there may be empty blocks for which we would need to create masks (because there succesors may require them at the moment).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, let's go with HCFGBuilder.getIRBBForVPB(), for now ...
Following our roadmap, HCFGBuilder would be the VPlan-to-VPlan transformation producing "buildLoop" from "wrapInput", and tryToBuildVPlanWithRecipes would follow as a subsequent VPlan-to-VPlan transformation(s). Obtaining this VPBB-to-IRBB mapping from HCFGBuilder should be removed in one of two ways (or more):
- If the buildLoop VPlan is to maintain its correspondence with IR basic blocks then a new type of VPBB can be introduced - one providing getIRBasicBlock() similar to VPIRBasicBlock but having an execute() similar to VPBasicBlock - that generates new basic blocks, or deemed abstract/unreachable - to be materialized into a concreate VPBB prior to codegen.
- If, OTOH, getIRBBForVPB() is meant only as a temporary solution to support createBlockInMask() until the latter is upgraded to work directly on VPBB's, then a FIXME should be added.
(Recipes are created for internal conditional branches but not for unconditional branches (including early exits) - which imply a single successor. An empty block also implies a single predecessor due to lack of phi recipes. Figuring out the BasicBlock from the single predecessor and/or successor of an empty VPBB seems cumbersome; getIRBBForVPB() could record the mapping for empty VPBB's only; simplest to probably to record all VPBB's in loop, for now.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added a FXIME to the declaration, thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| RecipeBuilder.createBlockInMask(HCFGBuilder.getIRBBForVPB(VPBB)); | |
| auto *SingleDef = cast<VPSingleDefRecipe>(VPBB->begin()); | |
| Instruction *Instr = SingleDef->getUnderlyingInstr(); | |
| RecipeBuilder.createBlockInMask(Instr->getParent()); |
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| auto *SingleDef = cast<VPSingleDefRecipe>(&R); | |
| auto *SingleDef = cast<VPSingleDefRecipe>(&R); | |
| auto *UnderlyingValue = SingleDef->getUnderlyingValue(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add a FIXME to stop relying on the underlying values of VPInstructions.
The "buildLoop" VPlan0 should use VPInstructions with flags and/or other recipes to best model a copy of the original scalar loop, in contrast to VPIRInstructions which model the original scalar loop itself.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, thanks!
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| // latter is added by masking. | |
| // latter are added above for masking. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: consistency
| if (isa<VPCanonicalIVPHIRecipe>(SingleDef) || | |
| isa<VPWidenCanonicalIVRecipe>(SingleDef) || | |
| (isa<VPInstruction>(&R) && !SingleDef->getUnderlyingValue())) | |
| if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe>(&R) || | |
| (isa<VPInstruction>(&R) && !UnderlyingValue)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done ,thanks
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add a FIXME to stop using VPWidenPHIRecipe in VPlan0, which models (a copy of) the original scalar loop.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: consistency
| assert(isa<VPWidenPHIRecipe>(&R) || (isa<VPInstruction>(SingleDef) && | |
| SingleDef->getUnderlyingValue()) && | |
| assert(isa<VPWidenPHIRecipe, VPInstruction>(&R) && UnderlyingValue && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am not sure why but VPWidenPHIRecipe, VPInstruction cannot be used in a single isa<> here. Will need to look deeper what's going on
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Extra parens around the isa, since assert is a macro?
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can match via some m_Switch?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unfortunately at the moment the matchers require a fixed number of operands; need to think about how to match without a fixed number of ops.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Then perhaps more consistent / simpler to do:
| if (match(&R, m_BranchOnCond(m_VPValue())) || | |
| (isa<VPInstruction>(&R) && | |
| cast<VPInstruction>(&R)->getOpcode() == Instruction::Switch)) { | |
| if (isa<VPInstruction>(&R) && | |
| (cast<VPInstruction>(&R)->getOpcode() == VPInstruction::BranchOnCond || | |
| (cast<VPInstruction>(&R)->getOpcode() == Instruction::Switch)) { |
(once VPWidenPHIRecipe is turned into a VPInstruction this would be simpler)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: define Instr earlier to be reused above.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
getUnderlyingInstr will assert if there's no underlying value, left here for now
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Worth commenting that the other operand of the header phi - the one across the back-edge, will be added later?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| } | |
| R.eraseFromParent(); | |
| R.eraseFromParent(); | |
| } |
otherwise R seems to be dropped altogether? Is this case tested?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There may be multiple stores to the invariant address of a reduction (Legal->isInvariantAddressOfReduction(SI->getPointerOperand())). All of those need to be removed, and only the final one (isInvariantStoreOfReduction) will produce the final store recipe .
There are tests covering the cases with single and multiple stores
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This assert is associated with the preceding explanation. Both become obsolete?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated to only move truncated inductions, for which it still is needed
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Rephrase?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit:
| Builder.insert(Recipe); | |
| Builder.insert(Recipe); | |
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| // predecessors and successors. Then connect VPBB to the previously visited | |
| // VPBB. | |
| // successors. Then connect VPBB to the previously visited VPBB. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done thanks
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -592,12 +592,16 @@ static bool hasConditionalTerminator(const VPBasicBlock *VPBB) { | |
| bool IsCondBranch = isa<VPBranchOnMaskRecipe>(R) || | ||
| match(R, m_BranchOnCond(m_VPValue())) || | ||
| match(R, m_BranchOnCount(m_VPValue(), m_VPValue())); | ||
| bool IsSwitch = isa<VPInstruction>(R) && | ||
| cast<VPInstruction>(R)->getOpcode() == Instruction::Switch; | ||
| (void)IsCondBranch; | ||
| (void)IsSwitch; | ||
|
|
||
| if (VPBB->getNumSuccessors() >= 2 || | ||
|
||
| (VPBB->isExiting() && !VPBB->getParent()->isReplicator())) { | ||
| assert(IsCondBranch && "block with multiple successors not terminated by " | ||
| "conditional branch recipe"); | ||
| assert((IsCondBranch || IsSwitch) && | ||
| "block with multiple successors not terminated by " | ||
| "conditional branch nor switch recipe"); | ||
|
|
||
| return true; | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -75,7 +75,7 @@ class PlainCFGBuilder { | |
| : TheLoop(Lp), LI(LI), Plan(P) {} | ||
|
|
||
| /// Build plain CFG for TheLoop and connects it to Plan's entry. | ||
| void buildPlainCFG(); | ||
| void buildPlainCFG(DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB); | ||
| }; | ||
| } // anonymous namespace | ||
|
|
||
|
|
@@ -237,10 +237,10 @@ bool PlainCFGBuilder::isExternalDef(Value *Val) { | |
| // Instruction definition is in outermost loop PH. | ||
| return false; | ||
|
|
||
| // Check whether Instruction definition is in the loop exit. | ||
| BasicBlock *Exit = TheLoop->getUniqueExitBlock(); | ||
| assert(Exit && "Expected loop with single exit."); | ||
| if (InstParent == Exit) { | ||
| // Check whether Instruction definition is in a loop exit. | ||
| SmallVector<BasicBlock *> ExitBlocks; | ||
| TheLoop->getExitBlocks(ExitBlocks); | ||
| if (is_contained(ExitBlocks, InstParent)) { | ||
| // Instruction definition is in outermost loop exit. | ||
| return false; | ||
| } | ||
|
Comment on lines
+241
to
246
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No longer expecting a loop with single exit?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is needed for the inner loop multi-exit support. The native path still won't vectorize early exits, due to checks in legality I think. I can add a test case, thanks |
||
|
|
@@ -283,6 +283,7 @@ VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) { | |
| void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, | ||
| BasicBlock *BB) { | ||
| VPIRBuilder.setInsertPoint(VPBB); | ||
| // TODO: Model and preserve debug intrinsics in VPlan. | ||
| for (Instruction &InstRef : BB->instructionsWithoutDebug(false)) { | ||
| Instruction *Inst = &InstRef; | ||
|
|
||
|
|
@@ -308,6 +309,14 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, | |
| continue; | ||
| } | ||
|
|
||
| if (auto *SI = dyn_cast<SwitchInst>(Inst)) { | ||
| SmallVector<VPValue *> Ops = {getOrCreateVPOperand(SI->getCondition())}; | ||
| for (auto Case : SI->cases()) | ||
| Ops.push_back(getOrCreateVPOperand(Case.getCaseValue())); | ||
| VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst); | ||
| continue; | ||
| } | ||
|
|
||
|
Comment on lines
+312
to
+319
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Teaching PlainCFGBuilder about switch statements - does this imply that "native" can now vectorize outerloops with switches, and worth testing?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The native path still won't vectorize switches, due to checks in legality I think. I can add a test case, thanks |
||
| VPValue *NewVPV; | ||
| if (auto *Phi = dyn_cast<PHINode>(Inst)) { | ||
| // Phi node's operands may have not been visited at this point. We create | ||
|
|
@@ -334,7 +343,8 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, | |
| } | ||
|
|
||
| // Main interface to build the plain CFG. | ||
| void PlainCFGBuilder::buildPlainCFG() { | ||
| void PlainCFGBuilder::buildPlainCFG( | ||
| DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB) { | ||
| // 0. Reuse the top-level region, vector-preheader and exit VPBBs from the | ||
| // skeleton. These were created directly rather than via getOrCreateVPBB(), | ||
| // revisit them now to update BB2VPBB. Note that header/entry and | ||
|
|
@@ -423,6 +433,14 @@ void PlainCFGBuilder::buildPlainCFG() { | |
| // Set VPBB successors. We create empty VPBBs for successors if they don't | ||
| // exist already. Recipes will be created when the successor is visited | ||
| // during the RPO traversal. | ||
| if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { | ||
| SmallVector<VPBlockBase *> Succs = { | ||
| getOrCreateVPBB(SI->getDefaultDest())}; | ||
| for (auto Case : SI->cases()) | ||
| Succs.push_back(getOrCreateVPBB(Case.getCaseSuccessor())); | ||
| VPBB->setSuccessors(Succs); | ||
| continue; | ||
| } | ||
| auto *BI = cast<BranchInst>(BB->getTerminator()); | ||
| unsigned NumSuccs = succ_size(BB); | ||
| if (NumSuccs == 1) { | ||
|
|
@@ -476,11 +494,14 @@ void PlainCFGBuilder::buildPlainCFG() { | |
| // have a VPlan couterpart. Fix VPlan phi nodes by adding their corresponding | ||
| // VPlan operands. | ||
| fixPhiNodes(); | ||
|
|
||
| for (const auto &[IRBB, VPB] : BB2VPBB) | ||
| VPB2IRBB[VPB] = IRBB; | ||
| } | ||
|
|
||
| void VPlanHCFGBuilder::buildPlainCFG() { | ||
| PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan); | ||
| PCFGBuilder.buildPlainCFG(); | ||
| PCFGBuilder.buildPlainCFG(VPB2IRBB); | ||
| } | ||
|
|
||
| // Public interface to build a H-CFG. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps
VPlanHCFGBuilder::buildingHierarchicalCFG()which creates initial VPBB's inside loop region ( viaPlainCFGBuilder::buildPlainCFG()) should now be combined withVPlan::createInitialVPlan()which creates initial VPBB's elsewhere?Can be done as follow-up.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds good, maybe best to merge as follow-up