-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[VPlan] Introduce scalar loop header in plan, remove VPLiveOut. #109975
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
8482c79
a688a02
30d0692
fe6138c
922d066
e5ba9a1
9fb2d45
769c4aa
28b51f5
8fc5313
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -2711,7 +2711,8 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton( | |||
| | | | ||||
| (opt) v <-- edge from middle to exit iff epilogue is not required. | ||||
| | [ ] \ | ||||
| | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue). | ||||
| | [ ]_| <-- old scalar loop to handle remainder (scalar epilogue, header | ||||
| | | wrapped in VPIRBasicBlock). | ||||
| \ | | ||||
| \ v | ||||
| >[ ] <-- exit block(s). (wrapped in VPIRBasicBlock) | ||||
|
|
@@ -2969,10 +2970,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, | |||
| IVEndValues[Entry.first], LoopMiddleBlock, Plan, State); | ||||
| } | ||||
|
|
||||
| // Fix live-out phis not already fixed earlier. | ||||
| for (const auto &KV : Plan.getLiveOuts()) | ||||
| KV.second->fixPhi(Plan, State); | ||||
|
|
||||
| for (Instruction *PI : PredicatedInstructions) | ||||
| sinkScalarOperands(&*PI); | ||||
|
|
||||
|
|
@@ -8790,6 +8787,31 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW, | |||
| {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL); | ||||
| } | ||||
|
|
||||
| /// Create resume phis in the scalar preheader for first-order recurrences and | ||||
| /// reductions and update the VPIRInstructions wrapping the original phis in the | ||||
| /// scalar header. | ||||
| static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) { | ||||
| for (VPRecipeBase &R : *Plan.getScalarHeader()) { | ||||
| auto *IRI = cast<VPIRInstruction>(&R); | ||||
| if (!isa<PHINode>(IRI->getInstruction())) | ||||
| break; | ||||
|
|
||||
| VPBuilder ScalarPHBuilder(Plan.getScalarPreheader()); | ||||
| auto *VectorR = | ||||
| dyn_cast<VPHeaderPHIRecipe>(Builder.getRecipe(&IRI->getInstruction())); | ||||
| if (isa<VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe>(VectorR)) { | ||||
| StringRef Name = isa<VPFirstOrderRecurrencePHIRecipe>(VectorR) | ||||
| ? "scalar.recur.init" | ||||
| : "bc.merge.rdx"; | ||||
| auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp( | ||||
| VPInstruction::ResumePhi, | ||||
| {VectorR->getBackedgeValue(), VectorR->getStartValue()}, {}, Name); | ||||
|
|
||||
| IRI->addOperand(ResumePhiRecipe); | ||||
| } | ||||
| } | ||||
| } | ||||
|
|
||||
| // Collect VPIRInstructions for phis in the original exit block that are modeled | ||||
| // in VPlan and add the exiting VPValue as operand. Some exiting values are not | ||||
| // modeled explicitly yet and won't be included. Those are un-truncated | ||||
|
|
@@ -8876,24 +8898,8 @@ addUsersInExitBlock(VPlan &Plan, | |||
| static void addLiveOutsForFirstOrderRecurrences( | ||||
|
||||
| VPlan &Plan, SetVector<VPIRInstruction *> &ExitUsersToFix) { | ||||
| VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion(); | ||||
|
|
||||
| // Start by finding out if middle block branches to scalar preheader, which is | ||||
| // not a VPIRBasicBlock, unlike Exit block - the other possible successor of | ||||
| // middle block. | ||||
| // TODO: Should be replaced by | ||||
| // Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the | ||||
| // scalar region is modeled as well. | ||||
| auto *ScalarPHVPBB = Plan.getScalarPreheader(); | ||||
| auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor()); | ||||
| VPBasicBlock *ScalarPHVPBB = nullptr; | ||||
| if (MiddleVPBB->getNumSuccessors() == 2) { | ||||
| // Order is strict: first is the exit block, second is the scalar preheader. | ||||
| ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]); | ||||
| } else if (ExitUsersToFix.empty()) { | ||||
| ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor()); | ||||
| } else { | ||||
| llvm_unreachable("unsupported CFG in VPlan"); | ||||
| } | ||||
|
|
||||
| VPBuilder ScalarPHBuilder(ScalarPHVPBB); | ||||
| VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi()); | ||||
| VPValue *OneVPV = Plan.getOrAddLiveIn( | ||||
|
|
@@ -8973,28 +8979,30 @@ static void addLiveOutsForFirstOrderRecurrences( | |||
| // lo = lcssa.phi [s1, scalar.body], | ||||
| // [vector.recur.extract.for.phi, middle.block] | ||||
| // | ||||
| // Extract the resume value and create a new VPLiveOut for it. | ||||
| auto *Resume = MiddleBuilder.createNaryOp(VPInstruction::ExtractFromEnd, | ||||
| {FOR->getBackedgeValue(), OneVPV}, | ||||
| {}, "vector.recur.extract"); | ||||
| auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp( | ||||
| VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {}, | ||||
| "scalar.recur.init"); | ||||
| auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr()); | ||||
| Plan.addLiveOut(FORPhi, ResumePhiRecipe); | ||||
|
|
||||
| // Extract the resume value. | ||||
|
||||
| // Extract the resume value. |
extract is extracted from here and placed further below, but better placed in addScalarResumePhis() as it feeds scalar loop?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(independent): ok to remove ExitIRI during traversal of ExitUsersToFix below? Better make_early_inc_range?
OTOH, is more than one LCSSA phi expected to have FOR as its operand? If not better break as soon as it is found and handled.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes I thought the same thing, will adjust separately.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
While we're here, independent: Ext should be better (re)named, e.g., PenultimateElement.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps first collecting ExitUsersToFix, then searching for to handle and remove the FOR ones, and finally handling all remaining ones, can now be simplified and fused, to be more consistent with how addScalarResumePhis() handles scalar preheader phis. WDYT?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adjusted as suggested, will revisit and further unify as follow-up (at the latest when inductions are also modeled in VPlan)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -455,8 +455,9 @@ void VPIRBasicBlock::execute(VPTransformState *State) { | |
| "VPIRBasicBlock can have at most two successors at the moment!"); | ||
| State->Builder.SetInsertPoint(IRBB->getTerminator()); | ||
| executeRecipes(State, IRBB); | ||
| if (getSingleSuccessor()) { | ||
| assert(isa<UnreachableInst>(IRBB->getTerminator())); | ||
| // Create a branch instruction to terminate IRBB if one was not created yet | ||
| // and is needed. | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps worth asserting that if there are two successors (or more than one, asserted above to be <= 2), terminator isn't
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added thanks |
||
| if (getSingleSuccessor() && isa<UnreachableInst>(IRBB->getTerminator())) { | ||
| auto *Br = State->Builder.CreateBr(IRBB); | ||
| Br->setOperand(0, nullptr); | ||
| IRBB->getTerminator()->eraseFromParent(); | ||
|
|
@@ -474,7 +475,7 @@ void VPIRBasicBlock::execute(VPTransformState *State) { | |
| // backedges. A backward successor is set when the branch is created. | ||
| const auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors(); | ||
| unsigned idx = PredVPSuccessors.front() == this ? 0 : 1; | ||
| assert(!TermBr->getSuccessor(idx) && | ||
| assert((!TermBr->getSuccessor(idx) || TermBr->getSuccessor(idx) == IRBB) && | ||
| "Trying to reset an existing successor block."); | ||
| TermBr->setSuccessor(idx, IRBB); | ||
| State->CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, IRBB}}); | ||
|
|
@@ -843,10 +844,6 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, | |
| #endif | ||
|
|
||
| VPlan::~VPlan() { | ||
| for (auto &KV : LiveOuts) | ||
| delete KV.second; | ||
| LiveOuts.clear(); | ||
|
|
||
| if (Entry) { | ||
| VPValue DummyValue; | ||
| for (VPBlockBase *Block : vp_depth_first_shallow(Entry)) | ||
|
|
@@ -878,7 +875,9 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, | |
| VPIRBasicBlock *Entry = | ||
| VPIRBasicBlock::fromBasicBlock(TheLoop->getLoopPreheader()); | ||
| VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph"); | ||
| auto Plan = std::make_unique<VPlan>(Entry, VecPreheader); | ||
| VPIRBasicBlock *ScalarHeader = | ||
| VPIRBasicBlock::fromBasicBlock(TheLoop->getHeader()); | ||
| auto Plan = std::make_unique<VPlan>(Entry, VecPreheader, ScalarHeader); | ||
|
|
||
| // Create SCEV and VPValue for the trip count. | ||
|
|
||
|
|
@@ -909,6 +908,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, | |
| VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); | ||
|
|
||
| VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph"); | ||
| VPBlockUtils::connectBlocks(ScalarPH, ScalarHeader); | ||
| if (!RequiresScalarEpilogueCheck) { | ||
| VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); | ||
| return Plan; | ||
|
|
@@ -1032,19 +1032,8 @@ void VPlan::execute(VPTransformState *State) { | |
| BasicBlock *MiddleBB = State->CFG.ExitBB; | ||
| VPBasicBlock *MiddleVPBB = | ||
| cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor()); | ||
| // Find the VPBB for the scalar preheader, relying on the current structure | ||
| // when creating the middle block and its successrs: if there's a single | ||
| // predecessor, it must be the scalar preheader. Otherwise, the second | ||
| // successor is the scalar preheader. | ||
| BasicBlock *ScalarPh = MiddleBB->getSingleSuccessor(); | ||
| auto &MiddleSuccs = MiddleVPBB->getSuccessors(); | ||
| assert((MiddleSuccs.size() == 1 || MiddleSuccs.size() == 2) && | ||
| "middle block has unexpected successors"); | ||
| VPBasicBlock *ScalarPhVPBB = cast<VPBasicBlock>( | ||
| MiddleSuccs.size() == 1 ? MiddleSuccs[0] : MiddleSuccs[1]); | ||
| assert(!isa<VPIRBasicBlock>(ScalarPhVPBB) && | ||
| "scalar preheader cannot be wrapped already"); | ||
| replaceVPBBWithIRVPBB(ScalarPhVPBB, ScalarPh); | ||
| replaceVPBBWithIRVPBB(getScalarPreheader(), ScalarPh); | ||
| replaceVPBBWithIRVPBB(MiddleVPBB, MiddleBB); | ||
|
|
||
| // Disconnect the middle block from its single successor (the scalar loop | ||
|
|
@@ -1054,6 +1043,8 @@ void VPlan::execute(VPTransformState *State) { | |
| BrInst->insertBefore(MiddleBB->getTerminator()); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Simplify the above "Find the VPBB for the scalar preheader, ...".
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, thanks! |
||
| MiddleBB->getTerminator()->eraseFromParent(); | ||
| State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}}); | ||
| State->CFG.DTU.applyUpdates( | ||
| {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}}); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ScalarPh will still dominate its single successor, but we apply this deletion here to facilitate its addition later? Corresponding to disconnecting the branch and reconnecting it later.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Understood, thanks. Noting that DTU keeps the Delete and Insert, even when the branch is now retained rather than disconnected and reconnected.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I think it is more convenient to keep it, as it allows the DTU update logic in VPBB to be kept generic
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fine, perhaps worth noting that this is the reason for temporarily disconnecting scalar preheader from its successor.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added, thanks! |
||
|
|
||
| // Generate code in the loop pre-header and body. | ||
| for (VPBlockBase *Block : vp_depth_first_shallow(Entry)) | ||
|
|
@@ -1172,12 +1163,6 @@ void VPlan::print(raw_ostream &O) const { | |
| Block->print(O, "", SlotTracker); | ||
| } | ||
|
|
||
| if (!LiveOuts.empty()) | ||
| O << "\n"; | ||
| for (const auto &KV : LiveOuts) { | ||
| KV.second->print(O, SlotTracker); | ||
| } | ||
|
|
||
| O << "}\n"; | ||
| } | ||
|
|
||
|
|
@@ -1214,11 +1199,6 @@ LLVM_DUMP_METHOD | |
| void VPlan::dump() const { print(dbgs()); } | ||
| #endif | ||
|
|
||
| void VPlan::addLiveOut(PHINode *PN, VPValue *V) { | ||
| assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists"); | ||
| LiveOuts.insert({PN, new VPLiveOut(PN, V)}); | ||
| } | ||
|
|
||
| static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, | ||
| DenseMap<VPValue *, VPValue *> &Old2NewVPValues) { | ||
| // Update the operands of all cloned recipes starting at NewEntry. This | ||
|
|
@@ -1262,8 +1242,16 @@ VPlan *VPlan::duplicate() { | |
| VPBasicBlock *NewPreheader = Preheader->clone(); | ||
| const auto &[NewEntry, __] = cloneFrom(Entry); | ||
|
|
||
| BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock(); | ||
| VPIRBasicBlock *NewScalarHeader = | ||
| *find_if(VPBlockUtils::blocksOnly<VPIRBasicBlock>( | ||
| vp_depth_first_shallow(NewEntry)), | ||
| [ScalarHeaderIRBB](VPIRBasicBlock *VPIRBB) { | ||
| return ScalarHeaderIRBB == VPIRBB->getIRBasicBlock(); | ||
| }); | ||
| // Create VPlan, clone live-ins and remap operands in the cloned blocks. | ||
| auto *NewPlan = new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry)); | ||
| auto *NewPlan = | ||
| new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry), NewScalarHeader); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit (unrelated): should NewEntry be returned as a VPBB rather than a VPBlockBase?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will do separately, thanks! |
||
| DenseMap<VPValue *, VPValue *> Old2NewVPValues; | ||
| for (VPValue *OldLiveIn : VPLiveInsToFree) { | ||
| Old2NewVPValues[OldLiveIn] = | ||
|
|
@@ -1286,10 +1274,6 @@ VPlan *VPlan::duplicate() { | |
| remapOperands(Preheader, NewPreheader, Old2NewVPValues); | ||
| remapOperands(Entry, NewEntry, Old2NewVPValues); | ||
|
|
||
| // Clone live-outs. | ||
| for (const auto &[_, LO] : LiveOuts) | ||
| NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]); | ||
|
|
||
| // Initialize remaining fields of cloned VPlan. | ||
| NewPlan->VFs = VFs; | ||
| NewPlan->UFs = UFs; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Various suggestions below, from using more consistent variable names, to introducing the extract in the middle block for FORs, so that this method takes full care of feeding scalar loop with values coming from vector loop. See what you think of any/all/none:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adjusted, thanks!