-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[VPlan] Materialize Build(Struct)Vectors for VPReplicateRecipes. (NFCI) #151487
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
d585757
66dea83
d293818
a2e228e
6db99e6
0d7aeda
8ad1800
0dee36c
8574acd
d4994de
31bbac2
8b9f98f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -3278,6 +3278,52 @@ void VPlanTransforms::materializeBackedgeTakenCount(VPlan &Plan, | |||||
| BTC->replaceAllUsesWith(TCMO); | ||||||
| } | ||||||
|
|
||||||
| void VPlanTransforms::materializeBuildVectors(VPlan &Plan) { | ||||||
| if (Plan.hasScalarVFOnly()) | ||||||
| return; | ||||||
|
|
||||||
| VPTypeAnalysis TypeInfo(Plan); | ||||||
| VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); | ||||||
| auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>( | ||||||
| vp_depth_first_shallow(Plan.getEntry())); | ||||||
| auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>( | ||||||
| vp_depth_first_shallow(LoopRegion->getEntry())); | ||||||
| // Materialize Build(Struct)Vector for all replicating VPReplicateRecipes, | ||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. "replicating" stand for "non uniform", leaving the uniform case to materializeBroadcasts()?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep |
||||||
| // excluding ones in replicate regions. Those are not materialized explicitly | ||||||
| // yet. Those vector users are still handled in VPReplicateRegion::execute(), | ||||||
| // via shouldPack(). | ||||||
| // TODO: materialize build vectors for replicating recipes in replicating | ||||||
| // regions. | ||||||
| // TODO: materialize build vectors for VPInstructions. | ||||||
| for (VPBasicBlock *VPBB : | ||||||
| concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) { | ||||||
| for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { | ||||||
| auto *RepR = dyn_cast<VPReplicateRecipe>(&R); | ||||||
| auto UsesVectorOrInsideReplicateRegions = [RepR, LoopRegion](VPUser *U) { | ||||||
|
||||||
| auto UsesVectorOrInsideReplicateRegions = [RepR, LoopRegion](VPUser *U) { | |
| auto UsesVectorOrInsideReplicateRegion = [RepR, LoopRegion](VPUser *U) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -465,10 +465,12 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) { | |||||||||||||
| VPlanTransforms::removeDeadRecipes(Plan); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| /// Create a single-scalar clone of \p RepR for lane \p Lane. | ||||||||||||||
| static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder, | ||||||||||||||
| Type *IdxTy, VPReplicateRecipe *RepR, | ||||||||||||||
| VPLane Lane) { | ||||||||||||||
| /// Create a single-scalar clone of \p RepR for lane \p Lane. Use \p | ||||||||||||||
| /// Def2LaneDefs to look up scalar definitions for operands of \RepR. | ||||||||||||||
| static VPReplicateRecipe * | ||||||||||||||
| cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, | ||||||||||||||
| VPReplicateRecipe *RepR, VPLane Lane, | ||||||||||||||
| const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) { | ||||||||||||||
| // Collect the operands at Lane, creating extracts as needed. | ||||||||||||||
| SmallVector<VPValue *> NewOps; | ||||||||||||||
| for (VPValue *Op : RepR->operands()) { | ||||||||||||||
|
|
@@ -481,6 +483,14 @@ static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder, | |||||||||||||
| Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op})); | ||||||||||||||
| continue; | ||||||||||||||
| } | ||||||||||||||
| // If Op is a definition that has been unrolled, directly use the clone for | ||||||||||||||
| // the corresponding lane. | ||||||||||||||
| auto LaneDefs = Def2LaneDefs.find(Op); | ||||||||||||||
| if (LaneDefs != Def2LaneDefs.end()) { | ||||||||||||||
| NewOps.push_back(LaneDefs->second[Lane.getKnownLane()]); | ||||||||||||||
| continue; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // Look through buildvector to avoid unnecessary extracts. | ||||||||||||||
| if (match(Op, m_BuildVector())) { | ||||||||||||||
| NewOps.push_back( | ||||||||||||||
|
|
@@ -513,6 +523,8 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) { | |||||||||||||
| vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry())); | ||||||||||||||
| auto VPBBsToUnroll = | ||||||||||||||
| concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion); | ||||||||||||||
| DenseMap<VPValue *, SmallVector<VPValue *>> Def2LaneDefs; | ||||||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added, thanks |
||||||||||||||
| SmallVector<VPRecipeBase *> ToRemove; | ||||||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added thanks |
||||||||||||||
| for (VPBasicBlock *VPBB : VPBBsToUnroll) { | ||||||||||||||
| for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { | ||||||||||||||
| auto *RepR = dyn_cast<VPReplicateRecipe>(&R); | ||||||||||||||
|
|
@@ -524,36 +536,45 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) { | |||||||||||||
| if (isa<StoreInst>(RepR->getUnderlyingInstr()) && | ||||||||||||||
| vputils::isSingleScalar(RepR->getOperand(1))) { | ||||||||||||||
| // Stores to invariant addresses need to store the last lane only. | ||||||||||||||
| cloneForLane(Plan, Builder, IdxTy, RepR, | ||||||||||||||
| VPLane::getLastLaneForVF(VF)); | ||||||||||||||
| cloneForLane(Plan, Builder, IdxTy, RepR, VPLane::getLastLaneForVF(VF), | ||||||||||||||
| Def2LaneDefs); | ||||||||||||||
| } else { | ||||||||||||||
| // Create single-scalar version of RepR for all lanes. | ||||||||||||||
| for (unsigned I = 0; I != VF.getKnownMinValue(); ++I) | ||||||||||||||
| cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I)); | ||||||||||||||
| cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs); | ||||||||||||||
| } | ||||||||||||||
| RepR->eraseFromParent(); | ||||||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note that the erasure of use-less RepR here removes it from being a user of its operands.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup |
||||||||||||||
| continue; | ||||||||||||||
| } | ||||||||||||||
| /// Create single-scalar version of RepR for all lanes. | ||||||||||||||
| SmallVector<VPValue *> LaneDefs; | ||||||||||||||
| for (unsigned I = 0; I != VF.getKnownMinValue(); ++I) | ||||||||||||||
| LaneDefs.push_back(cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I))); | ||||||||||||||
| LaneDefs.push_back( | ||||||||||||||
| cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs)); | ||||||||||||||
|
|
||||||||||||||
| Def2LaneDefs[RepR] = LaneDefs; | ||||||||||||||
| /// Users that only demand the first lane can use the definition for lane | ||||||||||||||
| /// 0. | ||||||||||||||
| RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) { | ||||||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note that the RUWI of removes users of RepR that use only its first lane.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup |
||||||||||||||
| return U.onlyFirstLaneUsed(RepR); | ||||||||||||||
| }); | ||||||||||||||
|
|
||||||||||||||
| // If needed, create a Build(Struct)Vector recipe to insert the scalar | ||||||||||||||
| // lane values into a vector. | ||||||||||||||
| Type *ResTy = RepR->getUnderlyingInstr()->getType(); | ||||||||||||||
| VPValue *VecRes = Builder.createNaryOp( | ||||||||||||||
| ResTy->isStructTy() ? VPInstruction::BuildStructVector | ||||||||||||||
| : VPInstruction::BuildVector, | ||||||||||||||
| LaneDefs); | ||||||||||||||
| RepR->replaceAllUsesWith(VecRes); | ||||||||||||||
| RepR->eraseFromParent(); | ||||||||||||||
| // Update each build vector user that currently has RepR as its only | ||||||||||||||
| // operand, to have all LaneDefs as its operands. | ||||||||||||||
| for (VPUser *U : to_vector(RepR->users())) { | ||||||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth explaining what's about to happen - update each build vector user that currently has RepR as its only operand, to have all LaneDefs as its operands.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added, thanks |
||||||||||||||
| auto *VPI = dyn_cast<VPInstruction>(U); | ||||||||||||||
| if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector && | ||||||||||||||
| VPI->getOpcode() != VPInstruction::BuildStructVector)) | ||||||||||||||
| continue; | ||||||||||||||
| assert(VPI->getNumOperands() == 1 && | ||||||||||||||
| "Build(Struct)Vector must have a single operand before replicating by VF""); | ||||||||||||||
| VPI->setOperand(0, LaneDefs[0]); | ||||||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove VPI as a user of RepR, as done by replaceUsesWithIf(LaneDefs[0])? Also remove RepR as user of Op when cloneForLane() hooks up to Op's LaneDefs, so that dce will be able to collect all recipes left useless? Or check if RepR becomes useless after each such removal, and collect it if so.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Above will only update the users that only use the first lane. Probably best done together with adding the other operands here?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we do
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, we still need to update the operand to use LaneDefs[0] instead of RepR here, which will implicitly remove VPI as user of RepR. |
||||||||||||||
| for (VPValue *Def : drop_begin(LaneDefs)) | ||||||||||||||
| VPI->addOperand(Def); | ||||||||||||||
artagnon marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||
| for (VPValue *Def : drop_begin(LaneDefs)) | |
| VPI->addOperand(Def); | |
| for (VPValue *LaneDef : drop_begin(LaneDefs)) | |
| VPI->addOperand(LaneDef); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added thanks
artagnon marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| for (auto *R : reverse(ToRemove)) | |
| R->eraseFromParent(); | |
| for (auto *R : reverse(ToRemove)) { | |
| assert(R->users().empty() && "Recipe to removed expected to be free of users."); | |
| R->eraseFromParent(); | |
| } |
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
::eraseFromParent will already assert that there are no remaining users.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Placement of materializeBuildVectors() before replicateByVF() is clear, but would be good to have the two materialize*() next to each other, being analogous - could materializeBroadcasts() be hoisted to also appear before replicateByVF()?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, moved up thanks