-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[VPlan] Set branch weight metadata on middle term in VPlan (NFC) #143035
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7273,6 +7273,33 @@ static void fixReductionScalarResumeWhenVectorizingEpilog( | |
| BypassBlock, MainResumePhi->getIncomingValueForBlock(BypassBlock)); | ||
| } | ||
|
|
||
| /// Add branch weight metadata, if the \p Plan's middle block is terminated by a | ||
| /// BranchOnCond recipe. | ||
| static void addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF, | ||
| Loop *OrigLoop) { | ||
| // 4. Adjust branch weight of the branch in the middle block. | ||
| Instruction *LatchTerm = OrigLoop->getLoopLatch()->getTerminator(); | ||
| if (!hasBranchWeightMD(*LatchTerm)) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note that weights are added to the new middle terminator only if the original latch terminator has weights, although the weights themselves are independent. Would it suffice to indicate whether VPlan should introduce branch weights by noting if the original loop has any? |
||
| return; | ||
|
|
||
| VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock(); | ||
| auto *MiddleTerm = | ||
| dyn_cast_or_null<VPInstruction>(MiddleVPBB->getTerminator()); | ||
|
||
| // Only add branch metadata if there is a (conditional) terminator. | ||
| if (!MiddleTerm) | ||
| return; | ||
|
|
||
| assert(MiddleTerm->getOpcode() == VPInstruction::BranchOnCond && | ||
| "must have a BranchOnCond"); | ||
| // Assume that `Count % VectorTripCount` is equally distributed. | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
| unsigned TripCount = Plan.getUF() * VF.getKnownMinValue(); | ||
|
||
| assert(TripCount > 0 && "trip count should not be zero"); | ||
| MDBuilder MDB(LatchTerm->getContext()); | ||
| MDNode *BranchWeights = | ||
| MDB.createBranchWeights({1, TripCount - 1}, /*IsExpected=*/false); | ||
| MiddleTerm->addMetadata(LLVMContext::MD_prof, BranchWeights); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Alternative to introducing |
||
| } | ||
|
|
||
| DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan( | ||
| ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan, | ||
| InnerLoopVectorizer &ILV, DominatorTree *DT, bool VectorizingEpilogue) { | ||
|
|
@@ -7295,11 +7322,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan( | |
|
|
||
| VPlanTransforms::convertToConcreteRecipes(BestVPlan, | ||
| *Legal->getWidestInductionType()); | ||
| // Retrieve and store the middle block before dissolving regions. Regions are | ||
| // dissolved after optimizing for VF and UF, which completely removes unneeded | ||
| // loop regions first. | ||
|
Comment on lines
-7298
to
-7300
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This part of the comment explains the positioning of |
||
| VPBasicBlock *MiddleVPBB = | ||
| BestVPlan.getVectorLoopRegion() ? BestVPlan.getMiddleBlock() : nullptr; | ||
|
|
||
| addBranchWeightToMiddleTerminator(BestVPlan, BestVF, OrigLoop); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could this be a VPlanTransform? |
||
| VPlanTransforms::dissolveLoopRegions(BestVPlan); | ||
| // Perform the actual loop transformation. | ||
| VPTransformState State(&TTI, BestVF, LI, DT, ILV.AC, ILV.Builder, &BestVPlan, | ||
|
|
@@ -7442,20 +7466,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan( | |
|
|
||
| ILV.printDebugTracesAtEnd(); | ||
|
|
||
| // 4. Adjust branch weight of the branch in the middle block. | ||
| if (HeaderVPBB) { | ||
| auto *MiddleTerm = | ||
| cast<BranchInst>(State.CFG.VPBB2IRBB[MiddleVPBB]->getTerminator()); | ||
| if (MiddleTerm->isConditional() && | ||
| hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) { | ||
| // Assume that `Count % VectorTripCount` is equally distributed. | ||
| unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue(); | ||
| assert(TripCount > 0 && "trip count should not be zero"); | ||
| const uint32_t Weights[] = {1, TripCount - 1}; | ||
| setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false); | ||
| } | ||
| } | ||
|
|
||
| return ExpandedSCEVs; | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -882,11 +882,39 @@ template <unsigned PartOpIdx> class VPUnrollPartAccessor { | |
| unsigned getUnrollPart(VPUser &U) const; | ||
| }; | ||
|
|
||
| /// Helper to manage IR metadata for recipes. It filters out metadata that | ||
| /// cannot be propagated. | ||
| class VPIRMetadata { | ||
| SmallVector<std::pair<unsigned, MDNode *>> Metadata; | ||
|
|
||
| public: | ||
| VPIRMetadata() {} | ||
|
|
||
| /// Adds metadata that can be preserved from the original instruction | ||
| /// \p I. | ||
| VPIRMetadata(Instruction &I) { getMetadataToPropagate(&I, Metadata); } | ||
|
|
||
| /// Adds metadata that can be preserved from the original instruction | ||
| /// \p I and noalias metadata guaranteed by runtime checks using \p LVer. | ||
| VPIRMetadata(Instruction &I, LoopVersioning *LVer); | ||
|
|
||
| /// Copy constructor for cloning. | ||
| VPIRMetadata(const VPIRMetadata &Other) : Metadata(Other.Metadata) {} | ||
|
|
||
| /// Add all metadata to \p I. | ||
| void applyMetadata(Instruction &I) const; | ||
|
|
||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The newly introduced |
||
| void addMetadata(unsigned Kind, MDNode *Node) { | ||
| Metadata.emplace_back(Kind, Node); | ||
| } | ||
| }; | ||
|
|
||
| /// This is a concrete Recipe that models a single VPlan-level instruction. | ||
| /// While as any Recipe it may generate a sequence of IR instructions when | ||
| /// executed, these instructions would always form a single-def expression as | ||
| /// the VPInstruction is also a single def-use vertex. | ||
| class VPInstruction : public VPRecipeWithIRFlags, | ||
| public VPIRMetadata, | ||
| public VPUnrollPartAccessor<1> { | ||
| friend class VPlanSlp; | ||
|
|
||
|
|
@@ -976,7 +1004,7 @@ class VPInstruction : public VPRecipeWithIRFlags, | |
| VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL = {}, | ||
| const Twine &Name = "") | ||
| : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL), | ||
| Opcode(Opcode), Name(Name.str()) {} | ||
| VPIRMetadata(), Opcode(Opcode), Name(Name.str()) {} | ||
|
|
||
| VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, | ||
| const VPIRFlags &Flags, DebugLoc DL = {}, | ||
|
|
@@ -1268,29 +1296,6 @@ struct VPIRPhi : public VPIRInstruction, public VPPhiAccessors { | |
| const VPRecipeBase *getAsRecipe() const override { return this; } | ||
| }; | ||
|
|
||
| /// Helper to manage IR metadata for recipes. It filters out metadata that | ||
| /// cannot be propagated. | ||
| class VPIRMetadata { | ||
| SmallVector<std::pair<unsigned, MDNode *>> Metadata; | ||
|
|
||
| public: | ||
| VPIRMetadata() {} | ||
|
|
||
| /// Adds metadata that can be preserved from the original instruction | ||
| /// \p I. | ||
| VPIRMetadata(Instruction &I) { getMetadataToPropagate(&I, Metadata); } | ||
|
|
||
| /// Adds metadata that can be preserved from the original instruction | ||
| /// \p I and noalias metadata guaranteed by runtime checks using \p LVer. | ||
| VPIRMetadata(Instruction &I, LoopVersioning *LVer); | ||
|
|
||
| /// Copy constructor for cloning. | ||
| VPIRMetadata(const VPIRMetadata &Other) : Metadata(Other.Metadata) {} | ||
|
|
||
| /// Add all metadata to \p I. | ||
| void applyMetadata(Instruction &I) const; | ||
| }; | ||
|
|
||
| /// VPWidenRecipe is a recipe for producing a widened instruction using the | ||
| /// opcode and operands of the recipe. This recipe covers most of the | ||
| /// traditional vectorization cases where each recipe transforms into a | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Drop the "4." — or rather the entire line, since it is already covered by the documentation of the function above?