diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index d236111836391..93ab3353a296a 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7273,6 +7273,33 @@ static void fixReductionScalarResumeWhenVectorizingEpilog( BypassBlock, MainResumePhi->getIncomingValueForBlock(BypassBlock)); } +/// Add branch weight metadata, if the \p Plan's middle block is terminated by a +/// BranchOnCond recipe and \p OrigLoop's latch terminator has branch weights. +static void addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF, + Loop *OrigLoop) { + // Adjust the branch weight of the branch in the middle block. + Instruction *LatchTerm = OrigLoop->getLoopLatch()->getTerminator(); + if (!hasBranchWeightMD(*LatchTerm)) + return; + + VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock(); + auto *MiddleTerm = + dyn_cast_or_null(MiddleVPBB->getTerminator()); + // Only add branch metadata if there is a (conditional) terminator. + if (!MiddleTerm) + return; + + assert(MiddleTerm->getOpcode() == VPInstruction::BranchOnCond && + "must have a BranchOnCond"); + // Assume that `Count % VectorTripCount` is equally distributed. + unsigned TripCount = Plan.getUF() * VF.getKnownMinValue(); + assert(TripCount > 0 && "trip count should not be zero"); + MDBuilder MDB(LatchTerm->getContext()); + MDNode *BranchWeights = + MDB.createBranchWeights({1, TripCount - 1}, /*IsExpected=*/false); + MiddleTerm->addMetadata(LLVMContext::MD_prof, BranchWeights); +} + DenseMap LoopVectorizationPlanner::executePlan( ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan, InnerLoopVectorizer &ILV, DominatorTree *DT, bool VectorizingEpilogue) { @@ -7295,11 +7322,8 @@ DenseMap LoopVectorizationPlanner::executePlan( VPlanTransforms::convertToConcreteRecipes(BestVPlan, *Legal->getWidestInductionType()); - // Retrieve and store the middle block before dissolving regions. 
Regions are - // dissolved after optimizing for VF and UF, which completely removes unneeded - // loop regions first. - VPBasicBlock *MiddleVPBB = - BestVPlan.getVectorLoopRegion() ? BestVPlan.getMiddleBlock() : nullptr; + + addBranchWeightToMiddleTerminator(BestVPlan, BestVF, OrigLoop); VPlanTransforms::dissolveLoopRegions(BestVPlan); // Perform the actual loop transformation. VPTransformState State(&TTI, BestVF, LI, DT, ILV.AC, ILV.Builder, &BestVPlan, @@ -7442,20 +7466,6 @@ DenseMap LoopVectorizationPlanner::executePlan( ILV.printDebugTracesAtEnd(); - // 4. Adjust branch weight of the branch in the middle block. - if (HeaderVPBB) { - auto *MiddleTerm = - cast(State.CFG.VPBB2IRBB[MiddleVPBB]->getTerminator()); - if (MiddleTerm->isConditional() && - hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) { - // Assume that `Count % VectorTripCount` is equally distributed. - unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue(); - assert(TripCount > 0 && "trip count should not be zero"); - const uint32_t Weights[] = {1, TripCount - 1}; - setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false); - } - } - return ExpandedSCEVs; } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index acc861b991975..468284168e9ca 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -882,11 +882,39 @@ template class VPUnrollPartAccessor { unsigned getUnrollPart(VPUser &U) const; }; +/// Helper to manage IR metadata for recipes. It filters out metadata that +/// cannot be propagated. +class VPIRMetadata { + SmallVector> Metadata; + +public: + VPIRMetadata() {} + + /// Adds metadata that can be preserved from the original instruction + /// \p I. + VPIRMetadata(Instruction &I) { getMetadataToPropagate(&I, Metadata); } + + /// Adds metadata that can be preserved from the original instruction + /// \p I and noalias metadata guaranteed by runtime checks using \p LVer. 
+ VPIRMetadata(Instruction &I, LoopVersioning *LVer); + + /// Copy constructor for cloning. + VPIRMetadata(const VPIRMetadata &Other) : Metadata(Other.Metadata) {} + + /// Add all metadata to \p I. + void applyMetadata(Instruction &I) const; + + void addMetadata(unsigned Kind, MDNode *Node) { + Metadata.emplace_back(Kind, Node); + } +}; + /// This is a concrete Recipe that models a single VPlan-level instruction. /// While as any Recipe it may generate a sequence of IR instructions when /// executed, these instructions would always form a single-def expression as /// the VPInstruction is also a single def-use vertex. class VPInstruction : public VPRecipeWithIRFlags, + public VPIRMetadata, public VPUnrollPartAccessor<1> { friend class VPlanSlp; @@ -976,7 +1004,7 @@ class VPInstruction : public VPRecipeWithIRFlags, VPInstruction(unsigned Opcode, ArrayRef Operands, DebugLoc DL = {}, const Twine &Name = "") : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL), - Opcode(Opcode), Name(Name.str()) {} + VPIRMetadata(), Opcode(Opcode), Name(Name.str()) {} VPInstruction(unsigned Opcode, ArrayRef Operands, const VPIRFlags &Flags, DebugLoc DL = {}, @@ -1268,29 +1296,6 @@ struct VPIRPhi : public VPIRInstruction, public VPPhiAccessors { const VPRecipeBase *getAsRecipe() const override { return this; } }; -/// Helper to manage IR metadata for recipes. It filters out metadata that -/// cannot be propagated. -class VPIRMetadata { - SmallVector> Metadata; - -public: - VPIRMetadata() {} - - /// Adds metatadata that can be preserved from the original instruction - /// \p I. - VPIRMetadata(Instruction &I) { getMetadataToPropagate(&I, Metadata); } - - /// Adds metatadata that can be preserved from the original instruction - /// \p I and noalias metadata guaranteed by runtime checks using \p LVer. - VPIRMetadata(Instruction &I, LoopVersioning *LVer); - - /// Copy constructor for cloning. 
- VPIRMetadata(const VPIRMetadata &Other) : Metadata(Other.Metadata) {} - - /// Add all metadata to \p I. - void applyMetadata(Instruction &I) const; -}; - /// VPWidenRecipe is a recipe for producing a widened instruction using the /// opcode and operands of the recipe. This recipe covers most of the /// traditional vectorization cases where each recipe transforms into a diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 62b99d98a2b5e..f5a2533727b3d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -410,7 +410,7 @@ VPInstruction::VPInstruction(unsigned Opcode, ArrayRef Operands, const VPIRFlags &Flags, DebugLoc DL, const Twine &Name) : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, Flags, DL), - Opcode(Opcode), Name(Name.str()) { + VPIRMetadata(), Opcode(Opcode), Name(Name.str()) { assert(flagsValidForOpcode(getOpcode()) && "Set flags not supported for the provided opcode"); } @@ -591,7 +591,9 @@ Value *VPInstruction::generate(VPTransformState &State) { } case VPInstruction::BranchOnCond: { Value *Cond = State.get(getOperand(0), VPLane(0)); - return createCondBranch(Cond, getParent(), State); + auto *Br = createCondBranch(Cond, getParent(), State); + applyMetadata(*Br); + return Br; } case VPInstruction::BranchOnCount: { // First create the compare.