Summary of this patch (descriptive note placed ahead of the first "diff --git"
line):
  * VPRecipeBase::computeCost becomes llvm_unreachable; the remainder of its
    previous body now belongs to VPSingleDefRecipe::computeCost.
  * getInstructionForCost() is removed and its lookup is inlined into
    VPRecipeBase::cost().
  * VPIRInstruction and VPBranchOnMaskRecipe report a cost of 0;
    VPInterleaveRecipe returns Ctx.getLegacyCost() for its insert position.
NOTE(review): the template arguments on cast/dyn_cast/dyn_cast_or_null/isa were
reconstructed from the surrounding names and comments -- confirm against the
upstream commit.

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 97677f97b90da..68a62638b9d58 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -930,6 +930,10 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
   const Instruction *getUnderlyingInstr() const {
     return cast<Instruction>(getUnderlyingValue());
   }
+
+  /// Return the cost of this VPSingleDefRecipe.
+  InstructionCost computeCost(ElementCount VF,
+                              VPCostContext &Ctx) const override;
 };
 
 /// Class to record LLVM IR flag for a recipe along with it.
@@ -1411,6 +1415,10 @@ class VPIRInstruction : public VPRecipeBase {
 
   void execute(VPTransformState &State) override;
 
+  /// Return the cost of this VPIRInstruction.
+  InstructionCost computeCost(ElementCount VF,
+                              VPCostContext &Ctx) const override;
+
   Instruction &getInstruction() { return I; }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2391,6 +2399,10 @@ class VPInterleaveRecipe : public VPRecipeBase {
   /// Generate the wide load or store, and shuffles.
   void execute(VPTransformState &State) override;
 
+  /// Return the cost of this VPInterleaveRecipe.
+  InstructionCost computeCost(ElementCount VF,
+                              VPCostContext &Ctx) const override;
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Print the recipe.
   void print(raw_ostream &O, const Twine &Indent,
@@ -2624,6 +2636,10 @@ class VPBranchOnMaskRecipe : public VPRecipeBase {
   /// conditional branch.
   void execute(VPTransformState &State) override;
 
+  /// Return the cost of this VPBranchOnMaskRecipe.
+  InstructionCost computeCost(ElementCount VF,
+                              VPCostContext &Ctx) const override;
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Print the recipe.
   void print(raw_ostream &O, const Twine &Indent,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2468616be0bd7..ba94cd2958766 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -280,33 +280,28 @@ void VPRecipeBase::moveBefore(VPBasicBlock &BB,
   insertBefore(BB, I);
 }
 
-/// Return the underlying instruction to be used for computing \p R's cost via
-/// the legacy cost model. Return nullptr if there's no suitable instruction.
-static Instruction *getInstructionForCost(const VPRecipeBase *R) {
-  if (auto *S = dyn_cast<VPSingleDefRecipe>(R))
-    return dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
-  if (auto *IG = dyn_cast<VPInterleaveRecipe>(R))
-    return IG->getInsertPos();
-  // Currently the legacy cost model only calculates the instruction cost with
-  // underlying instruction. Removing the WidenMem here will prevent
-  // force-target-instruction-cost overwriting the cost of recipe with
-  // underlying instruction which is inconsistent with the legacy model.
-  // TODO: Remove WidenMem from this function when we don't need to compare to
-  // the legacy model.
-  if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(R))
-    return &WidenMem->getIngredient();
-  return nullptr;
-}
-
 InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
-  auto *UI = getInstructionForCost(this);
-  if (UI && Ctx.skipCostComputation(UI, VF.isVector()))
-    return 0;
-
-  InstructionCost RecipeCost = computeCost(VF, Ctx);
-  if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
-      RecipeCost.isValid())
-    RecipeCost = InstructionCost(ForceTargetInstructionCost);
+  // Get the underlying instruction for the recipe, if there is one. It is used
+  // to
+  //   * decide if cost computation should be skipped for this recipe,
+  //   * apply forced target instruction cost.
+  Instruction *UI = nullptr;
+  if (auto *S = dyn_cast<VPSingleDefRecipe>(this))
+    UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
+  else if (auto *IG = dyn_cast<VPInterleaveRecipe>(this))
+    UI = IG->getInsertPos();
+  else if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this))
+    UI = &WidenMem->getIngredient();
+
+  InstructionCost RecipeCost;
+  if (UI && Ctx.skipCostComputation(UI, VF.isVector())) {
+    RecipeCost = 0;
+  } else {
+    RecipeCost = computeCost(VF, Ctx);
+    if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
+        RecipeCost.isValid())
+      RecipeCost = InstructionCost(ForceTargetInstructionCost);
+  }
 
   LLVM_DEBUG({
     dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
@@ -317,11 +312,14 @@ InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
 
 InstructionCost VPRecipeBase::computeCost(ElementCount VF,
                                           VPCostContext &Ctx) const {
-  // Compute the cost for the recipe falling back to the legacy cost model using
-  // the underlying instruction. If there is no underlying instruction, returns
-  // 0.
-  Instruction *UI = getInstructionForCost(this);
-  if (UI && isa<VPReplicateRecipe>(this)) {
+  llvm_unreachable("subclasses should implement computeCost");
+}
+
+InstructionCost VPSingleDefRecipe::computeCost(ElementCount VF,
+                                               VPCostContext &Ctx) const {
+  Instruction *UI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
+  if (isa<VPReplicateRecipe>(this)) {
+    assert(UI && "VPReplicateRecipe must have an underlying instruction");
     // VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
     // transform, avoid computing their cost multiple times for now.
     Ctx.SkipCostComputation.insert(UI);
@@ -870,6 +868,13 @@ void VPIRInstruction::execute(VPTransformState &State) {
   State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
 }
 
+InstructionCost VPIRInstruction::computeCost(ElementCount VF,
+                                             VPCostContext &Ctx) const {
+  // The recipe wraps an existing IR instruction on the border of VPlan's scope,
+  // hence it does not contribute to the cost-modeling for the VPlan.
+  return 0;
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPIRInstruction::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
@@ -2210,6 +2215,14 @@ void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
   ReplaceInstWithInst(CurrentTerminator, CondBr);
 }
 
+InstructionCost VPBranchOnMaskRecipe::computeCost(ElementCount VF,
+                                                  VPCostContext &Ctx) const {
+  // The legacy cost model doesn't assign costs to branches for individual
+  // replicate regions. Match the current behavior in the VPlan cost model for
+  // now.
+  return 0;
+}
+
 void VPPredInstPHIRecipe::execute(VPTransformState &State) {
   assert(State.Lane && "Predicated instruction PHI works per instance.");
   Instruction *ScalarPredInst =
@@ -2892,6 +2905,11 @@ void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
+InstructionCost VPInterleaveRecipe::computeCost(ElementCount VF,
+                                                VPCostContext &Ctx) const {
+  return Ctx.getLegacyCost(IG->getInsertPos(), VF);
+}
+
 void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
   Value *Start = getStartValue()->getLiveInIRValue();
   PHINode *Phi = PHINode::Create(Start->getType(), 2, "index");