Skip to content
Merged
12 changes: 12 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -915,6 +915,9 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
llvm_unreachable("Unhandled VPDefID");
}

InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override;

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: can a better position be found, rather than between two static classof()'s?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved to the end. thanks!

static inline bool classof(const VPUser *U) {
auto *R = dyn_cast<VPRecipeBase>(U);
return R && classof(R);
Expand Down Expand Up @@ -1410,6 +1413,9 @@ class VPIRInstruction : public VPRecipeBase {

void execute(VPTransformState &State) override;

InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override;

Instruction &getInstruction() { return I; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Expand Down Expand Up @@ -2326,6 +2332,9 @@ class VPInterleaveRecipe : public VPRecipeBase {
/// Generate the wide load or store, and shuffles.
void execute(VPTransformState &State) override;

InstructionCost computeCost(ElementCount VF,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be good to /// document, here and elsewhere.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added thanks!

VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
Expand Down Expand Up @@ -2559,6 +2568,9 @@ class VPBranchOnMaskRecipe : public VPRecipeBase {
/// conditional branch.
void execute(VPTransformState &State) override;

InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
Expand Down
72 changes: 42 additions & 30 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -274,33 +274,28 @@ void VPRecipeBase::moveBefore(VPBasicBlock &BB,
insertBefore(BB, I);
}

/// Return the underlying instruction to be used for computing \p R's cost via
/// the legacy cost model. Return nullptr if there's no suitable instruction.
static Instruction *getInstructionForCost(const VPRecipeBase *R) {
if (auto *S = dyn_cast<VPSingleDefRecipe>(R))
return dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
if (auto *IG = dyn_cast<VPInterleaveRecipe>(R))
return IG->getInsertPos();
// Currently the legacy cost model only calculates the instruction cost with
// underlying instruction. Removing the WidenMem here will prevent
// force-target-instruction-cost overwriting the cost of recipe with
// underlying instruction which is inconsistent with the legacy model.
// TODO: Remove WidenMem from this function when we don't need to compare to
// the legacy model.
if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(R))
return &WidenMem->getIngredient();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth leaving a note, that even though widen loads and stores have an underlying instruction ("ingredient"), null is returned - in order to match legacy cost model behavior with force-target-instruction-cost?

The instruction a recipe provides for cost affects both

  • skipCostComputation() - widen loads and stores are irrelevant for skipping?
  • force-target-instruction-cost - widen loads and stores should be assigned zero instead of this overriding cost?
  • w/o force-target-instruction-cost - should be assigned zero cost?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clarified the the comment about the return value for the function.

Nullptr is returned if either the recipe doesn't have an underlying instruction or computeCost is implemented and there is no need for the underlying instruction (i.e. it should not be needed to be skipped)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nullptr is returned if either the recipe doesn't have an underlying instruction or computeCost is implemented and there is no need for the underlying instruction (i.e. it should not be needed to be skipped)

Regarding the "else" part of this "or" - not needing UI because recipe is not to be skipped is fine (if it's absence from SkipCostComputation it won't be skipped), but would UI still be needed in order to apply ForceTargetInstructionCost?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has been reworked in the latest version. The function is gone and inline to ::cost(), which clarifies that it is only used to decide to skip and when to apply forced costs.

return nullptr;
}

InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
auto *UI = getInstructionForCost(this);
if (UI && Ctx.skipCostComputation(UI, VF.isVector()))
return 0;

InstructionCost RecipeCost = computeCost(VF, Ctx);
if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
RecipeCost.isValid())
RecipeCost = InstructionCost(ForceTargetInstructionCost);
// Get the underlying instruction for the recipe, if there is one. Is is used
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Get the underlying instruction for the recipe, if there is one. Is is used
// Get the underlying instruction for the recipe, if there is one. It is used

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed, thanks!

// to
// * decide if cost computation should be skipped for this recipe
// * apply forced target instr
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// * apply forced target instr
// * apply forced target instruction cost.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed thanks!

Instruction *UI = nullptr;
if (auto *S = dyn_cast<VPSingleDefRecipe>(this))
UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
else if (auto *IG = dyn_cast<VPInterleaveRecipe>(this))
UI = IG->getInsertPos();
else if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this))
UI = &WidenMem->getIngredient();

InstructionCost RecipeCost;
if (UI && Ctx.skipCostComputation(UI, VF.isVector())) {
RecipeCost = 0;
} else {
RecipeCost = computeCost(VF, Ctx);
if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
RecipeCost.isValid())
RecipeCost = InstructionCost(ForceTargetInstructionCost);
}

LLVM_DEBUG({
dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
Expand All @@ -311,10 +306,12 @@ InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {

InstructionCost VPRecipeBase::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
// Compute the cost for the recipe falling back to the legacy cost model using
// the underlying instruction. If there is no underlying instruction, returns
// 0.
Instruction *UI = getInstructionForCost(this);
llvm_unreachable("subclasses should implement computeCost");
}

InstructionCost VPSingleDefRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
Instruction *UI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
if (UI && isa<VPReplicateRecipe>(this)) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(independent) Have VPReplicateRecipe::computeCost() take care of inserting UI into SkipCostComputation?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no such implementation yet, but can be added soon! Now moved slightly closer to VPSingleDefRecipe::computeCost.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that replicate recipes by definition must have a UI, so the latter can be asserted instead of checked.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks!

// VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
// transform, avoid computing their cost multiple times for now.
Expand Down Expand Up @@ -864,6 +861,11 @@ void VPIRInstruction::execute(VPTransformState &State) {
State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
}

InstructionCost VPIRInstruction::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some comment explaining that this wraps an existing IR instruction on the border of VPlan's scope, hence does not contribute to VPlan's cost modeling.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added, thanks!

return 0;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPIRInstruction::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
Expand Down Expand Up @@ -2156,6 +2158,11 @@ void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
ReplaceInstWithInst(CurrentTerminator, CondBr);
}

InstructionCost VPBranchOnMaskRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some explanation for treating this branch as free.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added thanks!

return 0;
}

void VPPredInstPHIRecipe::execute(VPTransformState &State) {
assert(State.Lane && "Predicated instruction PHI works per instance.");
Instruction *ScalarPredInst =
Expand Down Expand Up @@ -2838,6 +2845,11 @@ void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif

InstructionCost VPInterleaveRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
return Ctx.getLegacyCost(IG->getInsertPos(), VF);
}

void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
Value *Start = getStartValue()->getLiveInIRValue();
PHINode *Phi = PHINode::Create(Start->getType(), 2, "index");
Expand Down
Loading