-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[VPlan] Implement VPWidenCastRecipe::computeCost(). (NFCI) #111339
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[VPlan] Implement VPWidenCastRecipe::computeCost(). (NFCI) #111339
Conversation
|
@llvm/pr-subscribers-llvm-transforms Author: Elvis Wang (ElvisWang123) ChangesThis patch implement Full diff: https://github.com/llvm/llvm-project/pull/111339.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 792e0e17dd8719..59527d27d3e7a4 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7218,12 +7218,29 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
const auto &ChainOps = RdxDesc.getReductionOpChain(RedPhi, OrigLoop);
SetVector<Instruction *> ChainOpsAndOperands(ChainOps.begin(),
ChainOps.end());
+ auto isZExtOrSExt = [](const unsigned Opcode) -> bool {
+ return Opcode == Instruction::ZExt || Opcode == Instruction::SExt;
+ };
// Also include the operands of instructions in the chain, as the cost-model
// may mark extends as free.
+ //
+ // For ARM, some of the instruction can folded into the reducion
+ // instruction. So we need to mark all folded instructions free.
+ // For example: We can fold reduce(mul(ext(A), ext(B))) into one
+ // instruction.
for (auto *ChainOp : ChainOps) {
for (Value *Op : ChainOp->operands()) {
- if (auto *I = dyn_cast<Instruction>(Op))
+ if (auto *I = dyn_cast<Instruction>(Op)) {
ChainOpsAndOperands.insert(I);
+ if (I->getOpcode() == Instruction::Mul) {
+ auto *Ext0 = dyn_cast<Instruction>(I->getOperand(0));
+ if (Ext0 && isZExtOrSExt(Ext0->getOpcode()))
+ ChainOpsAndOperands.insert(Ext0);
+ auto *Ext1 = dyn_cast<Instruction>(I->getOperand(1));
+ if (Ext1 && isZExtOrSExt(Ext1->getOpcode()))
+ ChainOpsAndOperands.insert(Ext1);
+ }
+ }
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index c4567362eaffc7..c8e074785cc877 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1557,6 +1557,10 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
/// Produce widened copies of the cast.
void execute(VPTransformState &State) override;
+ /// Return the cost of this VPWidenCastRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 75908638532950..764090b7a0ecc2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1429,6 +1429,49 @@ void VPWidenCastRecipe::execute(VPTransformState &State) {
State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
}
+InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
+ VPCostContext &Ctx) const {
+ auto *SrcTy = cast<VectorType>(
+ ToVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF));
+ auto *DestTy = cast<VectorType>(ToVectorTy(getResultType(), VF));
+ // Computes the CastContextHint from a VPWidenMemoryRecipe instruction.
+ auto ComputeCCH = [&](VPWidenMemoryRecipe *R) -> TTI::CastContextHint {
+ assert((isa<VPWidenLoadRecipe>(R) || isa<VPWidenStoreRecipe>(R)) &&
+ "Expected a load or a store!");
+
+ if (VF.isScalar())
+ return TTI::CastContextHint::Normal;
+ if (!R->isConsecutive())
+ return TTI::CastContextHint::GatherScatter;
+ if (R->isReverse())
+ return TTI::CastContextHint::Reversed;
+ if (R->isMasked())
+ return TTI::CastContextHint::Masked;
+ return TTI::CastContextHint::Normal;
+ };
+
+ TTI::CastContextHint CCH = TTI::CastContextHint::None;
+ // For Trunc, the context is the only user, which must be a
+ // VPWidenStoreRecipe.
+ if (Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) {
+ if (!cast<VPValue>(this)->hasMoreThanOneUniqueUser())
+ if (VPWidenMemoryRecipe *Store =
+ dyn_cast<VPWidenMemoryRecipe>(*this->user_begin()))
+ CCH = ComputeCCH(Store);
+ }
+ // For Z/Sext, the context is the operand, which must be a VPWidenLoadRecipe.
+ else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
+ Opcode == Instruction::FPExt) {
+ if (VPWidenMemoryRecipe *Load = dyn_cast<VPWidenMemoryRecipe>(
+ this->getOperand(0)->getDefiningRecipe()))
+ CCH = ComputeCCH(Load);
+ }
+ // Arm TTI will use the underlying instruction to determine the cost.
+ return Ctx.TTI.getCastInstrCost(
+ Opcode, DestTy, SrcTy, CCH, TTI::TCK_RecipThroughput,
+ dyn_cast_if_present<Instruction>(getUnderlyingValue()));
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 4c383244f96f1a..ec4d95048de7a4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -135,7 +135,7 @@ class VPValue {
}
/// Returns true if the value has more than one unique user.
- bool hasMoreThanOneUniqueUser() {
+ bool hasMoreThanOneUniqueUser() const {
if (getNumUsers() == 0)
return false;
|
| assert((isa<VPWidenLoadRecipe>(R) || isa<VPWidenStoreRecipe>(R)) && | ||
| "Expected a load or a store!"); | ||
|
|
||
| if (VF.isScalar()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems to be missing handling of VPInterleaveRecipe and VPReplicateRecipe?
And returning normal for live ins (the !Loop->contains() case in the legacy cost model)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for catching that. Handling VPInterleaveRecipe and VPReplicateRecipe in ComputeCCH.
This patch implement VPWidenCastRecipe::computeCost() and skip more cast recipies in the in-loop reduction.
8f4e5fe to
5eadd23
Compare
|
Gentle ping! |
fhahn
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks, with a few remaining suggestions to simplify some comments.
fhahn
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
This patch implement
VPWidenCastRecipe::computeCost()and skip cast recipies in the in-loop reduction.