Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7258,12 +7258,30 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
const auto &ChainOps = RdxDesc.getReductionOpChain(RedPhi, OrigLoop);
SetVector<Instruction *> ChainOpsAndOperands(ChainOps.begin(),
ChainOps.end());
auto IsZExtOrSExt = [](const unsigned Opcode) -> bool {
return Opcode == Instruction::ZExt || Opcode == Instruction::SExt;
};
// Also include the operands of instructions in the chain, as the cost-model
// may mark extends as free.
//
// For ARM, some of the instruction can folded into the reducion
// instruction. So we need to mark all folded instructions free.
// For example: We can fold reduce(mul(ext(A), ext(B))) into one
// instruction.
for (auto *ChainOp : ChainOps) {
for (Value *Op : ChainOp->operands()) {
if (auto *I = dyn_cast<Instruction>(Op))
if (auto *I = dyn_cast<Instruction>(Op)) {
ChainOpsAndOperands.insert(I);
if (I->getOpcode() == Instruction::Mul) {
auto *Ext0 = dyn_cast<Instruction>(I->getOperand(0));
auto *Ext1 = dyn_cast<Instruction>(I->getOperand(1));
if (Ext0 && IsZExtOrSExt(Ext0->getOpcode()) && Ext1 &&
Ext0->getOpcode() == Ext1->getOpcode()) {
ChainOpsAndOperands.insert(Ext0);
ChainOpsAndOperands.insert(Ext1);
}
}
}
}
}

Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1571,6 +1571,10 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
/// Produce widened copies of the cast.
void execute(VPTransformState &State) override;

/// Return the cost of this VPWidenCastRecipe.
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
Expand Down
49 changes: 49 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1462,6 +1462,55 @@ void VPWidenCastRecipe::execute(VPTransformState &State) {
State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
}

InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
// Computes the CastContextHint from a recipes that may access memory.
auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {
if (VF.isScalar())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to be missing handling of VPInterleaveRecipe and VPReplicateRecipe?

And returning normal for live ins (the !Loop->contains() case in the legacy cost model)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for catching that. Handling VPInterleaveRecipe and VPReplicateRecipe in ComputeCCH.

return TTI::CastContextHint::Normal;
if (isa<VPInterleaveRecipe>(R))
return TTI::CastContextHint::Interleave;
if (const auto *ReplicateRecipe = dyn_cast<VPReplicateRecipe>(R))
return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked
: TTI::CastContextHint::Normal;
const auto *WidenMemoryRecipe = dyn_cast<VPWidenMemoryRecipe>(R);
if (WidenMemoryRecipe == nullptr)
return TTI::CastContextHint::None;
if (!WidenMemoryRecipe->isConsecutive())
return TTI::CastContextHint::GatherScatter;
if (WidenMemoryRecipe->isReverse())
return TTI::CastContextHint::Reversed;
if (WidenMemoryRecipe->isMasked())
return TTI::CastContextHint::Masked;
return TTI::CastContextHint::Normal;
};

VPValue *Operand = getOperand(0);
TTI::CastContextHint CCH = TTI::CastContextHint::None;
// For Trunc/FPTrunc, get the context from the only user.
if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
!hasMoreThanOneUniqueUser() && getNumUsers() > 0) {
if (auto *StoreRecipe = dyn_cast<VPRecipeBase>(*user_begin()))
CCH = ComputeCCH(StoreRecipe);
}
// For Z/Sext, get the context from the operand.
else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
Opcode == Instruction::FPExt) {
if (Operand->isLiveIn())
CCH = TTI::CastContextHint::Normal;
else if (Operand->getDefiningRecipe())
CCH = ComputeCCH(Operand->getDefiningRecipe());
}

auto *SrcTy =
cast<VectorType>(ToVectorTy(Ctx.Types.inferScalarType(Operand), VF));
auto *DestTy = cast<VectorType>(ToVectorTy(getResultType(), VF));
// Arm TTI will use the underlying instruction to determine the cost.
return Ctx.TTI.getCastInstrCost(
Opcode, DestTy, SrcTy, CCH, TTI::TCK_RecipThroughput,
dyn_cast_if_present<Instruction>(getUnderlyingValue()));
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Vectorize/VPlanValue.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ class VPValue {
}

/// Returns true if the value has more than one unique user.
bool hasMoreThanOneUniqueUser() {
bool hasMoreThanOneUniqueUser() const {
if (getNumUsers() == 0)
return false;

Expand Down
Loading