@@ -4577,6 +4577,31 @@ getGEPCosts(const TargetTransformInfo &TTI, ArrayRef<Value *> Ptrs,
45774577 Value *BasePtr, unsigned Opcode, TTI::TargetCostKind CostKind,
45784578 Type *ScalarTy, VectorType *VecTy);
45794579
4580+ /// Returns the cost of the shuffle instructions with the given \p Kind, vector
4581+ /// type \p Tp and optional \p Mask. Adds SLP-specific cost estimation for insert
4582+ /// subvector pattern: for SK_PermuteTwoSrc with more than 2 mask elements, if \p Mask is recognized as an insert-subvector mask whose inserted range extends past the element count of \p Tp, the cost is queried as SK_InsertSubvector on the type built by getWidenedType(element type, Mask.size()) with \p Tp as the subvector type. Every other case is forwarded to TTI.getShuffleCost unchanged.
4583+ static InstructionCost
4584+ getShuffleCost(const TargetTransformInfo &TTI, TTI::ShuffleKind Kind,
4585+ VectorType *Tp, ArrayRef<int> Mask = std::nullopt,
4586+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
4587+ int Index = 0, VectorType *SubTp = nullptr,
4588+ ArrayRef<const Value *> Args = std::nullopt) {
4589+ if (Kind != TTI::SK_PermuteTwoSrc)
4590+ return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args); // Only two-source permutes get the SLP-specific handling below.
4591+ int NumSrcElts = Tp->getElementCount().getKnownMinValue();
4592+ int NumSubElts; // Left uninitialized; only read below after isInsertSubvectorMask returned true (presumably an out-parameter of that call - confirm).
4593+ if (Mask.size() > 2 && ShuffleVectorInst::isInsertSubvectorMask(
4594+ Mask, NumSrcElts, NumSubElts, Index)) { // NOTE(review): Index looks like it is overwritten by this call too, which would replace the caller-supplied value for the calls below - confirm against the callee's signature.
4595+ if (Index + NumSubElts > NumSrcElts &&
4596+ Index + NumSrcElts <= static_cast<int>(Mask.size())) // The inserted range ends past Tp's element count, while Index + NumSrcElts still fits within the mask length.
4597+ return TTI.getShuffleCost(
4598+ TTI::SK_InsertSubvector,
4599+ getWidenedType(Tp->getElementType(), Mask.size()), Mask,
4600+ TTI::TCK_RecipThroughput, Index, Tp); // NOTE(review): passes TCK_RecipThroughput rather than the caller's CostKind, and drops SubTp/Args - confirm this is intended.
4601+ }
4602+ return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args); // Fallback: cost the shuffle with the kind the caller asked for.
4603+ }
4604+
45804605BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
45814606 ArrayRef<Value *> VL, const Value *VL0, SmallVectorImpl<unsigned> &Order,
45824607 SmallVectorImpl<Value *> &PointerOps, bool TryRecursiveCheck) const {
@@ -4783,8 +4808,8 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
47834808 for (int Idx : seq<int>(0, VL.size()))
47844809 ShuffleMask[Idx] = Idx / VF == I ? VL.size() + Idx % VF : Idx;
47854810 VecLdCost +=
4786- TTI. getShuffleCost(TTI::SK_InsertSubvector, VecTy, ShuffleMask ,
4787- CostKind, I * VF, SubVecTy);
4811+ :: getShuffleCost(TTI, TTI ::SK_InsertSubvector, VecTy,
4812+ ShuffleMask, CostKind, I * VF, SubVecTy);
47884813 }
47894814 // If masked gather cost is higher - better to vectorize, so
47904815 // consider it as a gather node. It will be better estimated
@@ -5223,7 +5248,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
52235248 InstructionCost PermuteCost =
52245249 TopToBottom
52255250 ? 0
5226- : TTI-> getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, Mask);
5251+ : :: getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc, Ty, Mask);
52275252 InstructionCost InsertFirstCost = TTI->getVectorInstrCost(
52285253 Instruction::InsertElement, Ty, TTI::TCK_RecipThroughput, 0,
52295254 PoisonValue::get(Ty), *It);
@@ -8152,31 +8177,6 @@ class BaseShuffleAnalysis {
81528177};
81538178} // namespace
81548179
8155- /// Returns the cost of the shuffle instructions with the given \p Kind, vector
8156- /// type \p Tp and optional \p Mask. Adds SLP-specifc cost estimation for insert
8157- /// subvector pattern.
8158- static InstructionCost
8159- getShuffleCost(const TargetTransformInfo &TTI, TTI::ShuffleKind Kind,
8160- VectorType *Tp, ArrayRef<int> Mask = std::nullopt,
8161- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
8162- int Index = 0, VectorType *SubTp = nullptr,
8163- ArrayRef<const Value *> Args = std::nullopt) {
8164- if (Kind != TTI::SK_PermuteTwoSrc)
8165- return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
8166- int NumSrcElts = Tp->getElementCount().getKnownMinValue();
8167- int NumSubElts;
8168- if (Mask.size() > 2 && ShuffleVectorInst::isInsertSubvectorMask(
8169- Mask, NumSrcElts, NumSubElts, Index)) {
8170- if (Index + NumSubElts > NumSrcElts &&
8171- Index + NumSrcElts <= static_cast<int>(Mask.size()))
8172- return TTI.getShuffleCost(
8173- TTI::SK_InsertSubvector,
8174- getWidenedType(Tp->getElementType(), Mask.size()), Mask,
8175- TTI::TCK_RecipThroughput, Index, Tp);
8176- }
8177- return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
8178- }
8179-
81808180/// Calculate the scalar and the vector costs from vectorizing set of GEPs.
81818181static std::pair<InstructionCost, InstructionCost>
81828182getGEPCosts(const TargetTransformInfo &TTI, ArrayRef<Value *> Ptrs,
@@ -8546,8 +8546,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
85468546 for (unsigned I = VF, E = VL.size(); I < E; I += VF) {
85478547 for (unsigned Idx : seq<unsigned>(0, E))
85488548 ShuffleMask[Idx] = Idx / VF == I ? E + Idx % VF : Idx;
8549- GatherCost += TTI. getShuffleCost(TTI::SK_InsertSubvector, VecTy,
8550- ShuffleMask, CostKind, I, LoadTy);
8549+ GatherCost += :: getShuffleCost(TTI, TTI::SK_InsertSubvector, VecTy,
8550+ ShuffleMask, CostKind, I, LoadTy);
85518551 }
85528552 }
85538553 GatherCost -= ScalarsCost;
@@ -8574,10 +8574,11 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
85748574 InstructionCost InsertCost =
85758575 TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, CostKind, 0,
85768576 PoisonValue::get(VecTy), *It);
8577- return InsertCost + TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast,
8578- VecTy, ShuffleMask, CostKind,
8579- /*Index=*/0, /*SubTp=*/nullptr,
8580- /*Args=*/*It);
8577+ return InsertCost + ::getShuffleCost(TTI,
8578+ TargetTransformInfo::SK_Broadcast,
8579+ VecTy, ShuffleMask, CostKind,
8580+ /*Index=*/0, /*SubTp=*/nullptr,
8581+ /*Args=*/*It);
85818582 }
85828583 return GatherCost +
85838584 (all_of(Gathers, IsaPred<UndefValue>)
@@ -8801,8 +8802,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
88018802 cast<VectorType>(V1->getType())->getElementCount().getKnownMinValue();
88028803 if (isEmptyOrIdentity(Mask, VF))
88038804 return TTI::TCC_Free;
8804- return TTI. getShuffleCost(TTI::SK_PermuteSingleSrc,
8805- cast<VectorType>(V1->getType()), Mask);
8805+ return :: getShuffleCost(TTI, TTI::SK_PermuteSingleSrc,
8806+ cast<VectorType>(V1->getType()), Mask);
88068807 }
88078808 InstructionCost createIdentity(Value *) const { return TTI::TCC_Free; }
88088809 InstructionCost createPoison(Type *Ty, unsigned VF) const {
@@ -9460,7 +9461,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
94609461 ::addMask(Mask, E->ReuseShuffleIndices);
94619462 if (!Mask.empty() && !ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
94629463 CommonCost =
9463- TTI-> getShuffleCost(TTI::SK_PermuteSingleSrc, FinalVecTy, Mask);
9464+ :: getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc, FinalVecTy, Mask);
94649465 assert((E->State == TreeEntry::Vectorize ||
94659466 E->State == TreeEntry::ScatterVectorize ||
94669467 E->State == TreeEntry::StridedVectorize) &&
@@ -9721,8 +9722,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
97219722 // we can merge this shuffle with the following SK_Select.
97229723 auto *InsertVecTy = getWidenedType(ScalarTy, InsertVecSz);
97239724 if (!IsIdentity)
9724- Cost += TTI-> getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
9725- InsertVecTy, Mask);
9725+ Cost += :: getShuffleCost(*TTI, TargetTransformInfo::SK_PermuteSingleSrc,
9726+ InsertVecTy, Mask);
97269727 auto *FirstInsert = cast<Instruction>(*find_if(E->Scalars, [E](Value *V) {
97279728 return !is_contained(E->Scalars, cast<Instruction>(V)->getOperand(0));
97289729 }));
@@ -9736,9 +9737,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
97369737 if (!InMask.all() && NumScalars != NumElts && !IsWholeSubvector) {
97379738 if (InsertVecSz != VecSz) {
97389739 auto *ActualVecTy = getWidenedType(ScalarTy, VecSz);
9739- Cost += TTI-> getShuffleCost(TTI::SK_InsertSubvector, ActualVecTy,
9740- std::nullopt, CostKind, OffsetBeg - Offset,
9741- InsertVecTy);
9740+ Cost += :: getShuffleCost(*TTI, TTI::SK_InsertSubvector, ActualVecTy,
9741+ std::nullopt, CostKind, OffsetBeg - Offset,
9742+ InsertVecTy);
97429743 } else {
97439744 for (unsigned I = 0, End = OffsetBeg - Offset; I < End; ++I)
97449745 Mask[I] = InMask.test(I) ? PoisonMaskElem : I;
@@ -9867,8 +9868,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
98679868 if (CondNumElements != VecTyNumElements) {
98689869 // When the return type is i1 but the source is fixed vector type, we
98699870 // need to duplicate the condition value.
9870- VecCost += TTI-> getShuffleCost(
9871- TTI::SK_PermuteSingleSrc, CondType,
9871+ VecCost += :: getShuffleCost(
9872+ *TTI, TTI::SK_PermuteSingleSrc, CondType,
98729873 createReplicatedMask(VecTyNumElements / CondNumElements,
98739874 CondNumElements));
98749875 }
@@ -10851,9 +10852,9 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
1085110852 SmallVector<int> OrigMask(VecVF, PoisonMaskElem);
1085210853 std::copy(Mask.begin(), std::next(Mask.begin(), std::min(VF, VecVF)),
1085310854 OrigMask.begin());
10854- C = TTI-> getShuffleCost(TTI::SK_PermuteSingleSrc,
10855- getWidenedType(TE->getMainOp()->getType(), VecVF),
10856- OrigMask);
10855+ C = :: getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc,
10856+ getWidenedType(TE->getMainOp()->getType(), VecVF),
10857+ OrigMask);
1085710858 LLVM_DEBUG(
1085810859 dbgs() << "SLP: Adding cost " << C
1085910860 << " for final shuffle of insertelement external users.\n";
@@ -10883,7 +10884,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
1088310884 static_cast<int>(Data.index()) == Data.value());
1088410885 })) {
1088510886 InstructionCost C =
10886- TTI-> getShuffleCost(TTI::SK_PermuteSingleSrc, FTy, Mask);
10887+ :: getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc, FTy, Mask);
1088710888 LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
1088810889 << " for final shuffle of insertelement "
1088910890 "external users.\n";
@@ -11584,8 +11585,8 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1158411585 TTI->getScalarizationOverhead(VecTy, ~ShuffledElements, /*Insert*/ true,
1158511586 /*Extract*/ false, CostKind);
1158611587 if (DuplicateNonConst)
11587- Cost += TTI-> getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
11588- VecTy, ShuffleMask);
11588+ Cost += :: getShuffleCost(*TTI, TargetTransformInfo::SK_PermuteSingleSrc,
11589+ VecTy, ShuffleMask);
1158911590 return Cost;
1159011591}
1159111592
0 commit comments