@@ -260,20 +260,6 @@ static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) {
260260 VF * getNumElements(ScalarTy));
261261}
262262
263- /// Returns an element count, not less than \p Sz, for a vector of \p Ty elements:
264- /// the per-part element count is rounded up to a power of 2 and multiplied by
265- /// the number of parts reported by \p TTI; otherwise falls back to bit_ceil(Sz).
266- static unsigned getFullVectorNumberOfElements(const TargetTransformInfo &TTI,
267- Type *Ty, unsigned Sz) {
268- if (!isValidElementType(Ty)) // Ty is not a valid element type: plain power-of-2 ceiling.
269- return bit_ceil(Sz);
270- // Query the number of parts TTI reports for the widened type built from Ty and Sz.
271- const unsigned NumParts = TTI.getNumberOfParts(getWidenedType(Ty, Sz));
272- if (NumParts == 0 || NumParts >= Sz) // No part count, or at least one part per element.
273- return bit_ceil(Sz);
274- return bit_ceil(divideCeil(Sz, NumParts)) * NumParts; // Power-of-2 elements per part, times parts.
275- }
276-
277263static void transformScalarShuffleIndiciesToVector(unsigned VecTyNumElements,
278264 SmallVectorImpl<int> &Mask) {
279265 // The ShuffleBuilder implementation uses shufflevector to splat an "element".
@@ -408,7 +394,7 @@ static bool isVectorLikeInstWithConstOps(Value *V) {
408394/// total number of elements \p Size and number of registers (parts) \p
409395/// NumParts.
410396static unsigned getPartNumElems(unsigned Size, unsigned NumParts) {
411- return std::min<unsigned>(Size, bit_ceil( divideCeil(Size, NumParts) )); // Removed: per-part count rounded up to a power of 2, capped at Size.
397+ return PowerOf2Ceil( divideCeil(Size, NumParts)); // Added: same rounding without the cap, so the result may exceed Size.
412398}
413399
414400/// Returns correct remaining number of elements, considering total amount \p
@@ -1236,22 +1222,6 @@ static bool doesNotNeedToSchedule(ArrayRef<Value *> VL) {
12361222 (all_of(VL, isUsedOutsideBlock) || all_of(VL, areAllOperandsNonInsts));
12371223}
12381224
1239- /// Returns true if \p Sz is a power of 2 greater than 1, or if the widened type
1240- /// of \p Ty elements with size \p Sz divides evenly into the parts reported by
1241- /// \p TTI with a power-of-2 number of elements per part.
1242- static bool hasFullVectorsOrPowerOf2(const TargetTransformInfo &TTI, Type *Ty,
1243- unsigned Sz) {
1244- if (Sz <= 1) // Sizes 0 and 1 are rejected outright.
1245- return false;
1246- if (!isValidElementType(Ty) && !isa<FixedVectorType>(Ty)) // Neither a valid element type nor a fixed vector.
1247- return false;
1248- if (has_single_bit(Sz)) // Power-of-2 sizes are accepted without querying TTI.
1249- return true;
1250- const unsigned NumParts = TTI.getNumberOfParts(getWidenedType(Ty, Sz));
1251- return NumParts > 0 && NumParts < Sz && has_single_bit(Sz / NumParts) &&
1252- Sz % NumParts == 0; // Sz splits evenly into NumParts parts of power-of-2 size.
1253- }
1254-
12551225namespace slpvectorizer {
12561226
12571227/// Bottom Up SLP Vectorizer.
@@ -3341,15 +3311,6 @@ class BoUpSLP {
33413311 /// Return true if this is a non-power-of-2 node.
33423312 bool isNonPowOf2Vec() const {
33433313 bool IsNonPowerOf2 = !has_single_bit(Scalars.size());
3344- return IsNonPowerOf2;
3345- }
3346-
3347- /// Return true if this is a node, which tries to vectorize number of
3348- /// elements, forming whole vectors.
3349- bool
3350- hasNonWholeRegisterOrNonPowerOf2Vec(const TargetTransformInfo &TTI) const {
3351- bool IsNonPowerOf2 = !hasFullVectorsOrPowerOf2(
3352- TTI, getValueType(Scalars.front()), Scalars.size());
33533314 assert((!IsNonPowerOf2 || ReuseShuffleIndices.empty()) &&
33543315 "Reshuffling not supported with non-power-of-2 vectors yet.");
33553316 return IsNonPowerOf2;
@@ -3469,10 +3430,8 @@ class BoUpSLP {
34693430 Last->State = EntryState;
34703431 // FIXME: Remove once support for ReuseShuffleIndices has been implemented
34713432 // for non-power-of-two vectors.
3472- assert(
3473- (hasFullVectorsOrPowerOf2(*TTI, getValueType(VL.front()), VL.size()) ||
3474- ReuseShuffleIndices.empty()) &&
3475- "Reshuffling scalars not yet supported for nodes with padding");
3433+ assert((has_single_bit(VL.size()) || ReuseShuffleIndices.empty()) &&
3434+ "Reshuffling scalars not yet supported for nodes with padding");
34763435 Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
34773436 ReuseShuffleIndices.end());
34783437 if (ReorderIndices.empty()) {
@@ -5310,7 +5269,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
53105269 // node.
53115270 if (!TE.ReuseShuffleIndices.empty()) {
53125271 // FIXME: Support ReuseShuffleIndices for non-power-of-two vectors.
5313- assert(!TE.hasNonWholeRegisterOrNonPowerOf2Vec(*TTI ) &&
5272+ assert(!TE.isNonPowOf2Vec( ) &&
53145273 "Reshuffling scalars not yet supported for nodes with padding");
53155274
53165275 if (isSplat(TE.Scalars))
@@ -5550,7 +5509,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
55505509 }
55515510 // FIXME: Remove the non-power-of-two check once findReusedOrderedScalars
55525511 // has been audited for correctness with non-power-of-two vectors.
5553- if (!TE.hasNonWholeRegisterOrNonPowerOf2Vec(*TTI ))
5512+ if (!TE.isNonPowOf2Vec( ))
55545513 if (std::optional<OrdersType> CurrentOrder = findReusedOrderedScalars(TE))
55555514 return CurrentOrder;
55565515 }
@@ -5703,18 +5662,15 @@ void BoUpSLP::reorderTopToBottom() {
57035662 });
57045663
57055664 // Reorder the graph nodes according to their vectorization factor.
5706- for (unsigned VF = VectorizableTree.front()->getVectorFactor();
5707- !VFToOrderedEntries.empty() && VF > 1; VF -= 2 - (VF & 1U) ) {
5665+ for (unsigned VF = VectorizableTree.front()->getVectorFactor(); VF > 1;
5666+ VF = bit_ceil (VF) / 2 ) {
57085667 auto It = VFToOrderedEntries.find(VF);
57095668 if (It == VFToOrderedEntries.end())
57105669 continue;
57115670 // Try to find the most profitable order. We just are looking for the most
57125671 // used order and reorder scalar elements in the nodes according to this
57135672 // mostly used order.
57145673 ArrayRef<TreeEntry *> OrderedEntries = It->second.getArrayRef();
5715- // Delete VF entry upon exit.
5716- auto Cleanup = make_scope_exit([&]() { VFToOrderedEntries.erase(It); });
5717-
57185674 // All operands are reordered and used only in this node - propagate the
57195675 // most used order to the user node.
57205676 MapVector<OrdersType, unsigned,
@@ -7573,36 +7529,33 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
75737529 UniqueValues.emplace_back(V);
75747530 }
75757531 size_t NumUniqueScalarValues = UniqueValues.size();
7576- bool IsFullVectors = hasFullVectorsOrPowerOf2(
7577- *TTI, UniqueValues.front()->getType(), NumUniqueScalarValues);
7578- if (NumUniqueScalarValues == VL.size() &&
7579- (VectorizeNonPowerOf2 || IsFullVectors)) {
7532+ if (NumUniqueScalarValues == VL.size()) {
75807533 ReuseShuffleIndices.clear();
75817534 } else {
75827535 // FIXME: Reshuffling scalars is not supported yet for non-power-of-2 ops.
7583- if ((UserTreeIdx.UserTE &&
7584- UserTreeIdx.UserTE->hasNonWholeRegisterOrNonPowerOf2Vec(*TTI)) ||
7585- !has_single_bit(VL.size())) {
7536+ if ((UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec()) ||
7537+ !llvm::has_single_bit(VL.size())) {
75867538 LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported "
75877539 "for nodes with padding.\n");
75887540 newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
75897541 return false;
75907542 }
75917543 LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
7592- if (NumUniqueScalarValues <= 1 || !IsFullVectors ||
7593- (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) {
7594- return isa<UndefValue>(V) || !isConstant(V);
7595- }))) {
7544+ if (NumUniqueScalarValues <= 1 ||
7545+ (UniquePositions.size() == 1 && all_of(UniqueValues,
7546+ [](Value *V) {
7547+ return isa<UndefValue>(V) ||
7548+ !isConstant(V);
7549+ })) ||
7550+ !llvm::has_single_bit<uint32_t>(NumUniqueScalarValues)) {
75967551 if (DoNotFail && UniquePositions.size() > 1 &&
75977552 NumUniqueScalarValues > 1 && S.MainOp->isSafeToRemove() &&
75987553 all_of(UniqueValues, [=](Value *V) {
75997554 return isa<ExtractElementInst>(V) ||
76007555 areAllUsersVectorized(cast<Instruction>(V),
76017556 UserIgnoreList);
76027557 })) {
7603- // Find the number of elements, which forms full vectors.
7604- unsigned PWSz = getFullVectorNumberOfElements(
7605- *TTI, UniqueValues.front()->getType(), UniqueValues.size());
7558+ unsigned PWSz = PowerOf2Ceil(UniqueValues.size());
76067559 if (PWSz == VL.size()) {
76077560 ReuseShuffleIndices.clear();
76087561 } else {
@@ -9840,6 +9793,9 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
98409793 return nullptr;
98419794 Value *VecBase = nullptr;
98429795 ArrayRef<Value *> VL = E->Scalars;
9796+ // If the resulting type is scalarized, do not adjust the cost.
9797+ if (NumParts == VL.size())
9798+ return nullptr;
98439799 // Check if it can be considered reused if same extractelements were
98449800 // vectorized already.
98459801 bool PrevNodeFound = any_of(
@@ -10494,7 +10450,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
1049410450 InsertMask[Idx] = I + 1;
1049510451 }
1049610452 unsigned VecScalarsSz = PowerOf2Ceil(NumElts);
10497- if (NumOfParts > 0 && NumOfParts < NumElts )
10453+ if (NumOfParts > 0)
1049810454 VecScalarsSz = PowerOf2Ceil((NumElts + NumOfParts - 1) / NumOfParts);
1049910455 unsigned VecSz = (1 + OffsetEnd / VecScalarsSz - OffsetBeg / VecScalarsSz) *
1050010456 VecScalarsSz;
@@ -17829,7 +17785,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
1782917785 for (unsigned I = NextInst; I < MaxInst; ++I) {
1783017786 unsigned ActualVF = std::min(MaxInst - I, VF);
1783117787
17832- if (!hasFullVectorsOrPowerOf2(*TTI, ScalarTy, ActualVF))
17788+ if (!has_single_bit( ActualVF))
1783317789 continue;
1783417790
1783517791 if (MaxVFOnly && ActualVF < MaxVF)
0 commit comments