@@ -260,20 +260,6 @@ static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) {
260260 VF * getNumElements(ScalarTy));
261261}
262262
263- /// Returns the number of elements of the given type \p Ty, not less than \p Sz,
264- /// which forms type, which splits by \p TTI into whole vector types during
265- /// legalization.
266- static unsigned getFullVectorNumberOfElements(const TargetTransformInfo &TTI,
267- Type *Ty, unsigned Sz) {
268- if (!isValidElementType(Ty))
269- return PowerOf2Ceil(Sz);
270- // Find the number of elements, which forms full vectors.
271- const unsigned NumParts = TTI.getRegUsageForType(getWidenedType(Ty, Sz));
272- if (NumParts == 0 || NumParts == Sz)
273- return PowerOf2Ceil(Sz);
274- return PowerOf2Ceil(divideCeil(Sz, NumParts)) * NumParts;
275- }
276-
277263static void transformScalarShuffleIndiciesToVector(unsigned VecTyNumElements,
278264 SmallVectorImpl<int> &Mask) {
279265 // The ShuffleBuilder implementation use shufflevector to splat an "element".
@@ -1238,22 +1224,6 @@ static bool doesNotNeedToSchedule(ArrayRef<Value *> VL) {
12381224 (all_of(VL, isUsedOutsideBlock) || all_of(VL, areAllOperandsNonInsts));
12391225}
12401226
1241- /// Returns true if widened type of \p Ty elements with size \p Sz represents
1242- /// full vector type, i.e. adding extra element results in extra parts upon type
1243- /// legalization.
1244- static bool hasFullVectorsOnly(const TargetTransformInfo &TTI, Type *Ty,
1245- unsigned Sz) {
1246- if (Sz <= 1)
1247- return false;
1248- if (!isValidElementType(Ty) && !isa<FixedVectorType>(Ty))
1249- return false;
1250- if (has_single_bit(Sz))
1251- return true;
1252- const unsigned NumParts = TTI.getRegUsageForType(getWidenedType(Ty, Sz));
1253- return NumParts > 0 && NumParts != Sz && has_single_bit(Sz / NumParts) &&
1254- Sz % NumParts == 0;
1255- }
1256-
12571227namespace slpvectorizer {
12581228
12591229/// Bottom Up SLP Vectorizer.
@@ -2497,9 +2467,7 @@ class BoUpSLP {
24972467 }
24982468 // TODO: Check if we can remove a check for non-power-2 number of
24992469 // scalars after full support of non-power-2 vectorization.
2500- return UniqueValues.size() != 2 &&
2501- hasFullVectorsOnly(*R.TTI, (*UniqueValues.begin())->getType(),
2502- UniqueValues.size());
2470+ return UniqueValues.size() != 2 && has_single_bit(UniqueValues.size());
25032471 };
25042472
25052473 // If the initial strategy fails for any of the operand indexes, then we
@@ -3308,9 +3276,8 @@ class BoUpSLP {
33083276 SmallVectorImpl<Value *> *AltScalars = nullptr) const;
33093277
33103278 /// Return true if this is a non-power-of-2 node.
3311- bool isNonPowOf2Vec(const TargetTransformInfo &TTI) const {
3312- bool IsNonPowerOf2 = !hasFullVectorsOnly(
3313- TTI, getValueType(Scalars.front()), Scalars.size());
3279+ bool isNonPowOf2Vec() const {
3280+ bool IsNonPowerOf2 = !has_single_bit(Scalars.size());
33143281 assert((!IsNonPowerOf2 || ReuseShuffleIndices.empty()) &&
33153282 "Reshuffling not supported with non-power-of-2 vectors yet.");
33163283 return IsNonPowerOf2;
@@ -3488,7 +3455,7 @@ class BoUpSLP {
34883455
34893456 if (UserTreeIdx.UserTE) {
34903457 Last->UserTreeIndices.push_back(UserTreeIdx);
3491- assert((!Last->isNonPowOf2Vec(*TTI ) || Last->ReorderIndices.empty()) &&
3458+ assert((!Last->isNonPowOf2Vec() || Last->ReorderIndices.empty()) &&
34923459 "Reordering isn't implemented for non-power-of-2 nodes yet");
34933460 }
34943461 return Last;
@@ -4394,7 +4361,7 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
43944361 if (!isValidElementType(ScalarTy))
43954362 return std::nullopt;
43964363 auto *VecTy = getWidenedType(ScalarTy, NumScalars);
4397- int NumParts = TTI->getRegUsageForType (VecTy);
4364+ int NumParts = TTI->getNumberOfParts (VecTy);
43984365 if (NumParts == 0 || NumParts >= NumScalars)
43994366 NumParts = 1;
44004367 SmallVector<int> ExtractMask;
@@ -4766,7 +4733,7 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
47664733 // Check the order of pointer operands or that all pointers are the same.
47674734 bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, Order);
47684735 // FIXME: Reordering isn't implemented for non-power-of-2 nodes yet.
4769- if (!Order.empty() && !hasFullVectorsOnly(*TTI, ScalarTy, Sz )) {
4736+ if (!Order.empty() && !has_single_bit(VL.size() )) {
47704737 assert(VectorizeNonPowerOf2 && "non-power-of-2 number of loads only "
47714738 "supported with VectorizeNonPowerOf2");
47724739 return LoadsState::Gather;
@@ -4820,13 +4787,12 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
48204787 });
48214788 });
48224789 const unsigned AbsoluteDiff = std::abs(*Diff);
4823- if (IsPossibleStrided &&
4824- (IsAnyPointerUsedOutGraph ||
4825- ((Sz > MinProfitableStridedLoads ||
4826- (AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
4827- hasFullVectorsOnly(*TTI, ScalarTy, AbsoluteDiff))) &&
4828- AbsoluteDiff > Sz) ||
4829- *Diff == -(static_cast<int>(Sz) - 1))) {
4790+ if (IsPossibleStrided && (IsAnyPointerUsedOutGraph ||
4791+ ((Sz > MinProfitableStridedLoads ||
4792+ (AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
4793+ has_single_bit(AbsoluteDiff))) &&
4794+ AbsoluteDiff > Sz) ||
4795+ *Diff == -(static_cast<int>(Sz) - 1))) {
48304796 int Stride = *Diff / static_cast<int>(Sz - 1);
48314797 if (*Diff == Stride * static_cast<int>(Sz - 1)) {
48324798 Align Alignment =
@@ -5231,7 +5197,7 @@ static bool areTwoInsertFromSameBuildVector(
52315197std::optional<BoUpSLP::OrdersType>
52325198BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
52335199 // FIXME: Vectorizing is not supported yet for non-power-of-2 ops.
5234- if (TE.isNonPowOf2Vec(*TTI ))
5200+ if (TE.isNonPowOf2Vec())
52355201 return std::nullopt;
52365202
52375203 // No need to reorder if need to shuffle reuses, still need to shuffle the
@@ -5265,8 +5231,8 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
52655231 }
52665232 }
52675233 if (Sz == 2 && TE.getVectorFactor() == 4 &&
5268- TTI->getRegUsageForType (getWidenedType(TE.Scalars.front()->getType(),
5269- 2 * TE.getVectorFactor())) == 1)
5234+ TTI->getNumberOfParts (getWidenedType(TE.Scalars.front()->getType(),
5235+ 2 * TE.getVectorFactor())) == 1)
52705236 return std::nullopt;
52715237 if (!ShuffleVectorInst::isOneUseSingleSourceMask(TE.ReuseShuffleIndices,
52725238 Sz)) {
@@ -5615,7 +5581,7 @@ void BoUpSLP::reorderTopToBottom() {
56155581
56165582 // Reorder the graph nodes according to their vectorization factor.
56175583 for (unsigned VF = VectorizableTree.front()->getVectorFactor(); VF > 1;
5618- VF - = 2) {
5584+ VF / = 2) {
56195585 auto It = VFToOrderedEntries.find(VF);
56205586 if (It == VFToOrderedEntries.end())
56215587 continue;
@@ -5788,7 +5754,7 @@ bool BoUpSLP::canReorderOperands(
57885754 ArrayRef<TreeEntry *> ReorderableGathers,
57895755 SmallVectorImpl<TreeEntry *> &GatherOps) {
57905756 // FIXME: Reordering isn't implemented for non-power-of-2 nodes yet.
5791- if (UserTE->isNonPowOf2Vec(*TTI ))
5757+ if (UserTE->isNonPowOf2Vec())
57925758 return false;
57935759
57945760 for (unsigned I = 0, E = UserTE->getNumOperands(); I < E; ++I) {
@@ -5963,7 +5929,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
59635929 auto Res = OrdersUses.insert(std::make_pair(OrdersType(), 0));
59645930 const auto AllowsReordering = [&](const TreeEntry *TE) {
59655931 // FIXME: Reordering isn't implemented for non-power-of-2 nodes yet.
5966- if (TE->isNonPowOf2Vec(*TTI ))
5932+ if (TE->isNonPowOf2Vec())
59675933 return false;
59685934 if (!TE->ReorderIndices.empty() || !TE->ReuseShuffleIndices.empty() ||
59695935 (TE->State == TreeEntry::Vectorize && TE->isAltShuffle()) ||
@@ -6609,7 +6575,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
66096575 case Instruction::ExtractElement: {
66106576 bool Reuse = canReuseExtract(VL, VL0, CurrentOrder);
66116577 // FIXME: Vectorizing is not supported yet for non-power-of-2 ops.
6612- if (!hasFullVectorsOnly(*TTI, VL0->getType(), VL.size()))
6578+ if (!has_single_bit( VL.size()))
66136579 return TreeEntry::NeedToGather;
66146580 if (Reuse || !CurrentOrder.empty())
66156581 return TreeEntry::Vectorize;
@@ -7019,7 +6985,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
70196985 ReuseShuffleIndices.clear();
70206986 } else {
70216987 // FIXME: Reshuffing scalars is not supported yet for non-power-of-2 ops.
7022- if (UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec(*TTI )) {
6988+ if (UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec()) {
70236989 LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported "
70246990 "for nodes with padding.\n");
70256991 newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
@@ -7032,18 +6998,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
70326998 return isa<UndefValue>(V) ||
70336999 !isConstant(V);
70347000 })) ||
7035- !hasFullVectorsOnly(*TTI, UniqueValues.front()->getType(),
7036- NumUniqueScalarValues)) {
7001+ !llvm::has_single_bit<uint32_t>(NumUniqueScalarValues)) {
70377002 if (DoNotFail && UniquePositions.size() > 1 &&
70387003 NumUniqueScalarValues > 1 && S.MainOp->isSafeToRemove() &&
70397004 all_of(UniqueValues, [=](Value *V) {
70407005 return isa<ExtractElementInst>(V) ||
70417006 areAllUsersVectorized(cast<Instruction>(V),
70427007 UserIgnoreList);
70437008 })) {
7044- // Find the number of elements, which forms full vectors.
7045- unsigned PWSz = getFullVectorNumberOfElements(
7046- *TTI, UniqueValues.front()->getType(), UniqueValues.size());
7009+ unsigned PWSz = PowerOf2Ceil(UniqueValues.size());
70477010 if (PWSz == VL.size()) {
70487011 ReuseShuffleIndices.clear();
70497012 } else {
@@ -9254,7 +9217,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
92549217 }
92559218 assert(!CommonMask.empty() && "Expected non-empty common mask.");
92569219 auto *MaskVecTy = getWidenedType(ScalarTy, Mask.size());
9257- unsigned NumParts = TTI.getRegUsageForType (MaskVecTy);
9220+ unsigned NumParts = TTI.getNumberOfParts (MaskVecTy);
92589221 if (NumParts == 0 || NumParts >= Mask.size())
92599222 NumParts = 1;
92609223 unsigned SliceSize = getPartNumElems(Mask.size(), NumParts);
@@ -9271,7 +9234,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
92719234 }
92729235 assert(!CommonMask.empty() && "Expected non-empty common mask.");
92739236 auto *MaskVecTy = getWidenedType(ScalarTy, Mask.size());
9274- unsigned NumParts = TTI.getRegUsageForType (MaskVecTy);
9237+ unsigned NumParts = TTI.getNumberOfParts (MaskVecTy);
92759238 if (NumParts == 0 || NumParts >= Mask.size())
92769239 NumParts = 1;
92779240 unsigned SliceSize = getPartNumElems(Mask.size(), NumParts);
@@ -9777,7 +9740,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
97779740 unsigned const NumElts = SrcVecTy->getNumElements();
97789741 unsigned const NumScalars = VL.size();
97799742
9780- unsigned NumOfParts = TTI->getRegUsageForType (SrcVecTy);
9743+ unsigned NumOfParts = TTI->getNumberOfParts (SrcVecTy);
97819744
97829745 SmallVector<int> InsertMask(NumElts, PoisonMaskElem);
97839746 unsigned OffsetBeg = *getElementIndex(VL.front());
@@ -10993,9 +10956,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
1099310956 // Keep original scalar if number of externally used instructions in
1099410957 // the same entry is not power of 2. It may help to do some extra
1099510958 // vectorization for now.
10996- KeepScalar =
10997- ScalarUsesCount <= 1 ||
10998- !hasFullVectorsOnly(*TTI, EU.Scalar->getType(), ScalarUsesCount);
10959+ KeepScalar = ScalarUsesCount <= 1 || !has_single_bit(ScalarUsesCount);
1099910960 }
1100010961 if (KeepScalar) {
1100110962 ExternalUsesAsOriginalScalar.insert(EU.Scalar);
@@ -11688,14 +11649,13 @@ BoUpSLP::isGatherShuffledEntry(
1168811649 if (TE == VectorizableTree.front().get())
1168911650 return {};
1169011651 // FIXME: Gathering for non-power-of-2 nodes not implemented yet.
11691- if (TE->isNonPowOf2Vec(*TTI ))
11652+ if (TE->isNonPowOf2Vec())
1169211653 return {};
1169311654 Mask.assign(VL.size(), PoisonMaskElem);
1169411655 assert(TE->UserTreeIndices.size() == 1 &&
1169511656 "Expected only single user of the gather node.");
11696- // Number of scalars must be divisible by NumParts.
11697- if (VL.size() % NumParts != 0)
11698- return {};
11657+ assert(VL.size() % NumParts == 0 &&
11658+ "Number of scalars must be divisible by NumParts.");
1169911659 unsigned SliceSize = getPartNumElems(VL.size(), NumParts);
1170011660 SmallVector<std::optional<TTI::ShuffleKind>> Res;
1170111661 for (unsigned Part : seq<unsigned>(NumParts)) {
@@ -12834,7 +12794,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
1283412794 SmallVector<SmallVector<const TreeEntry *>> Entries;
1283512795 Type *OrigScalarTy = GatheredScalars.front()->getType();
1283612796 auto *VecTy = getWidenedType(ScalarTy, GatheredScalars.size());
12837- unsigned NumParts = TTI->getRegUsageForType (VecTy);
12797+ unsigned NumParts = TTI->getNumberOfParts (VecTy);
1283812798 if (NumParts == 0 || NumParts >= GatheredScalars.size())
1283912799 NumParts = 1;
1284012800 if (!all_of(GatheredScalars, IsaPred<UndefValue>)) {
@@ -16080,7 +16040,7 @@ void BoUpSLP::computeMinimumValueSizes() {
1608016040 [&](Value *V) { return AnalyzedMinBWVals.contains(V); }))
1608116041 return 0u;
1608216042
16083- unsigned NumParts = TTI->getRegUsageForType (
16043+ unsigned NumParts = TTI->getNumberOfParts (
1608416044 getWidenedType(TreeRootIT, VF * ScalarTyNumElements));
1608516045
1608616046 // The maximum bit width required to represent all the values that can be
@@ -16137,7 +16097,7 @@ void BoUpSLP::computeMinimumValueSizes() {
1613716097 // use - ignore it.
1613816098 if (NumParts > 1 &&
1613916099 NumParts ==
16140- TTI->getRegUsageForType (getWidenedType(
16100+ TTI->getNumberOfParts (getWidenedType(
1614116101 IntegerType::get(F->getContext(), bit_ceil(MaxBitWidth)), VF)))
1614216102 return 0u;
1614316103
@@ -16998,7 +16958,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
1699816958 for (unsigned I = NextInst; I < MaxInst; ++I) {
1699916959 unsigned ActualVF = std::min(MaxInst - I, VF);
1700016960
17001- if (!hasFullVectorsOnly(*TTI, ScalarTy, ActualVF))
16961+ if (!has_single_bit( ActualVF))
1700216962 continue;
1700316963
1700416964 if (MaxVFOnly && ActualVF < MaxVF)
0 commit comments