@@ -1314,6 +1314,22 @@ static bool hasFullVectorsOrPowerOf2(const TargetTransformInfo &TTI, Type *Ty,
13141314 Sz % NumParts == 0;
13151315}
13161316
1317+ /// Returns number of parts, the type \p VecTy will be split at the codegen
1318+ /// phase. If the type is going to be scalarized or does not use whole
1319+ /// registers, returns 1.
1320+ static unsigned
1321+ getNumberOfParts(const TargetTransformInfo &TTI, VectorType *VecTy,
1322+ const unsigned Limit = std::numeric_limits<unsigned>::max()) {
1323+ unsigned NumParts = TTI.getNumberOfParts(VecTy);
1324+ if (NumParts == 0 || NumParts >= Limit)
1325+ return 1;
1326+ unsigned Sz = getNumElements(VecTy);
1327+ if (NumParts >= Sz || Sz % NumParts != 0 ||
1328+ !hasFullVectorsOrPowerOf2(TTI, VecTy->getElementType(), Sz / NumParts))
1329+ return 1;
1330+ return NumParts;
1331+ }
1332+
13171333namespace slpvectorizer {
13181334
13191335/// Bottom Up SLP Vectorizer.
@@ -4618,12 +4634,7 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
46184634 if (!isValidElementType(ScalarTy))
46194635 return std::nullopt;
46204636 auto *VecTy = getWidenedType(ScalarTy, NumScalars);
4621- int NumParts = TTI->getNumberOfParts(VecTy);
4622- if (NumParts == 0 || NumParts >= NumScalars ||
4623- VecTy->getNumElements() % NumParts != 0 ||
4624- !hasFullVectorsOrPowerOf2(*TTI, VecTy->getElementType(),
4625- VecTy->getNumElements() / NumParts))
4626- NumParts = 1;
4637+ unsigned NumParts = ::getNumberOfParts(*TTI, VecTy, NumScalars);
46274638 SmallVector<int> ExtractMask;
46284639 SmallVector<int> Mask;
46294640 SmallVector<SmallVector<const TreeEntry *>> Entries;
@@ -5574,8 +5585,8 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
55745585 }
55755586 }
55765587 if (Sz == 2 && TE.getVectorFactor() == 4 &&
5577- TTI->getNumberOfParts(getWidenedType(TE.Scalars.front()->getType(),
5578- 2 * TE.getVectorFactor())) == 1)
5588+ ::getNumberOfParts(*TTI, getWidenedType(TE.Scalars.front()->getType(),
5589+ 2 * TE.getVectorFactor())) == 1)
55795590 return std::nullopt;
55805591 if (!ShuffleVectorInst::isOneUseSingleSourceMask(TE.ReuseShuffleIndices,
55815592 Sz)) {
@@ -9847,12 +9858,15 @@ void BoUpSLP::transformNodes() {
98479858 // only with the single non-undef element).
98489859 bool IsSplat = isSplat(Slice);
98499860 if (Slices.empty() || !IsSplat ||
9850- (VF <= 2 && 2 * std::clamp(TTI->getNumberOfParts(getWidenedType(
9851- Slice.front()->getType(), VF)),
9852- 1U, VF - 1) !=
9853- std::clamp(TTI->getNumberOfParts(getWidenedType(
9854- Slice.front()->getType(), 2 * VF)),
9855- 1U, 2 * VF)) ||
9861+ (VF <= 2 &&
9862+ 2 * std::clamp(
9863+ ::getNumberOfParts(
9864+ *TTI, getWidenedType(Slice.front()->getType(), VF)),
9865+ 1U, VF - 1) !=
9866+ std::clamp(::getNumberOfParts(
9867+ *TTI, getWidenedType(Slice.front()->getType(),
9868+ 2 * VF)),
9869+ 1U, 2 * VF)) ||
98569870 count(Slice, Slice.front()) ==
98579871 static_cast<long>(isa<UndefValue>(Slice.front()) ? VF - 1
98589872 : 1)) {
@@ -10793,12 +10807,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
1079310807 }
1079410808 assert(!CommonMask.empty() && "Expected non-empty common mask.");
1079510809 auto *MaskVecTy = getWidenedType(ScalarTy, Mask.size());
10796- unsigned NumParts = TTI.getNumberOfParts(MaskVecTy);
10797- if (NumParts == 0 || NumParts >= Mask.size() ||
10798- MaskVecTy->getNumElements() % NumParts != 0 ||
10799- !hasFullVectorsOrPowerOf2(TTI, MaskVecTy->getElementType(),
10800- MaskVecTy->getNumElements() / NumParts))
10801- NumParts = 1;
10810+ unsigned NumParts = ::getNumberOfParts(TTI, MaskVecTy, Mask.size());
1080210811 unsigned SliceSize = getPartNumElems(Mask.size(), NumParts);
1080310812 const auto *It =
1080410813 find_if(Mask, [](int Idx) { return Idx != PoisonMaskElem; });
@@ -10813,12 +10822,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
1081310822 }
1081410823 assert(!CommonMask.empty() && "Expected non-empty common mask.");
1081510824 auto *MaskVecTy = getWidenedType(ScalarTy, Mask.size());
10816- unsigned NumParts = TTI.getNumberOfParts(MaskVecTy);
10817- if (NumParts == 0 || NumParts >= Mask.size() ||
10818- MaskVecTy->getNumElements() % NumParts != 0 ||
10819- !hasFullVectorsOrPowerOf2(TTI, MaskVecTy->getElementType(),
10820- MaskVecTy->getNumElements() / NumParts))
10821- NumParts = 1;
10825+ unsigned NumParts = ::getNumberOfParts(TTI, MaskVecTy, Mask.size());
1082210826 unsigned SliceSize = getPartNumElems(Mask.size(), NumParts);
1082310827 const auto *It =
1082410828 find_if(Mask, [](int Idx) { return Idx != PoisonMaskElem; });
@@ -11351,7 +11355,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
1135111355 unsigned const NumElts = SrcVecTy->getNumElements();
1135211356 unsigned const NumScalars = VL.size();
1135311357
11354- unsigned NumOfParts = TTI->getNumberOfParts(SrcVecTy);
11358+ unsigned NumOfParts = ::getNumberOfParts(*TTI, SrcVecTy);
1135511359
1135611360 SmallVector<int> InsertMask(NumElts, PoisonMaskElem);
1135711361 unsigned OffsetBeg = *getElementIndex(VL.front());
@@ -14862,12 +14866,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
1486214866 SmallVector<SmallVector<const TreeEntry *>> Entries;
1486314867 Type *OrigScalarTy = GatheredScalars.front()->getType();
1486414868 auto *VecTy = getWidenedType(ScalarTy, GatheredScalars.size());
14865- unsigned NumParts = TTI->getNumberOfParts(VecTy);
14866- if (NumParts == 0 || NumParts >= GatheredScalars.size() ||
14867- VecTy->getNumElements() % NumParts != 0 ||
14868- !hasFullVectorsOrPowerOf2(*TTI, VecTy->getElementType(),
14869- VecTy->getNumElements() / NumParts))
14870- NumParts = 1;
14869+ unsigned NumParts = ::getNumberOfParts(*TTI, VecTy, GatheredScalars.size());
1487114870 if (!all_of(GatheredScalars, IsaPred<UndefValue>)) {
1487214871 // Check for gathered extracts.
1487314872 bool Resized = false;
@@ -14899,12 +14898,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
1489914898 Resized = true;
1490014899 GatheredScalars.append(VF - GatheredScalars.size(),
1490114900 PoisonValue::get(OrigScalarTy));
14902- NumParts = TTI->getNumberOfParts(getWidenedType(OrigScalarTy, VF));
14903- if (NumParts == 0 || NumParts >= GatheredScalars.size() ||
14904- VecTy->getNumElements() % NumParts != 0 ||
14905- !hasFullVectorsOrPowerOf2(*TTI, VecTy->getElementType(),
14906- VecTy->getNumElements() / NumParts))
14907- NumParts = 1;
14901+ NumParts =
14902+ ::getNumberOfParts(*TTI, getWidenedType(OrigScalarTy, VF), VF);
1490814903 }
1490914904 }
1491014905 }
@@ -17049,10 +17044,10 @@ void BoUpSLP::optimizeGatherSequence() {
1704917044 // Check if the last undefs actually change the final number of used vector
1705017045 // registers.
1705117046 return SM1.size() - LastUndefsCnt > 1 &&
17052- TTI->getNumberOfParts(SI1->getType()) ==
17053- TTI->getNumberOfParts(
17054- getWidenedType(SI1->getType()->getElementType(),
17055- SM1.size() - LastUndefsCnt));
17047+ ::getNumberOfParts(*TTI, SI1->getType()) ==
17048+ ::getNumberOfParts(
17049+ *TTI, getWidenedType(SI1->getType()->getElementType(),
17050+ SM1.size() - LastUndefsCnt));
1705617051 };
1705717052 // Perform O(N^2) search over the gather/shuffle sequences and merge identical
1705817053 // instructions. TODO: We can further optimize this scan if we split the
@@ -17829,9 +17824,12 @@ bool BoUpSLP::collectValuesToDemote(
1782917824 const unsigned VF = E.Scalars.size();
1783017825 Type *OrigScalarTy = E.Scalars.front()->getType();
1783117826 if (UniqueBases.size() <= 2 ||
17832- TTI->getNumberOfParts(getWidenedType(OrigScalarTy, VF)) ==
17833- TTI->getNumberOfParts(getWidenedType(
17834- IntegerType::get(OrigScalarTy->getContext(), BitWidth), VF)))
17827+ ::getNumberOfParts(*TTI, getWidenedType(OrigScalarTy, VF)) ==
17828+ ::getNumberOfParts(
17829+ *TTI,
17830+ getWidenedType(
17831+ IntegerType::get(OrigScalarTy->getContext(), BitWidth),
17832+ VF)))
1783517833 ToDemote.push_back(E.Idx);
1783617834 }
1783717835 return Res;
@@ -18241,8 +18239,8 @@ void BoUpSLP::computeMinimumValueSizes() {
1824118239 [&](Value *V) { return AnalyzedMinBWVals.contains(V); }))
1824218240 return 0u;
1824318241
18244- unsigned NumParts = TTI->getNumberOfParts(
18245- getWidenedType(TreeRootIT, VF * ScalarTyNumElements));
18242+ unsigned NumParts = ::getNumberOfParts(
18243+ *TTI, getWidenedType(TreeRootIT, VF * ScalarTyNumElements));
1824618244
1824718245 // The maximum bit width required to represent all the values that can be
1824818246 // demoted without loss of precision. It would be safe to truncate the roots
@@ -18302,8 +18300,10 @@ void BoUpSLP::computeMinimumValueSizes() {
1830218300 // use - ignore it.
1830318301 if (NumParts > 1 &&
1830418302 NumParts ==
18305- TTI->getNumberOfParts(getWidenedType(
18306- IntegerType::get(F->getContext(), bit_ceil(MaxBitWidth)), VF)))
18303+ ::getNumberOfParts(
18304+ *TTI, getWidenedType(IntegerType::get(F->getContext(),
18305+ bit_ceil(MaxBitWidth)),
18306+ VF)))
1830718307 return 0u;
1830818308
1830918309 unsigned Opcode = E.getOpcode();
@@ -20086,14 +20086,14 @@ class HorizontalReduction {
2008620086 ReduxWidth =
2008720087 getFloorFullVectorNumberOfElements(TTI, ScalarTy, ReduxWidth);
2008820088 VectorType *Tp = getWidenedType(ScalarTy, ReduxWidth);
20089- NumParts = TTI.getNumberOfParts(Tp);
20089+ NumParts = ::getNumberOfParts(TTI, Tp);
2009020090 NumRegs =
2009120091 TTI.getNumberOfRegisters(TTI.getRegisterClassForType(true, Tp));
2009220092 while (NumParts > NumRegs) {
2009320093 assert(ReduxWidth > 0 && "ReduxWidth is unexpectedly 0.");
2009420094 ReduxWidth = bit_floor(ReduxWidth - 1);
2009520095 VectorType *Tp = getWidenedType(ScalarTy, ReduxWidth);
20096- NumParts = TTI.getNumberOfParts(Tp);
20096+ NumParts = ::getNumberOfParts(TTI, Tp);
2009720097 NumRegs =
2009820098 TTI.getNumberOfRegisters(TTI.getRegisterClassForType(true, Tp));
2009920099 }