@@ -21191,25 +21191,30 @@ bool SLPVectorizerPass::vectorizeStores(
2119121191 ++Repeat;
2119221192 bool RepeatChanged = false;
2119321193 bool AnyProfitableGraph = false;
21194- for (unsigned Size : CandidateVFs) {
21194+ for (unsigned VF : CandidateVFs) {
2119521195 AnyProfitableGraph = false;
21196- unsigned StartIdx = std::distance(
21197- RangeSizes.begin(),
21198- find_if(RangeSizes,
21199- std::bind(IsNotVectorized, Size >= MaxRegVF, _1)));
21200- while (StartIdx < End) {
21201- unsigned EndIdx = std::distance(
21196+ unsigned FirstUnvecStore =
21197+ std::distance(RangeSizes.begin(),
21198+ find_if(RangeSizes, std::bind(IsNotVectorized,
21199+ VF >= MaxRegVF, _1)));
21200+
21201+ // Form slices of size VF starting from FirstUnvecStore and try to
21202+ // vectorize them.
21203+ while (FirstUnvecStore < End) {
21204+ unsigned FirstVecStore = std::distance(
2120221205 RangeSizes.begin(),
21203- find_if(RangeSizes.drop_front(StartIdx),
21204- std::bind(IsVectorized, Size >= MaxRegVF, _1)));
21205- unsigned Sz = EndIdx >= End ? End : EndIdx;
21206- for (unsigned Cnt = StartIdx; Cnt + Size <= Sz;) {
21207- if (!checkTreeSizes(RangeSizes.slice(Cnt, Size),
21208- Size >= MaxRegVF)) {
21209- ++Cnt;
21206+ find_if(RangeSizes.drop_front(FirstUnvecStore),
21207+ std::bind(IsVectorized, VF >= MaxRegVF, _1)));
21208+ unsigned MaxSliceEnd = FirstVecStore >= End ? End : FirstVecStore;
21209+ for (unsigned SliceStartIdx = FirstUnvecStore;
21210+ SliceStartIdx + VF <= MaxSliceEnd;) {
21211+ if (!checkTreeSizes(RangeSizes.slice(SliceStartIdx, VF),
21212+ VF >= MaxRegVF)) {
21213+ ++SliceStartIdx;
2121021214 continue;
2121121215 }
21212- ArrayRef<Value *> Slice = ArrayRef(Operands).slice(Cnt, Size);
21216+ ArrayRef<Value *> Slice =
21217+ ArrayRef(Operands).slice(SliceStartIdx, VF);
2121321218 assert(all_of(Slice,
2121421219 [&](Value *V) {
2121521220 return cast<StoreInst>(V)
@@ -21223,19 +21228,23 @@ bool SLPVectorizerPass::vectorizeStores(
2122321228 if (!NonSchedulable.empty()) {
2122421229 auto [NonSchedSizeMax, NonSchedSizeMin] =
2122521230 NonSchedulable.lookup(Slice.front());
21226- if (NonSchedSizeMax > 0 && NonSchedSizeMin <= Size) {
21227- Cnt += NonSchedSizeMax;
21231+ if (NonSchedSizeMax > 0 && NonSchedSizeMin <= VF) {
21232+ // VF is too ambitious. Try to vectorize another slice before
21233+ // trying a smaller VF.
21234+ SliceStartIdx += NonSchedSizeMax;
2122821235 continue;
2122921236 }
2123021237 }
2123121238 unsigned TreeSize;
2123221239 std::optional<bool> Res =
21233- vectorizeStoreChain(Slice, R, Cnt , MinVF, TreeSize);
21240+ vectorizeStoreChain(Slice, R, SliceStartIdx , MinVF, TreeSize);
2123421241 if (!Res) {
21242+ // Update the range of non schedulable VFs for slices starting
21243+ // at SliceStartIdx.
2123521244 NonSchedulable
21236- .try_emplace(Slice.front(), std::make_pair(Size, Size ))
21245+ .try_emplace(Slice.front(), std::make_pair(VF, VF ))
2123721246 .first->getSecond()
21238- .second = Size ;
21247+ .second = VF ;
2123921248 } else if (*Res) {
2124021249 // Mark the vectorized stores so that we don't vectorize them
2124121250 // again.
@@ -21246,63 +21255,67 @@ bool SLPVectorizerPass::vectorizeStores(
2124621255 // If we vectorized initial block, no need to try to vectorize
2124721256 // it again.
2124821257 for (std::pair<unsigned, unsigned> &P :
21249- RangeSizes.slice(Cnt, Size ))
21258+ RangeSizes.slice(SliceStartIdx, VF ))
2125021259 P.first = P.second = 0;
21251- if (Cnt < StartIdx + MinVF) {
21252- for (std::pair<unsigned, unsigned> &P :
21253- RangeSizes.slice(StartIdx, Cnt - StartIdx ))
21260+ if (SliceStartIdx < FirstUnvecStore + MinVF) {
21261+ for (std::pair<unsigned, unsigned> &P : RangeSizes.slice(
21262+ FirstUnvecStore, SliceStartIdx - FirstUnvecStore ))
2125421263 P.first = P.second = 0;
21255- StartIdx = Cnt + Size ;
21264+ FirstUnvecStore = SliceStartIdx + VF ;
2125621265 }
21257- if (Cnt > Sz - Size - MinVF) {
21266+ if (SliceStartIdx > MaxSliceEnd - VF - MinVF) {
2125821267 for (std::pair<unsigned, unsigned> &P :
21259- RangeSizes.slice(Cnt + Size, Sz - (Cnt + Size)))
21268+ RangeSizes.slice(SliceStartIdx + VF,
21269+ MaxSliceEnd - (SliceStartIdx + VF)))
2126021270 P.first = P.second = 0;
21261- if (Sz == End)
21262- End = Cnt ;
21263- Sz = Cnt ;
21271+ if (MaxSliceEnd == End)
21272+ End = SliceStartIdx ;
21273+ MaxSliceEnd = SliceStartIdx ;
2126421274 }
21265- Cnt += Size ;
21275+ SliceStartIdx += VF ;
2126621276 continue;
2126721277 }
21268- if (Size > 2 && Res &&
21269- !all_of(RangeSizes.slice(Cnt, Size ),
21270- std::bind(VFIsProfitable, Size >= MaxRegVF, TreeSize,
21278+ if (VF > 2 && Res &&
21279+ !all_of(RangeSizes.slice(SliceStartIdx, VF ),
21280+ std::bind(VFIsProfitable, VF >= MaxRegVF, TreeSize,
2127121281 _1))) {
21272- Cnt += Size ;
21282+ SliceStartIdx += VF ;
2127321283 continue;
2127421284 }
2127521285 // Check for the very big VFs that we're not rebuilding same
2127621286 // trees, just with larger number of elements.
21277- if (Size > MaxRegVF && TreeSize > 1 &&
21278- all_of(RangeSizes.slice(Cnt, Size ),
21287+ if (VF > MaxRegVF && TreeSize > 1 &&
21288+ all_of(RangeSizes.slice(SliceStartIdx, VF ),
2127921289 std::bind(FirstSizeSame, TreeSize, _1))) {
21280- Cnt += Size;
21281- while (Cnt != Sz && RangeSizes[Cnt].first == TreeSize)
21282- ++Cnt;
21290+ SliceStartIdx += VF;
21291+ while (SliceStartIdx != MaxSliceEnd &&
21292+ RangeSizes[SliceStartIdx].first == TreeSize)
21293+ ++SliceStartIdx;
2128321294 continue;
2128421295 }
21285- if (TreeSize > 1)
21296+ if (TreeSize > 1) {
2128621297 for (std::pair<unsigned, unsigned> &P :
21287- RangeSizes.slice(Cnt, Size )) {
21288- if (Size >= MaxRegVF)
21298+ RangeSizes.slice(SliceStartIdx, VF )) {
21299+ if (VF >= MaxRegVF)
2128921300 P.second = std::max(P.second, TreeSize);
2129021301 else
2129121302 P.first = std::max(P.first, TreeSize);
2129221303 }
21293- ++Cnt;
21304+ }
21305+ ++SliceStartIdx;
2129421306 AnyProfitableGraph = true;
2129521307 }
21296- if (StartIdx >= End)
21308+ if (FirstUnvecStore >= End)
2129721309 break;
21298- if (Sz - StartIdx < Size && Sz - StartIdx >= MinVF)
21310+ if (MaxSliceEnd - FirstUnvecStore < VF &&
21311+ MaxSliceEnd - FirstUnvecStore >= MinVF)
2129921312 AnyProfitableGraph = true;
21300- StartIdx = std::distance(
21313+ FirstUnvecStore = std::distance(
2130121314 RangeSizes.begin(),
21302- find_if(RangeSizes.drop_front(Sz ),
21303- std::bind(IsNotVectorized, Size >= MaxRegVF, _1)));
21315+ find_if(RangeSizes.drop_front(MaxSliceEnd ),
21316+ std::bind(IsNotVectorized, VF >= MaxRegVF, _1)));
2130421317 }
21305- if (!AnyProfitableGraph && Size >= MaxRegVF && has_single_bit(Size ))
21318+ if (!AnyProfitableGraph && VF >= MaxRegVF && has_single_bit(VF ))
2130621319 break;
2130721320 }
2130821321 // All values vectorized - exit.
0 commit comments