@@ -20859,9 +20859,16 @@ namespace {
2085920859/// A group of stores that we'll try to bundle together using vector ops.
2086020860/// They are ordered using the signed distance of their address operand to the
2086120861/// address of this group's BaseInstr.
20862- struct RelatedStoreInsts {
20863- RelatedStoreInsts(unsigned BaseInstrIdx) { reset(BaseInstrIdx); }
20862+ class RelatedStoreInsts {
20863+ public:
20864+ RelatedStoreInsts(unsigned BaseInstrIdx, ArrayRef<StoreInst *> AllStores)
20865+ : AllStores(AllStores) {
20866+ reset(BaseInstrIdx);
20867+ }
20868+
2086420869 void reset(unsigned NewBaseInstr) {
20870+ assert(NewBaseInstr < AllStores.size() &&
20871+ "Instruction index out of bounds");
2086520872 BaseInstrIdx = NewBaseInstr;
2086620873 Instrs.clear();
2086720874 insertOrLookup(NewBaseInstr, 0);
@@ -20876,12 +20883,58 @@ struct RelatedStoreInsts {
2087620883 return Inserted ? std::nullopt : std::optional<unsigned>(It->second);
2087720884 }
2087820885
20886+ using DistToInstMap = std::map<int, unsigned>;
20887+ const DistToInstMap &getStores() const { return Instrs; }
20888+
20889+ /// If \p SI is related to this group of stores, return the distance of its
20890+ /// pointer operand to that of the group's BaseInstr, as computed by getPointersDiff with StrictCheck enabled.
20891+ std::optional<int> getPointerDiff(StoreInst &SI, const DataLayout &DL,
20892+ ScalarEvolution &SE) const {
20893+ StoreInst &BaseStore = *AllStores[BaseInstrIdx];
20894+ return getPointersDiff(
20895+ BaseStore.getValueOperand()->getType(), BaseStore.getPointerOperand(),
20896+ SI.getValueOperand()->getType(), SI.getPointerOperand(), DL, SE,
20897+ /*StrictCheck=*/true);
20898+ }
20899+
20900+ /// Recompute the pointer distances to be based on \p NewBaseInstIdx: the group is reset to that base and each kept store's old distance is reduced by \p DistFromCurBase.
20901+ /// Stores whose index is less than \p MinSafeIdx will be dropped.
20902+ void rebase(unsigned MinSafeIdx, unsigned NewBaseInstIdx,
20903+ int DistFromCurBase) {
20904+ DistToInstMap PrevSet = std::move(Instrs);
20905+ reset(NewBaseInstIdx);
20906+
20907+ // Re-insert stores at or after MinSafeIdx to try and vectorize them
20908+ // again. Their distance will be "rebased" to use NewBaseInstIdx as
20909+ // reference, by subtracting DistFromCurBase from the previous distance.
20910+ for (auto [Dist, InstIdx] : PrevSet) {
20911+ if (InstIdx >= MinSafeIdx)
20912+ insertOrLookup(InstIdx, Dist - DistFromCurBase);
20913+ }
20914+ }
20915+
20916+ /// Remove from this group every store up to and including the vectorized store with the largest pointer distance; nothing is removed if no store of the group is in \p VectorizedStores.
20917+ void clearVectorizedStores(const BoUpSLP::ValueSet &VectorizedStores) {
20918+ DistToInstMap::reverse_iterator LastVectorizedStore = find_if(
20919+ reverse(Instrs), [&](const std::pair<int, unsigned> &DistAndIdx) {
20920+ return VectorizedStores.contains(AllStores[DistAndIdx.second]);
20921+ });
20922+
20923+ // Get a forward iterator pointing after the last vectorized store and erase
20924+ // all stores before it so we don't try to vectorize them again.
20925+ DistToInstMap::iterator VectorizedStoresEnd = LastVectorizedStore.base();
20926+ Instrs.erase(Instrs.begin(), VectorizedStoresEnd);
20927+ }
20928+
20929+ private:
2087920930 /// The index of the Base instruction, i.e. the one with a 0 pointer distance.
2088020931 unsigned BaseInstrIdx;
2088120932
2088220933 /// Maps a pointer distance from \p BaseInstrIdx to an instruction index.
20883- using DistToInstMap = std::map<int, unsigned>;
2088420934 DistToInstMap Instrs;
20935+
20936+ /// Reference to all the stores in the BB being analyzed.
20937+ ArrayRef<StoreInst *> AllStores;
2088520938};
2088620939
2088720940} // end anonymous namespace
@@ -21165,14 +21218,7 @@ bool SLPVectorizerPass::vectorizeStores(
2116521218 }
2116621219 };
2116721220
21168- // Stores pair (first: index of the store into Stores array ref, address of
21169- // which taken as base, second: sorted set of pairs {index, dist}, which are
21170- // indices of stores in the set and their store location distances relative to
21171- // the base address).
21172-
21173- // Need to store the index of the very first store separately, since the set
21174- // may be reordered after the insertion and the first store may be moved. This
21175- // container allows to reduce number of calls of getPointersDiff() function.
21221+ /// Groups of stores to vectorize
2117621222 SmallVector<RelatedStoreInsts> SortedStores;
2117721223
2117821224 // Inserts the specified store SI with the given index Idx to the set of the
@@ -21208,52 +21254,30 @@ bool SLPVectorizerPass::vectorizeStores(
2120821254 // dependencies and no need to waste compile time to try to vectorize them.
2120921255 // - Try to vectorize the sequence {1, {1, 0}, {3, 2}}.
2121021256 auto FillStoresSet = [&](unsigned Idx, StoreInst *SI) {
21211- for (RelatedStoreInsts &StoreSeq : SortedStores) {
21212- std::optional<int> Diff = getPointersDiff(
21213- Stores[StoreSeq.BaseInstrIdx]->getValueOperand()->getType(),
21214- Stores[StoreSeq.BaseInstrIdx]->getPointerOperand(),
21215- SI->getValueOperand()->getType(), SI->getPointerOperand(), *DL, *SE,
21216- /*StrictCheck=*/true);
21217- if (!Diff)
21218- continue;
21219- std::optional<unsigned> PrevInst =
21220- StoreSeq.insertOrLookup(/*InstrIdx=*/Idx, /*PtrDist=*/*Diff);
21221- if (!PrevInst) {
21222- // No store was associated to that distance. Keep collecting.
21223- return;
21224- }
21225- // Try to vectorize the first found set to avoid duplicate analysis.
21226- TryToVectorize(StoreSeq.Instrs);
21227- RelatedStoreInsts::DistToInstMap PrevSet;
21228- copy_if(StoreSeq.Instrs, std::inserter(PrevSet, PrevSet.end()),
21229- [&](const std::pair<int, unsigned> &DistAndIdx) {
21230- return DistAndIdx.second > *PrevInst;
21231- });
21232- StoreSeq.reset(Idx);
21233- // Insert stores that followed previous match to try to vectorize them
21234- // with this store.
21235- unsigned StartIdx = *PrevInst + 1;
21236- SmallBitVector UsedStores(Idx - StartIdx);
21237- // Distances to previously found dup store (or this store, since they
21238- // store to the same addresses).
21239- SmallVector<int> Dists(Idx - StartIdx, 0);
21240- for (auto [PtrDist, InstIdx] : reverse(PrevSet)) {
21241- // Do not try to vectorize sequences, we already tried.
21242- if (VectorizedStores.contains(Stores[InstIdx]))
21243- break;
21244- unsigned BI = InstIdx - StartIdx;
21245- UsedStores.set(BI);
21246- Dists[BI] = PtrDist - *Diff;
21247- }
21248- for (unsigned I = StartIdx; I < Idx; ++I) {
21249- unsigned BI = I - StartIdx;
21250- if (UsedStores.test(BI))
21251- StoreSeq.insertOrLookup(I, Dists[BI]);
21252- }
21257+ std::optional<int> PtrDist;
21258+ auto *RelatedStores = find_if(
21259+ SortedStores, [&PtrDist, SI, this](const RelatedStoreInsts &StoreSeq) {
21260+ PtrDist = StoreSeq.getPointerDiff(*SI, *DL, *SE);
21261+ return PtrDist.has_value();
21262+ });
21263+
21264+ // We did not find a comparable store, start a new group.
21265+ if (RelatedStores == SortedStores.end()) {
21266+ SortedStores.emplace_back(Idx, Stores);
2125321267 return;
2125421268 }
21255- // We did not find a comparable store, start a new sequence.
21256- SortedStores.emplace_back(Idx);
21269+
21270+ // If there is already a store in the group with the same PtrDist, try to
21271+ // vectorize the existing instructions before adding the current store.
21272+ // Otherwise, insert this store and keep collecting.
21273+ if (std::optional<unsigned> PrevInst =
21274+ RelatedStores->insertOrLookup(Idx, *PtrDist)) {
21275+ TryToVectorize(RelatedStores->getStores());
21276+ RelatedStores->clearVectorizedStores(VectorizedStores);
21277+ RelatedStores->rebase(/*MinSafeIdx=*/*PrevInst + 1,
21278+ /*NewBaseInstIdx=*/Idx,
21279+ /*DistFromCurBase=*/*PtrDist);
21280+ }
2125721281 };
2125821282 Type *PrevValTy = nullptr;
2125921283 for (auto [I, SI] : enumerate(Stores)) {
@@ -21264,7 +21288,7 @@ bool SLPVectorizerPass::vectorizeStores(
2126421288 // Check that we do not try to vectorize stores of different types.
2126521289 if (PrevValTy != SI->getValueOperand()->getType()) {
2126621290 for (RelatedStoreInsts &StoreSeq : SortedStores)
21267- TryToVectorize(StoreSeq.Instrs );
21291+ TryToVectorize(StoreSeq.getStores() );
2126821292 SortedStores.clear();
2126921293 PrevValTy = SI->getValueOperand()->getType();
2127021294 }
@@ -21273,7 +21297,7 @@ bool SLPVectorizerPass::vectorizeStores(
2127321297
2127421298 // Final vectorization attempt.
2127521299 for (RelatedStoreInsts &StoreSeq : SortedStores)
21276- TryToVectorize(StoreSeq.Instrs );
21300+ TryToVectorize(StoreSeq.getStores() );
2127721301
2127821302 return Changed;
2127921303}
0 commit comments