@@ -19994,6 +19994,38 @@ static bool checkTreeSizes(ArrayRef<std::pair<unsigned, unsigned>> Sizes,
1999419994 return Dev * 96 / (Mean * Mean) == 0;
1999519995}
1999619996
19997+ namespace {
19998+
/// A group of stores that we'll try to bundle together using vector ops.
/// They are ordered using the signed distance of their address operand to the
/// address of this group's BaseInstr.
struct RelatedStoreInsts {
  /// Creates a group whose only member is the store with index
  /// \p BaseInstrIdx, recorded at a pointer distance of 0.
  /// The constructor is explicit so that a plain index is never silently
  /// converted into a whole store group.
  explicit RelatedStoreInsts(unsigned BaseInstrIdx) { reset(BaseInstrIdx); }

  /// Forgets every recorded store and restarts the group with
  /// \p NewBaseInstr as its base, i.e. as the store at pointer distance 0.
  void reset(unsigned NewBaseInstr) {
    BaseInstrIdx = NewBaseInstr;
    Instrs.clear();
    insertOrLookup(NewBaseInstr, 0);
  }

  /// Tries to insert \p InstrIdx as the store with a pointer distance of
  /// \p PtrDist.
  /// Does nothing if there is already a store with that \p PtrDist.
  /// \returns The instruction index already recorded for \p PtrDist, or
  /// std::nullopt if no store had that distance and \p InstrIdx was inserted.
  std::optional<unsigned> insertOrLookup(unsigned InstrIdx, int PtrDist) {
    // try_emplace leaves an existing entry untouched, which is exactly the
    // "first store at a given distance wins" behavior documented above.
    auto [It, Inserted] = Instrs.try_emplace(PtrDist, InstrIdx);
    if (Inserted)
      return std::nullopt;
    return It->second;
  }

  /// The index of the Base instruction, i.e. the one with a 0 pointer distance.
  unsigned BaseInstrIdx = 0;

  /// Maps a pointer distance from \p BaseInstrIdx to an instruction index.
  /// The map is ordered, so iterating it visits the stores by increasing
  /// pointer distance.
  using DistToInstMap = std::map<int, unsigned>;
  DistToInstMap Instrs;
};
20026+
20027+ } // end anonymous namespace
20028+
1999720029bool SLPVectorizerPass::vectorizeStores(
1999820030 ArrayRef<StoreInst *> Stores, BoUpSLP &R,
1999920031 DenseSet<std::tuple<Value *, Value *, Value *, Value *, unsigned>>
@@ -20003,31 +20035,22 @@ bool SLPVectorizerPass::vectorizeStores(
2000320035 BoUpSLP::ValueSet VectorizedStores;
2000420036 bool Changed = false;
2000520037
20006- struct StoreDistCompare {
20007- bool operator()(const std::pair<unsigned, int> &Op1,
20008- const std::pair<unsigned, int> &Op2) const {
20009- return Op1.second < Op2.second;
20010- }
20011- };
20012- // A set of pairs (index of store in Stores array ref, Distance of the store
20013- // address relative to base store address in units).
20014- using StoreIndexToDistSet =
20015- std::set<std::pair<unsigned, int>, StoreDistCompare>;
20016- auto TryToVectorize = [&](const StoreIndexToDistSet &Set) {
20038+ auto TryToVectorize = [&](const RelatedStoreInsts::DistToInstMap &StoreSeq) {
2001720039 int PrevDist = -1;
2001820040 BoUpSLP::ValueList Operands;
2001920041 // Collect the chain into a list.
20020- for (auto [Idx, Data] : enumerate(Set)) {
20021- if (Operands.empty() || Data.second - PrevDist == 1) {
20022- Operands.push_back(Stores[Data.first]);
20023- PrevDist = Data.second;
20024- if (Idx != Set.size() - 1)
20042+ for (auto [Idx, Data] : enumerate(StoreSeq)) {
20043+ auto &[Dist, InstIdx] = Data;
20044+ if (Operands.empty() || Dist - PrevDist == 1) {
20045+ Operands.push_back(Stores[InstIdx]);
20046+ PrevDist = Dist;
20047+ if (Idx != StoreSeq.size() - 1)
2002520048 continue;
2002620049 }
20027- auto E = make_scope_exit([&, &DataVar = Data ]() {
20050+ auto E = make_scope_exit([&, &Dist = Dist, &InstIdx = InstIdx ]() {
2002820051 Operands.clear();
20029- Operands.push_back(Stores[DataVar.first ]);
20030- PrevDist = DataVar.second ;
20052+ Operands.push_back(Stores[InstIdx ]);
20053+ PrevDist = Dist ;
2003120054 });
2003220055
2003320056 if (Operands.size() <= 1 ||
@@ -20294,7 +20317,8 @@ bool SLPVectorizerPass::vectorizeStores(
2029420317 // Need to store the index of the very first store separately, since the set
2029520318 // may be reordered after the insertion and the first store may be moved. This
2029620319 // container allows reducing the number of calls to getPointersDiff().
20297- SmallVector<std::pair<unsigned, StoreIndexToDistSet>> SortedStores;
20320+ SmallVector<RelatedStoreInsts> SortedStores;
20321+
2029820322 // Inserts the specified store SI with the given index Idx to the set of the
2029920323 // stores. If the store with the same distance is found already - stop
2030020324 // insertion, try to vectorize already found stores. If some stores from this
@@ -20328,56 +20352,52 @@ bool SLPVectorizerPass::vectorizeStores(
2032820352 // dependencies and no need to waste compile time to try to vectorize them.
2032920353 // - Try to vectorize the sequence {1, {1, 0}, {3, 2}}.
2033020354 auto FillStoresSet = [&](unsigned Idx, StoreInst *SI) {
20331- for (std::pair<unsigned, StoreIndexToDistSet> &Set : SortedStores) {
20355+ for (RelatedStoreInsts &StoreSeq : SortedStores) {
2033220356 std::optional<int> Diff = getPointersDiff(
20333- Stores[Set.first ]->getValueOperand()->getType(),
20334- Stores[Set.first ]->getPointerOperand(),
20357+ Stores[StoreSeq.BaseInstrIdx ]->getValueOperand()->getType(),
20358+ Stores[StoreSeq.BaseInstrIdx ]->getPointerOperand(),
2033520359 SI->getValueOperand()->getType(), SI->getPointerOperand(), *DL, *SE,
2033620360 /*StrictCheck=*/true);
2033720361 if (!Diff)
2033820362 continue;
20339- auto It = Set.second.find(std::make_pair(Idx, *Diff));
20340- if (It == Set.second.end()) {
20341- Set.second.emplace(Idx, *Diff);
20363+ std::optional<unsigned> PrevInst =
20364+ StoreSeq.insertOrLookup(/*InstrIdx=*/Idx, /*PtrDist=*/*Diff);
20365+ if (!PrevInst) {
20366+ // No store was associated to that distance. Keep collecting.
2034220367 return;
2034320368 }
2034420369 // Try to vectorize the first found set to avoid duplicate analysis.
20345- TryToVectorize(Set.second);
20346- unsigned ItIdx = It->first;
20347- int ItDist = It->second;
20348- StoreIndexToDistSet PrevSet;
20349- copy_if(Set.second, std::inserter(PrevSet, PrevSet.end()),
20350- [&](const std::pair<unsigned, int> &Pair) {
20351- return Pair.first > ItIdx;
20370+ TryToVectorize(StoreSeq.Instrs);
20371+ RelatedStoreInsts::DistToInstMap PrevSet;
20372+ copy_if(StoreSeq.Instrs, std::inserter(PrevSet, PrevSet.end()),
20373+ [&](const std::pair<int, unsigned> &DistAndIdx) {
20374+ return DistAndIdx.second > *PrevInst;
2035220375 });
20353- Set.second.clear();
20354- Set.first = Idx;
20355- Set.second.emplace(Idx, 0);
20376+ StoreSeq.reset(Idx);
2035620377 // Insert stores that followed previous match to try to vectorize them
2035720378 // with this store.
20358- unsigned StartIdx = ItIdx + 1;
20379+ unsigned StartIdx = *PrevInst + 1;
2035920380 SmallBitVector UsedStores(Idx - StartIdx);
2036020381 // Distances to previously found dup store (or this store, since they
2036120382 // store to the same addresses).
2036220383 SmallVector<int> Dists(Idx - StartIdx, 0);
20363- for (const std::pair<unsigned, int> &Pair : reverse(PrevSet)) {
20384+ for (auto [PtrDist, InstIdx] : reverse(PrevSet)) {
2036420385 // Do not try to vectorize sequences, we already tried.
20365- if (VectorizedStores.contains(Stores[Pair.first ]))
20386+ if (VectorizedStores.contains(Stores[InstIdx ]))
2036620387 break;
20367- unsigned BI = Pair.first - StartIdx;
20388+ unsigned BI = InstIdx - StartIdx;
2036820389 UsedStores.set(BI);
20369- Dists[BI] = Pair.second - ItDist ;
20390+ Dists[BI] = PtrDist - *Diff ;
2037020391 }
2037120392 for (unsigned I = StartIdx; I < Idx; ++I) {
2037220393 unsigned BI = I - StartIdx;
2037320394 if (UsedStores.test(BI))
20374- Set.second.emplace (I, Dists[BI]);
20395+ StoreSeq.insertOrLookup (I, Dists[BI]);
2037520396 }
2037620397 return;
2037720398 }
20378- auto &Res = SortedStores.emplace_back();
20379- Res.first = Idx;
20380- Res.second.emplace(Idx, 0);
20399+ // We did not find a comparable store, start a new sequence.
20400+ SortedStores.emplace_back(Idx);
2038120401 };
2038220402 Type *PrevValTy = nullptr;
2038320403 for (auto [I, SI] : enumerate(Stores)) {
@@ -20387,17 +20407,17 @@ bool SLPVectorizerPass::vectorizeStores(
2038720407 PrevValTy = SI->getValueOperand()->getType();
2038820408 // Check that we do not try to vectorize stores of different types.
2038920409 if (PrevValTy != SI->getValueOperand()->getType()) {
20390- for (auto &Set : SortedStores)
20391- TryToVectorize(Set.second );
20410+ for (RelatedStoreInsts &StoreSeq : SortedStores)
20411+ TryToVectorize(StoreSeq.Instrs );
2039220412 SortedStores.clear();
2039320413 PrevValTy = SI->getValueOperand()->getType();
2039420414 }
2039520415 FillStoresSet(I, SI);
2039620416 }
2039720417
2039820418 // Final vectorization attempt.
20399- for (auto &Set : SortedStores)
20400- TryToVectorize(Set.second );
20419+ for (RelatedStoreInsts &StoreSeq : SortedStores)
20420+ TryToVectorize(StoreSeq.Instrs );
2040120421
2040220422 return Changed;
2040320423}
0 commit comments