@@ -20322,6 +20322,38 @@ static bool checkTreeSizes(ArrayRef<std::pair<unsigned, unsigned>> Sizes,
2032220322 return Dev * 96 / (Mean * Mean) == 0;
2032320323}
2032420324
20325+ namespace {
20326+
20327+ /// A group of stores that we'll try to bundle together using vector ops.
20328+ /// They are ordered using the signed distance of their address operand to the
20329+ /// address of this group's BaseInstr.
/// A group of stores that we'll try to bundle together using vector ops.
/// They are ordered using the signed distance of their address operand to the
/// address of this group's BaseInstr.
struct RelatedStoreInsts {
  /// Creates a group whose only member is the base store \p BaseInstrIdx,
  /// recorded at a pointer distance of 0.
  /// The constructor is explicit so that a bare index is never silently
  /// converted into a whole group.
  explicit RelatedStoreInsts(unsigned BaseInstrIdx) { reset(BaseInstrIdx); }

  /// Drops every recorded store and restarts the group with \p NewBaseInstr
  /// as its base, i.e. as the entry with a pointer distance of 0.
  void reset(unsigned NewBaseInstr) {
    BaseInstrIdx = NewBaseInstr;
    Instrs.clear();
    insertOrLookup(NewBaseInstr, 0);
  }

  /// Tries to insert \p InstrIdx as the store with a pointer distance of
  /// \p PtrDist.
  /// Does nothing if there is already a store with that \p PtrDist.
  /// \returns The instruction index already associated with \p PtrDist, or
  /// std::nullopt if \p InstrIdx was inserted.
  std::optional<unsigned> insertOrLookup(unsigned InstrIdx, int PtrDist) {
    // try_emplace leaves an existing entry untouched and reports whether the
    // insertion actually happened.
    auto [It, Inserted] = Instrs.try_emplace(PtrDist, InstrIdx);
    if (Inserted)
      return std::nullopt;
    return It->second;
  }

  /// The index of the Base instruction, i.e. the one with a 0 pointer distance.
  unsigned BaseInstrIdx = 0;

  /// Maps a pointer distance from \p BaseInstrIdx to an instruction index.
  /// The map is ordered, so iteration visits the stores by increasing distance.
  using DistToInstMap = std::map<int, unsigned>;
  DistToInstMap Instrs;
};
20354+
20355+ } // end anonymous namespace
20356+
2032520357bool SLPVectorizerPass::vectorizeStores(
2032620358 ArrayRef<StoreInst *> Stores, BoUpSLP &R,
2032720359 DenseSet<std::tuple<Value *, Value *, Value *, Value *, unsigned>>
@@ -20331,31 +20363,22 @@ bool SLPVectorizerPass::vectorizeStores(
2033120363 BoUpSLP::ValueSet VectorizedStores;
2033220364 bool Changed = false;
2033320365
20334- struct StoreDistCompare {
20335- bool operator()(const std::pair<unsigned, int> &Op1,
20336- const std::pair<unsigned, int> &Op2) const {
20337- return Op1.second < Op2.second;
20338- }
20339- };
20340- // A set of pairs (index of store in Stores array ref, Distance of the store
20341- // address relative to base store address in units).
20342- using StoreIndexToDistSet =
20343- std::set<std::pair<unsigned, int>, StoreDistCompare>;
20344- auto TryToVectorize = [&](const StoreIndexToDistSet &Set) {
20366+ auto TryToVectorize = [&](const RelatedStoreInsts::DistToInstMap &StoreSeq) {
2034520367 int PrevDist = -1;
2034620368 BoUpSLP::ValueList Operands;
2034720369 // Collect the chain into a list.
20348- for (auto [Idx, Data] : enumerate(Set)) {
20349- if (Operands.empty() || Data.second - PrevDist == 1) {
20350- Operands.push_back(Stores[Data.first]);
20351- PrevDist = Data.second;
20352- if (Idx != Set.size() - 1)
20370+ for (auto [Idx, Data] : enumerate(StoreSeq)) {
20371+ auto &[Dist, InstIdx] = Data;
20372+ if (Operands.empty() || Dist - PrevDist == 1) {
20373+ Operands.push_back(Stores[InstIdx]);
20374+ PrevDist = Dist;
20375+ if (Idx != StoreSeq.size() - 1)
2035320376 continue;
2035420377 }
20355- auto E = make_scope_exit([&, &DataVar = Data ]() {
20378+ auto E = make_scope_exit([&, &Dist = Dist, &InstIdx = InstIdx ]() {
2035620379 Operands.clear();
20357- Operands.push_back(Stores[DataVar.first ]);
20358- PrevDist = DataVar.second ;
20380+ Operands.push_back(Stores[InstIdx ]);
20381+ PrevDist = Dist ;
2035920382 });
2036020383
2036120384 if (Operands.size() <= 1 ||
@@ -20622,7 +20645,8 @@ bool SLPVectorizerPass::vectorizeStores(
2062220645 // Need to store the index of the very first store separately, since the set
2062320646 // may be reordered after the insertion and the first store may be moved. This
2062420647 // container allows to reduce number of calls of getPointersDiff() function.
20625- SmallVector<std::pair<unsigned, StoreIndexToDistSet>> SortedStores;
20648+ SmallVector<RelatedStoreInsts> SortedStores;
20649+
2062620650 // Inserts the specified store SI with the given index Idx to the set of the
2062720651 // stores. If the store with the same distance is found already - stop
2062820652 // insertion, try to vectorize already found stores. If some stores from this
@@ -20656,56 +20680,52 @@ bool SLPVectorizerPass::vectorizeStores(
2065620680 // dependencies and no need to waste compile time to try to vectorize them.
2065720681 // - Try to vectorize the sequence {1, {1, 0}, {3, 2}}.
2065820682 auto FillStoresSet = [&](unsigned Idx, StoreInst *SI) {
20659- for (std::pair<unsigned, StoreIndexToDistSet> &Set : SortedStores) {
20683+ for (RelatedStoreInsts &StoreSeq : SortedStores) {
2066020684 std::optional<int> Diff = getPointersDiff(
20661- Stores[Set.first ]->getValueOperand()->getType(),
20662- Stores[Set.first ]->getPointerOperand(),
20685+ Stores[StoreSeq.BaseInstrIdx ]->getValueOperand()->getType(),
20686+ Stores[StoreSeq.BaseInstrIdx ]->getPointerOperand(),
2066320687 SI->getValueOperand()->getType(), SI->getPointerOperand(), *DL, *SE,
2066420688 /*StrictCheck=*/true);
2066520689 if (!Diff)
2066620690 continue;
20667- auto It = Set.second.find(std::make_pair(Idx, *Diff));
20668- if (It == Set.second.end()) {
20669- Set.second.emplace(Idx, *Diff);
20691+ std::optional<unsigned> PrevInst =
20692+ StoreSeq.insertOrLookup(/*InstrIdx=*/Idx, /*PtrDist=*/*Diff);
20693+ if (!PrevInst) {
20694+ // No store was associated to that distance. Keep collecting.
2067020695 return;
2067120696 }
2067220697 // Try to vectorize the first found set to avoid duplicate analysis.
20673- TryToVectorize(Set.second);
20674- unsigned ItIdx = It->first;
20675- int ItDist = It->second;
20676- StoreIndexToDistSet PrevSet;
20677- copy_if(Set.second, std::inserter(PrevSet, PrevSet.end()),
20678- [&](const std::pair<unsigned, int> &Pair) {
20679- return Pair.first > ItIdx;
20698+ TryToVectorize(StoreSeq.Instrs);
20699+ RelatedStoreInsts::DistToInstMap PrevSet;
20700+ copy_if(StoreSeq.Instrs, std::inserter(PrevSet, PrevSet.end()),
20701+ [&](const std::pair<int, unsigned> &DistAndIdx) {
20702+ return DistAndIdx.second > *PrevInst;
2068020703 });
20681- Set.second.clear();
20682- Set.first = Idx;
20683- Set.second.emplace(Idx, 0);
20704+ StoreSeq.reset(Idx);
2068420705 // Insert stores that followed previous match to try to vectorize them
2068520706 // with this store.
20686- unsigned StartIdx = ItIdx + 1;
20707+ unsigned StartIdx = *PrevInst + 1;
2068720708 SmallBitVector UsedStores(Idx - StartIdx);
2068820709 // Distances to previously found dup store (or this store, since they
2068920710 // store to the same addresses).
2069020711 SmallVector<int> Dists(Idx - StartIdx, 0);
20691- for (const std::pair<unsigned, int> &Pair : reverse(PrevSet)) {
20712+ for (auto [PtrDist, InstIdx] : reverse(PrevSet)) {
2069220713 // Do not try to vectorize sequences, we already tried.
20693- if (VectorizedStores.contains(Stores[Pair.first ]))
20714+ if (VectorizedStores.contains(Stores[InstIdx ]))
2069420715 break;
20695- unsigned BI = Pair.first - StartIdx;
20716+ unsigned BI = InstIdx - StartIdx;
2069620717 UsedStores.set(BI);
20697- Dists[BI] = Pair.second - ItDist ;
20718+ Dists[BI] = PtrDist - *Diff ;
2069820719 }
2069920720 for (unsigned I = StartIdx; I < Idx; ++I) {
2070020721 unsigned BI = I - StartIdx;
2070120722 if (UsedStores.test(BI))
20702- Set.second.emplace (I, Dists[BI]);
20723+ StoreSeq.insertOrLookup (I, Dists[BI]);
2070320724 }
2070420725 return;
2070520726 }
20706- auto &Res = SortedStores.emplace_back();
20707- Res.first = Idx;
20708- Res.second.emplace(Idx, 0);
20727+ // We did not find a comparable store, start a new sequence.
20728+ SortedStores.emplace_back(Idx);
2070920729 };
2071020730 Type *PrevValTy = nullptr;
2071120731 for (auto [I, SI] : enumerate(Stores)) {
@@ -20715,17 +20735,17 @@ bool SLPVectorizerPass::vectorizeStores(
2071520735 PrevValTy = SI->getValueOperand()->getType();
2071620736 // Check that we do not try to vectorize stores of different types.
2071720737 if (PrevValTy != SI->getValueOperand()->getType()) {
20718- for (auto &Set : SortedStores)
20719- TryToVectorize(Set.second );
20738+ for (RelatedStoreInsts &StoreSeq : SortedStores)
20739+ TryToVectorize(StoreSeq.Instrs );
2072020740 SortedStores.clear();
2072120741 PrevValTy = SI->getValueOperand()->getType();
2072220742 }
2072320743 FillStoresSet(I, SI);
2072420744 }
2072520745
2072620746 // Final vectorization attempt.
20727- for (auto &Set : SortedStores)
20728- TryToVectorize(Set.second );
20747+ for (RelatedStoreInsts &StoreSeq : SortedStores)
20748+ TryToVectorize(StoreSeq.Instrs );
2072920749
2073020750 return Changed;
2073120751}
0 commit comments