diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h index 619c2147f2e5c..626fcb5afcb90 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h @@ -54,6 +54,8 @@ class SeedBundle { NumUnusedBits += Utils::getNumBits(I); } + virtual void insert(Instruction *I, ScalarEvolution &SE) = 0; + unsigned getFirstUnusedElementIdx() const { for (unsigned ElmIdx : seq(0, Seeds.size())) if (!isUsed(ElmIdx)) @@ -96,6 +98,9 @@ class SeedBundle { MutableArrayRef getSlice(unsigned StartIdx, unsigned MaxVecRegBits, bool ForcePowOf2); + /// \Returns the number of seed elements in the bundle. + std::size_t size() const { return Seeds.size(); } + protected: SmallVector Seeds; /// The lanes that we have already vectorized. @@ -148,7 +153,7 @@ template class MemSeedBundle : public SeedBundle { "Expected LoadInst or StoreInst!"); assert(isa(MemI) && "Expected Load or Store!"); } - void insert(sandboxir::Instruction *I, ScalarEvolution &SE) { + void insert(sandboxir::Instruction *I, ScalarEvolution &SE) override { assert(isa(I) && "Expected a Store or a Load!"); auto Cmp = [&SE](Instruction *I0, Instruction *I1) { return Utils::atLowerAddress(cast(I0), @@ -162,5 +167,123 @@ template class MemSeedBundle : public SeedBundle { using StoreSeedBundle = MemSeedBundle; using LoadSeedBundle = MemSeedBundle; +/// Class to conveniently track Seeds within SeedBundles. Saves newly collected +/// seeds in the proper bundle. Supports constant-time removal, as seeds and +/// entire bundles are vectorized and marked used to signify removal. Iterators +/// skip bundles that are completely used. +class SeedContainer { + // Use the same key for different seeds if they are the same type and + // reference the same pointer, even if at different offsets. 
This directs + // potentially vectorizable seeds into the same bundle. + using KeyT = std::tuple; + // Trying to vectorize too many seeds at once is expensive in + // compilation-time. Use a vector of bundles (all with the same key) to + // partition the candidate set into more manageable units. Each bundle is + // size-limited by sbvec-seed-bundle-size-limit. TODO: There might be a + // better way to divide these than by simple insertion order. + using ValT = SmallVector>; + using BundleMapT = MapVector; + // Map from {pointer, Type, Opcode} to a vector of bundles. + BundleMapT Bundles; + // Allows finding a particular Instruction's bundle. + DenseMap SeedLookupMap; + + ScalarEvolution &SE; + + template KeyT getKey(LoadOrStoreT *LSI) const; + +public: + SeedContainer(ScalarEvolution &SE) : SE(SE) {} + + class iterator { + BundleMapT *Map = nullptr; + BundleMapT::iterator MapIt; + ValT *Vec = nullptr; + size_t VecIdx; + + public: + using difference_type = std::ptrdiff_t; + using value_type = SeedBundle; + using pointer = value_type *; + using reference = value_type &; + using iterator_category = std::input_iterator_tag; + + /// Iterates over the \p Map of SeedBundle Vectors, starting at \p MapIt, + /// and \p Vec at \p VecIdx, skipping bundles that are completely + /// used. Iteration order over the keys {Pointer, Type, Opcode} follows + /// MapVector iteration order, i.e. key insertion order. For a given key, the + /// SeedBundles in its vector are visited in insertion order. As in the + /// pseudo code below: + /// + /// for Key,Value in Bundles + /// SeedBundleVector = Value + /// for SeedBundle in SeedBundleVector + /// if !SeedBundle.allUsed() ... + /// + /// Note that the bundles themselves may have additional ordering, created + /// by the subclasses by insertAt. The bundles themselves may also have used + /// instructions. 
+ iterator(BundleMapT &Map, BundleMapT::iterator MapIt, ValT *Vec, int VecIdx) + : Map(&Map), MapIt(MapIt), Vec(Vec), VecIdx(VecIdx) {} + value_type &operator*() { + assert(Vec != nullptr && "Already at end!"); + return *(*Vec)[VecIdx]; + } + // Skip completely used bundles by repeatedly calling operator++(). + void skipUsed() { + while (Vec && VecIdx < Vec->size() && this->operator*().allUsed()) + ++(*this); + } + // Iterators iterate over the bundles. NOTE(review): VecIdx is a size_t, so the `VecIdx >= 0` assert below is always true and cannot detect incrementing end(). + iterator &operator++() { + assert(VecIdx >= 0 && "Already at end!"); + ++VecIdx; + if (VecIdx >= Vec->size()) { + assert(MapIt != Map->end() && "Already at end!"); + VecIdx = 0; + ++MapIt; + if (MapIt != Map->end()) + Vec = &MapIt->second; + else { + Vec = nullptr; + } + } + skipUsed(); + return *this; + } + iterator operator++(int) { + auto Copy = *this; + ++(*this); + return Copy; + } + bool operator==(const iterator &Other) const { + assert(Map == Other.Map && "Iterator of different objects!"); + return MapIt == Other.MapIt && VecIdx == Other.VecIdx; + } + bool operator!=(const iterator &Other) const { return !(*this == Other); } + }; + using const_iterator = BundleMapT::const_iterator; + template void insert(LoadOrStoreT *LSI); + // To support constant-time erase, erase(Instruction *) just marks the seed + // used rather than removing it from its bundle. NOTE(review): erase(const KeyT &) does remove the key's bundles from the map without updating SeedLookupMap. 
+ bool erase(Instruction *I); + bool erase(const KeyT &Key) { return Bundles.erase(Key); } + iterator begin() { + if (Bundles.empty()) + return end(); + auto BeginIt = + iterator(Bundles, Bundles.begin(), &Bundles.begin()->second, 0); + BeginIt.skipUsed(); + return BeginIt; + } + iterator end() { return iterator(Bundles, Bundles.end(), nullptr, 0); } + unsigned size() const { return Bundles.size(); } + +#ifndef NDEBUG + LLVM_DUMP_METHOD void dump() const; +#endif // NDEBUG +}; + } // namespace llvm::sandboxir + #endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SEEDCOLLECTOR_H diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp index 00a7dc3fcec93..20df9e344b61c 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp @@ -19,6 +19,10 @@ using namespace llvm; namespace llvm::sandboxir { +cl::opt SeedBundleSizeLimit( + "sbvec-seed-bundle-size-limit", cl::init(32), cl::Hidden, + cl::desc("Limit the size of the seed bundle to cap compilation time.")); + MutableArrayRef SeedBundle::getSlice(unsigned StartIdx, unsigned MaxVecRegBits, bool ForcePowerOf2) { @@ -61,4 +65,68 @@ MutableArrayRef SeedBundle::getSlice(unsigned StartIdx, return {}; } +template +SeedContainer::KeyT SeedContainer::getKey(LoadOrStoreT *LSI) const { + assert((isa(LSI) || isa(LSI)) && + "Expected Load or Store!"); + Value *Ptr = Utils::getMemInstructionBase(LSI); + Instruction::Opcode Op = LSI->getOpcode(); + Type *Ty = Utils::getExpectedType(LSI); + if (auto *VTy = dyn_cast(Ty)) + Ty = VTy->getElementType(); + return {Ptr, Ty, Op}; +} + +// Explicit instantiations +template SeedContainer::KeyT +SeedContainer::getKey(LoadInst *LSI) const; +template SeedContainer::KeyT +SeedContainer::getKey(StoreInst *LSI) const; + +bool SeedContainer::erase(Instruction *I) { + assert((isa(I) || isa(I)) && "Expected Load or 
Store!"); + auto It = SeedLookupMap.find(I); + if (It == SeedLookupMap.end()) + return false; + SeedBundle *Bndl = It->second; + Bndl->setUsed(I); + return true; +} + +template void SeedContainer::insert(LoadOrStoreT *LSI) { + // Find the bundle containing seeds for this symbol and type-of-access. + auto &BundleVec = Bundles[getKey(LSI)]; + // Fill this vector of bundles front to back so that only the last bundle in + // the vector may have available space. This avoids iteration to find one with + // space. + if (BundleVec.empty() || BundleVec.back()->size() == SeedBundleSizeLimit) + BundleVec.emplace_back(std::make_unique>(LSI)); + else + BundleVec.back()->insert(LSI, SE); + + SeedLookupMap[LSI] = BundleVec.back().get(); +} + +// Explicit instantiations +template void SeedContainer::insert(LoadInst *); +template void SeedContainer::insert(StoreInst *); + +#ifndef NDEBUG +void SeedContainer::dump() const { + for (const auto &Pair : Bundles) { + auto [I, Ty, Opc] = Pair.first; + const auto &SeedsVec = Pair.second; + std::string RefType = dyn_cast(I) ? "Load" + : dyn_cast(I) ? "Store" + : "Other"; + dbgs() << "[Inst=" << *I << " Ty=" << Ty << " " << RefType << "]\n"; + for (const auto &SeedPtr : SeedsVec) { + SeedPtr->dump(dbgs()); + dbgs() << "\n"; + } + } + dbgs() << "\n"; +} +#endif // NDEBUG + } // namespace llvm::sandboxir diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp index dd41b0a660509..82b230d50c4ec 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp @@ -40,6 +40,15 @@ struct SeedBundleTest : public testing::Test { } }; +// Stub class to make the abstract base class testable. 
+class SeedBundleForTest : public sandboxir::SeedBundle { +public: + using sandboxir::SeedBundle::SeedBundle; + void insert(sandboxir::Instruction *I, ScalarEvolution &SE) override { + insertAt(Seeds.end(), I); + } +}; + TEST_F(SeedBundleTest, SeedBundle) { parseIR(C, R"IR( define void @foo(float %v0, i32 %i0, i16 %i1, i8 %i2) { @@ -66,7 +75,7 @@ define void @foo(float %v0, i32 %i0, i16 %i1, i8 %i2) { // Assume first two instructions are identical in the number of bits. const unsigned IOBits = sandboxir::Utils::getNumBits(I0, DL); // Constructor - sandboxir::SeedBundle SBO(I0); + SeedBundleForTest SBO(I0); EXPECT_EQ(*SBO.begin(), I0); // getNumUnusedBits after constructor EXPECT_EQ(SBO.getNumUnusedBits(), IOBits); @@ -103,7 +112,7 @@ define void @foo(float %v0, i32 %i0, i16 %i1, i8 %i2) { EXPECT_EQ(BundleBits, 88u); auto Seeds = Insts; // Constructor - sandboxir::SeedBundle SB1(std::move(Seeds)); + SeedBundleForTest SB1(std::move(Seeds)); // getNumUnusedBits after constructor EXPECT_EQ(SB1.getNumUnusedBits(), BundleBits); // setUsed with index @@ -196,3 +205,66 @@ define void @foo(ptr %ptrA, float %val, ptr %ptr) { sandboxir::LoadSeedBundle LB(std::move(Loads), SE); EXPECT_THAT(LB, testing::ElementsAre(L0, L1, L2, L3)); } + +TEST_F(SeedBundleTest, Container) { + parseIR(C, R"IR( +define void @foo(ptr %ptrA, float %val, ptr %ptrB) { +bb: + %gepA0 = getelementptr float, ptr %ptrA, i32 0 + %gepA1 = getelementptr float, ptr %ptrA, i32 1 + %gepB0 = getelementptr float, ptr %ptrB, i32 0 + %gepB1 = getelementptr float, ptr %ptrB, i32 1 + store float %val, ptr %gepA0 + store float %val, ptr %gepA1 + store float %val, ptr %gepB0 + store float %val, ptr %gepB1 + ret void +} +)IR"); + Function &LLVMF = *M->getFunction("foo"); + + DominatorTree DT(LLVMF); + TargetLibraryInfoImpl TLII; + TargetLibraryInfo TLI(TLII); + DataLayout DL(M->getDataLayout()); + LoopInfo LI(DT); + AssumptionCache AC(LLVMF); + ScalarEvolution SE(LLVMF, TLI, AC, DT, LI); + + sandboxir::Context Ctx(C); + 
auto &F = *Ctx.createFunction(&LLVMF); + auto &BB = *F.begin(); + auto It = std::next(BB.begin(), 4); + auto *S0 = cast(&*It++); + auto *S1 = cast(&*It++); + auto *S2 = cast(&*It++); + auto *S3 = cast(&*It++); + sandboxir::SeedContainer SC(SE); + // Check begin() end() when empty. + EXPECT_EQ(SC.begin(), SC.end()); + + SC.insert(S0); + SC.insert(S1); + SC.insert(S2); + SC.insert(S3); + unsigned Cnt = 0; + SmallVector Bndls; + for (auto &SeedBndl : SC) { + EXPECT_EQ(SeedBndl.size(), 2u); + ++Cnt; + Bndls.push_back(&SeedBndl); + } + EXPECT_EQ(Cnt, 2u); + + // Mark them "Used" to check if operator++ skips them in the next loop. + for (auto *SeedBndl : Bndls) + for (auto Lane : seq(SeedBndl->size())) + SeedBndl->setUsed(Lane); + // Check if the iterator skips bundles whose lanes are all used. + Cnt = 0; + for (auto &SeedBndl : SC) { + (void)SeedBndl; + ++Cnt; + } + EXPECT_EQ(Cnt, 0u); +}