diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
new file mode 100644
index 0000000000000..06fb41ac58db2
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
@@ -0,0 +1,162 @@
+//===- SeedCollector.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This file contains the mechanism for collecting the seed instructions that
+// are used as starting points for forming the vectorization graph.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SEEDCOLLECTOR_H
+#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SEEDCOLLECTOR_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/SandboxIR/Instruction.h"
+#include "llvm/SandboxIR/Utils.h"
+#include "llvm/SandboxIR/Value.h"
+#include <algorithm>
+#include <utility>
+
+namespace llvm::sandboxir {
+
+/// A set of candidate Instructions for vectorizing together.
+///
+/// Marking a seed as used does not remove it from the bundle; it only sets the
+/// seed's lane flag and updates the used-lane and unused-bit totals.
+class SeedBundle {
+public:
+  /// Initialize a bundle with \p I.
+  explicit SeedBundle(Instruction *I) { insertAt(begin(), I); }
+  /// Initialize a bundle with the seeds moved out of \p L.
+  explicit SeedBundle(SmallVector<Instruction *> &&L) : Seeds(std::move(L)) {
+    for (auto &S : Seeds)
+      NumUnusedBits += Utils::getNumBits(S);
+  }
+  /// No need to allow copies.
+  SeedBundle(const SeedBundle &) = delete;
+  SeedBundle &operator=(const SeedBundle &) = delete;
+  virtual ~SeedBundle() = default;
+
+  using iterator = SmallVector<Instruction *>::iterator;
+  using const_iterator = SmallVector<Instruction *>::const_iterator;
+  iterator begin() { return Seeds.begin(); }
+  iterator end() { return Seeds.end(); }
+  const_iterator begin() const { return Seeds.begin(); }
+  const_iterator end() const { return Seeds.end(); }
+
+  Instruction *operator[](unsigned Idx) const { return Seeds[Idx]; }
+
+  /// Insert \p I at position \p Pos. Clients should choose \p Pos by symbol,
+  /// symbol-offset, and program order (which depends on whether scheduling is
+  /// bottom-up or top-down). The used flags of the seeds at or after \p Pos
+  /// move along with them; the new seed starts out unused.
+  void insertAt(iterator Pos, Instruction *I) {
+#ifdef EXPENSIVE_CHECKS
+    assert(std::find(begin(), end(), I) == end() &&
+           "Attempt to insert an instruction twice.");
+#endif
+    const auto Idx = static_cast<unsigned>(Pos - begin());
+    Seeds.insert(Pos, I);
+    if (Idx < UsedLanes.size()) {
+      // Shift the flags above the insertion point up by one lane so that each
+      // flag still describes the same instruction.
+      UsedLanes.resize(UsedLanes.size() + 1);
+      for (unsigned Lane = UsedLanes.size() - 1; Lane > Idx; --Lane) {
+        if (UsedLanes.test(Lane - 1))
+          UsedLanes.set(Lane);
+        else
+          UsedLanes.reset(Lane);
+      }
+      UsedLanes.reset(Idx);
+    }
+    NumUnusedBits += Utils::getNumBits(I);
+  }
+
+  /// \Returns the index of the first seed that is not marked used, or the
+  /// number of seeds if all of them are used.
+  unsigned getFirstUnusedElementIdx() const {
+    for (unsigned ElmIdx : seq<unsigned>(0, Seeds.size()))
+      if (!isUsed(ElmIdx))
+        return ElmIdx;
+    return Seeds.size();
+  }
+  /// Marks instruction \p I "used" within the bundle. Clients
+  /// use this property when assembling a vectorized instruction from
+  /// the seeds in a bundle. This allows constant time evaluation
+  /// and "removal" from the list.
+  void setUsed(Instruction *I) {
+    auto It = std::find(begin(), end(), I);
+    assert(It != end() && "Instruction not in the bundle!");
+    auto Idx = It - begin();
+    setUsed(Idx, 1, /*VerifyUnused=*/false);
+  }
+
+  /// Marks the \p Sz seeds starting at \p ElementIdx as used. If
+  /// \p VerifyUnused is true, asserts that none of them was used before.
+  /// Lanes that are already used are skipped, so the used-lane count and the
+  /// unused-bit total are adjusted once per seed.
+  void setUsed(unsigned ElementIdx, unsigned Sz = 1, bool VerifyUnused = true) {
+    assert(ElementIdx + Sz <= Seeds.size() && "Range exceeds the bundle!");
+    if (ElementIdx + Sz > UsedLanes.size())
+      UsedLanes.resize(ElementIdx + Sz);
+    for (unsigned Idx : seq<unsigned>(ElementIdx, ElementIdx + Sz)) {
+      assert((!VerifyUnused || !UsedLanes.test(Idx)) &&
+             "Already marked as used!");
+      if (UsedLanes.test(Idx))
+        continue;
+      UsedLanes.set(Idx);
+      UsedLaneCount++;
+      NumUnusedBits -= Utils::getNumBits(Seeds[Idx]);
+    }
+  }
+  /// \Returns whether or not \p Element has been used.
+  bool isUsed(unsigned Element) const {
+    return Element < UsedLanes.size() && UsedLanes.test(Element);
+  }
+  /// \Returns true if every seed in the bundle has been marked used.
+  bool allUsed() const { return UsedLaneCount == Seeds.size(); }
+  /// \Returns the total bit count of the seeds that are not marked used.
+  unsigned getNumUnusedBits() const { return NumUnusedBits; }
+
+  /// \Returns a slice of seed elements, starting at the element \p StartIdx,
+  /// with a total size <= \p MaxVecRegBits, or an empty slice if the
+  /// requirements cannot be met. If \p ForcePowOf2 is true, then the returned
+  /// slice will have a total number of bits that is a power of 2.
+  MutableArrayRef<Instruction *>
+  getSlice(unsigned StartIdx, unsigned MaxVecRegBits, bool ForcePowOf2);
+
+protected:
+  SmallVector<Instruction *> Seeds;
+  /// The lanes that we have already vectorized.
+  BitVector UsedLanes;
+  /// Tracks used lanes for constant-time accessor.
+  unsigned UsedLaneCount = 0;
+  /// Tracks the remaining bits available to vectorize
+  unsigned NumUnusedBits = 0;
+
+public:
+#ifndef NDEBUG
+  /// Prints one line per seed to \p OS; used seeds are shown as "[USED]".
+  void dump(raw_ostream &OS) const {
+    for (auto [ElmIdx, I] : enumerate(*this)) {
+      OS.indent(2) << ElmIdx << ". ";
+      if (isUsed(ElmIdx))
+        OS << "[USED]";
+      else
+        OS << *I;
+      OS << "\n";
+    }
+  }
+  /// Prints the bundle to dbgs().
+  LLVM_DUMP_METHOD void dump() const {
+    dump(dbgs());
+    dbgs() << "\n";
+  }
+#endif // NDEBUG
+};
+} // namespace llvm::sandboxir
+#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SEEDCOLLECTOR_H
diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
index eeff4a9f6a8ba..887c2089c5a52 100644
--- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_component_library(LLVMVectorize
   SandboxVectorizer/DependencyGraph.cpp
   SandboxVectorizer/Passes/BottomUpVec.cpp
   SandboxVectorizer/SandboxVectorizer.cpp
+  SandboxVectorizer/SeedCollector.cpp
   SLPVectorizer.cpp
   Vectorize.cpp
   VectorCombine.cpp
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
new file mode 100644
index 0000000000000..e8b90386dc8f8
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
@@ -0,0 +1,65 @@
+//===- SeedCollector.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Type.h"
+#include "llvm/SandboxIR/Instruction.h"
+#include "llvm/SandboxIR/Utils.h"
+#include "llvm/Support/Debug.h"
+#include <cstdint>
+
+using namespace llvm;
+namespace llvm::sandboxir {
+
+/// Walks the seeds from \p StartIdx, stopping at the end, at a used seed, or
+/// when the running bit total would exceed \p MaxVecRegBits.
+MutableArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx,
+                                                    unsigned MaxVecRegBits,
+                                                    bool ForcePowerOf2) {
+  if (StartIdx >= Seeds.size())
+    return {};
+  // Can't start a slice with a used instruction.
+  assert(!isUsed(StartIdx) && "Expected unused at StartIdx");
+  // uint32_t is used for compatibility with isPowerOf2_32(). BitCount and
+  // NumElements describe the working slice; the *PowerOfTwo pair records the
+  // most recent prefix whose BitCount was a power of two.
+  uint32_t BitCount = 0;
+  uint32_t NumElements = 0;
+  uint32_t NumElementsPowerOfTwo = 0;
+  uint32_t BitCountPowerOfTwo = 0;
+  for (Instruction *S : make_range(Seeds.begin() + StartIdx, Seeds.end())) {
+    uint32_t InstBits = Utils::getNumBits(S);
+    // Stop if this instruction is used, or if adding it puts the slice over
+    // the limit.
+    if (isUsed(StartIdx + NumElements) || BitCount + InstBits > MaxVecRegBits)
+      break;
+    NumElements++;
+    BitCount += InstBits;
+    if (ForcePowerOf2 && isPowerOf2_32(BitCount)) {
+      NumElementsPowerOfTwo = NumElements;
+      BitCountPowerOfTwo = BitCount;
+    }
+  }
+  if (ForcePowerOf2) {
+    NumElements = NumElementsPowerOfTwo;
+    BitCount = BitCountPowerOfTwo;
+  }
+  // Only slices of two or more seeds are returned. Bail out before the
+  // power-of-two check: BitCount is 0 when no prefix qualified.
+  if (NumElements < 2)
+    return {};
+  assert((!ForcePowerOf2 || isPowerOf2_32(BitCount)) &&
+         "Must be a power of two");
+  return MutableArrayRef<Instruction *>(&Seeds[StartIdx], NumElements);
+}
+
+} // namespace llvm::sandboxir
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt
index 9f1a3409c0c39..dcd7232db5f60 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt
@@ -11,4 +11,5 @@ add_llvm_unittest(SandboxVectorizerTests
   DependencyGraphTest.cpp
   IntervalTest.cpp
   LegalityTest.cpp
-  )
+  SeedCollectorTest.cpp
+)
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp
new file mode 100644
index 0000000000000..ed6fd12052bbc
--- /dev/null
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp
@@ -0,0 +1,125 @@
+//===- SeedCollectorTest.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/SandboxIR/Function.h"
+#include "llvm/SandboxIR/Instruction.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Testing/Support/SupportHelpers.h"
+#include "gtest/gtest.h"
+#include <memory>
+using namespace llvm;
+
+/// Fixture that parses textual IR into the module used by the test below.
+struct SeedBundleTest : public testing::Test {
+  LLVMContext C;
+  std::unique_ptr<Module> M;
+
+  /// Parses \p IR into M; prints the parser diagnostic if M comes back null.
+  void parseIR(LLVMContext &C, const char *IR) {
+    SMDiagnostic Err;
+    M = parseAssemblyString(IR, Err, C);
+    if (!M)
+      Err.print("SeedCollectorTest", errs());
+  }
+};
+
+TEST_F(SeedBundleTest, SeedBundle) {
+  parseIR(C, R"IR(
+define void @foo(float %v0, i32 %i0, i16 %i1, i8 %i2) {
+bb:
+  %add0 = fadd float %v0, %v0
+  %add1 = fadd float %v0, %v0
+  %add2 = add i8 %i2, %i2
+  %add3 = add i16 %i1, %i1
+  %add4 = add i32 %i0, %i0
+  %add5 = add i16 %i1, %i1
+  %add6 = add i8 %i2, %i2
+  %add7 = add i8 %i2, %i2
+  ret void
+}
+)IR");
+  // Bail out instead of dereferencing a null module if the IR did not parse.
+  ASSERT_TRUE(M != nullptr);
+  Function &LLVMF = *M->getFunction("foo");
+  sandboxir::Context Ctx(C);
+  auto &F = *Ctx.createFunction(&LLVMF);
+  DataLayout DL(M->getDataLayout());
+  auto *BB = &*F.begin();
+  auto It = BB->begin();
+  auto *I0 = &*It++;
+  auto *I1 = &*It++;
+  // Assume first two instructions are identical in the number of bits.
+  const unsigned I0Bits = sandboxir::Utils::getNumBits(I0, DL);
+  // Constructor
+  sandboxir::SeedBundle SB0(I0);
+  EXPECT_EQ(*SB0.begin(), I0);
+  // getNumUnusedBits after constructor
+  EXPECT_EQ(SB0.getNumUnusedBits(), I0Bits);
+  // setUsed
+  SB0.setUsed(I0);
+  // allUsed
+  EXPECT_TRUE(SB0.allUsed());
+  // isUsed
+  EXPECT_TRUE(SB0.isUsed(0));
+  // getNumUnusedBits after setUsed
+  EXPECT_EQ(SB0.getNumUnusedBits(), 0u);
+  // insertAt
+  SB0.insertAt(SB0.end(), I1);
+  EXPECT_NE(*SB0.begin(), I1);
+  // getNumUnusedBits after insertAt
+  EXPECT_EQ(SB0.getNumUnusedBits(), I0Bits);
+  // allUsed
+  EXPECT_FALSE(SB0.allUsed());
+  // getFirstUnusedElement
+  EXPECT_EQ(SB0.getFirstUnusedElementIdx(), 1u);
+
+  SmallVector<sandboxir::Instruction *> Insts;
+  // add2 through add7
+  for (unsigned Idx = 0; Idx != 6; ++Idx)
+    Insts.push_back(&*It++);
+  unsigned BundleBits = 0;
+  for (auto &S : Insts)
+    BundleBits += sandboxir::Utils::getNumBits(S);
+  // Ensure the instructions are as expected.
+  EXPECT_EQ(BundleBits, 88u);
+  auto Seeds = Insts;
+  // Constructor
+  sandboxir::SeedBundle SB1(std::move(Seeds));
+  // getNumUnusedBits after constructor
+  EXPECT_EQ(SB1.getNumUnusedBits(), BundleBits);
+  // setUsed with index
+  SB1.setUsed(1);
+  // getFirstUnusedElementIdx
+  EXPECT_EQ(SB1.getFirstUnusedElementIdx(), 0u);
+  SB1.setUsed(unsigned(0));
+  // getFirstUnusedElementIdx not at end
+  EXPECT_EQ(SB1.getFirstUnusedElementIdx(), 2u);
+
+  // getSlice is (StartIdx, MaxVecRegBits, ForcePowerOf2). It's easier to
+  // compare test cases without the parameter-name comments inline.
+  auto Slice0 = SB1.getSlice(2, 64, true);
+  EXPECT_THAT(Slice0,
+              testing::ElementsAre(Insts[2], Insts[3], Insts[4], Insts[5]));
+  auto Slice1 = SB1.getSlice(2, 72, true);
+  EXPECT_THAT(Slice1,
+              testing::ElementsAre(Insts[2], Insts[3], Insts[4], Insts[5]));
+  auto Slice2 = SB1.getSlice(2, 80, true);
+  EXPECT_THAT(Slice2,
+              testing::ElementsAre(Insts[2], Insts[3], Insts[4], Insts[5]));
+
+  SB1.setUsed(2);
+  auto Slice3 = SB1.getSlice(3, 64, false);
+  EXPECT_THAT(Slice3, testing::ElementsAre(Insts[3], Insts[4], Insts[5]));
+  // getSlice empty case
+  SB1.setUsed(3);
+  auto Slice4 = SB1.getSlice(4, /* MaxVecRegBits */ 8,
+                             /* ForcePowerOf2 */ true);
+  EXPECT_EQ(Slice4.size(), 0u);
+}