llvm · Sterling-Augustine · Oct 21, 2024 · Oct 18, 2024 · Oct 18, 2024 · Oct 21, 2024
diff --git a/llvm/include/llvm/SandboxIR/Utils.h b/llvm/include/llvm/SandboxIR/Utils.h
@@ -60,6 +60,16 @@ class Utils {
         getUnderlyingObject(LSI->getPointerOperand()->Val));
   }
 
+  /// \Returns the lane count of \p Ty: the number of elements when \p Ty is a
+  /// vector, or 1 when it is a scalar. Vector types are accessed through a
+  /// cast to FixedVectorType, so scalable vectors are not handled here.
+  static int getNumElements(Type *Ty) {
+    if (!Ty->isVectorTy())
+      return 1;
+    return cast<FixedVectorType>(Ty)->getNumElements();
+  }
+  /// \Returns \p Ty itself when it is a scalar, or its element type when it is
+  /// a vector. Vector types are accessed through a cast to FixedVectorType.
+  static Type *getElementType(Type *Ty) {
+    if (!Ty->isVectorTy())
+      return Ty;
+    return cast<FixedVectorType>(Ty)->getElementType();
+  }
+
   /// \Returns the number of bits required to represent the operands or return
   /// value of \p V in \p DL.
   static unsigned getNumBits(Value *V, const DataLayout &DL) {

diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
@@ -284,6 +284,36 @@ class SeedContainer {
 #endif // NDEBUG
 };
 
+/// Gathers the seed instructions of a single basic block, keeping store seeds
+/// and load seeds in two separate containers.
+class SeedCollector {
+  // NOTE: the constructor's initializer list reads BB while initializing Ctx,
+  // so BB must stay declared before Ctx.
+  SeedContainer StoreSeeds;
+  SeedContainer LoadSeeds;
+  BasicBlock *BB;
+  Context &Ctx;
+
+  /// \Returns the combined size of the store and load seed containers.
+  /// This is to be used for limiting compilation time.
+  unsigned totalNumSeedGroups() const {
+    return StoreSeeds.size() + LoadSeeds.size();
+  }
+
+public:
+  /// Creates a collector for \p SBBB. \p SE is handed to both seed containers.
+  SeedCollector(BasicBlock *SBBB, ScalarEvolution &SE);
+  ~SeedCollector();
+
+  /// \Returns the basic block this collector was created for.
+  BasicBlock *getBasicBlock() { return BB; }
+
+  /// \Returns a range spanning the collected store seeds.
+  iterator_range<SeedContainer::iterator> getStoreSeeds() {
+    return make_range(StoreSeeds.begin(), StoreSeeds.end());
+  }
+  /// \Returns a range spanning the collected load seeds.
+  iterator_range<SeedContainer::iterator> getLoadSeeds() {
+    return make_range(LoadSeeds.begin(), LoadSeeds.end());
+  }
+#ifndef NDEBUG
+  /// Debug-only printers.
+  void print(raw_ostream &OS) const;
+  LLVM_DUMP_METHOD void dump() const;
+#endif
+};
+
 } // namespace llvm::sandboxir
 
 #endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SEEDCOLLECTOR_H
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
@@ -22,6 +22,23 @@ namespace llvm::sandboxir {
 cl::opt<unsigned> SeedBundleSizeLimit(
     "sbvec-seed-bundle-size-limit", cl::init(32), cl::Hidden,
     cl::desc("Limit the size of the seed bundle to cap compilation time."));
+// When set, the collector skips store instructions. Defaults to false.
+cl::opt<bool>
+    DisableStoreSeeds("sbvec-disable-store-seeds", cl::init(false), cl::Hidden,
+                      cl::desc("Don't collect store seed instructions."));
+// When set, the collector skips load instructions. Defaults to true, so load
+// seeds are not collected unless explicitly requested.
+cl::opt<bool>
+    DisableLoadSeeds("sbvec-disable-load-seeds", cl::init(true), cl::Hidden,
+                     cl::desc("Don't collect load seed instructions."));
+
+// Accepted values of -sbvec-force-seeds. They are macros so that they can be
+// spliced into the option description via string-literal concatenation.
+#define LoadSeedsDef "loads"
+#define StoreSeedsDef "stores"
+// When non-empty, overrides both disable flags above and enables exactly one
+// seed kind. Empty by default.
+cl::opt<std::string>
+    ForceSeed("sbvec-force-seeds", cl::init(""), cl::Hidden,
+              cl::desc("Enable only this type of seeds. This can be one "
+                       "of: '" LoadSeedsDef "','" StoreSeedsDef "'."));
+// Once the number of collected seed groups in a block exceeds this value, the
+// collector stops scanning the block. Defaults to 256.
+cl::opt<unsigned> SeedGroupsLimit(
+    "sbvec-seed-groups-limit", cl::init(256), cl::Hidden,
+    cl::desc("Limit the number of collected seeds groups in a BB to "
+             "cap compilation time."));
 
 MutableArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx,
                                                     unsigned MaxVecRegBits,
@@ -131,4 +148,74 @@ void SeedContainer::print(raw_ostream &OS) const {
 LLVM_DUMP_METHOD void SeedContainer::dump() const { print(dbgs()); }
 #endif // NDEBUG
 
+/// \Returns true if the load or store \p LSI may be used as a vectorization
+/// seed: it must be a simple access and its accessed type (or, for a fixed
+/// vector, its element type) must be a valid vector element type.
+template <typename LoadOrStoreT> static bool isValidMemSeed(LoadOrStoreT *LSI) {
+  // Non-simple accesses are never seeds. The previous check was inverted: it
+  // accepted every simple access without looking at its type and ran the type
+  // checks only on the non-simple ones.
+  if (!LSI->isSimple())
+    return false;
+  auto *Ty = Utils::getExpectedType(LSI);
+  // Omit types that are architecturally unvectorizable
+  if (Ty->isX86_FP80Ty() || Ty->isPPC_FP128Ty())
+    return false;
+  // Omit vector types without compile-time-known lane counts
+  if (isa<ScalableVectorType>(Ty))
+    return false;
+  // For fixed vectors the element type decides; otherwise the type itself.
+  if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
+    return VectorType::isValidElementType(VTy->getElementType());
+  return VectorType::isValidElementType(Ty);
+}
+
+// Explicit instantiations of the helper for load and store instructions.
+template bool isValidMemSeed(LoadInst *LSI);
+template bool isValidMemSeed<StoreInst>(StoreInst *LSI);
+
+/// Builds the collector for \p SBBB and immediately walks the block, gathering
+/// store and load seeds as selected by the command-line options. \p SE is
+/// passed on to both seed containers.
+SeedCollector::SeedCollector(BasicBlock *SBBB, ScalarEvolution &SE)
+    : StoreSeeds(SE), LoadSeeds(SE), BB(SBBB), Ctx(BB->getContext()) {
+  // TODO: Register a callback for updating the Collector datastructures upon
+  // instr removal
+
+  // By default each seed kind is collected unless its disable flag is set.
+  bool CollectStores = !DisableStoreSeeds;
+  bool CollectLoads = !DisableLoadSeeds;
+  // A non-empty force option overrides both flags and selects exactly one
+  // seed kind.
+  if (LLVM_UNLIKELY(!ForceSeed.empty())) {
+    const bool WantStores = ForceSeed == StoreSeedsDef;
+    const bool WantLoads = ForceSeed == LoadSeedsDef;
+    if (!WantStores && !WantLoads) {
+      // Unrecognized value: report it and terminate the process.
+      errs() << "Bad argument '" << ForceSeed << "' in -" << ForceSeed.ArgStr
+             << "='" << ForceSeed << "'.\n";
+      errs() << "Description: " << ForceSeed.HelpStr << "\n";
+      exit(1);
+    }
+    CollectStores = WantStores;
+    CollectLoads = WantLoads;
+  }
+  // Actually collect the seeds.
+  for (auto &I : *BB) {
+    if (auto *SI = dyn_cast<StoreInst>(&I)) {
+      if (CollectStores && isValidMemSeed(SI))
+        StoreSeeds.insert(SI);
+    } else if (auto *LI = dyn_cast<LoadInst>(&I)) {
+      if (CollectLoads && isValidMemSeed(LI))
+        LoadSeeds.insert(LI);
+    }
+    // Cap compilation time.
+    if (totalNumSeedGroups() > SeedGroupsLimit)
+      break;
+  }
+}
+
+/// Currently performs no work; the body only holds the TODO below.
+SeedCollector::~SeedCollector() {
+  // TODO: Unregister the callback for updating the seed datastructures upon
+  // instr removal
+}
+
+#ifndef NDEBUG
+/// Prints the store seeds followed by the load seeds to \p OS, each preceded
+/// by its own banner line.
+void SeedCollector::print(raw_ostream &OS) const {
+  OS << "=== StoreSeeds ===\n";
+  StoreSeeds.print(OS);
+  OS << "=== LoadSeeds ===\n";
+  LoadSeeds.print(OS);
+}
+
+/// Prints the collector's seeds to the debug stream.
+void SeedCollector::dump() const { print(dbgs()); }
+#endif
+
 } // namespace llvm::sandboxir
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SeedCollectorTest.cpp
@@ -268,3 +268,171 @@ define void @foo(ptr %ptrA, float %val, ptr %ptrB) {
   }
   EXPECT_EQ(Cnt, 0u);
 }
+
+// Four scalar float stores to consecutive addresses, emitted out of address
+// order, are expected to land in a single bundle sorted by address.
+TEST_F(SeedBundleTest, ConsecutiveStores) {
+  // Where "Consecutive" means the stores address consecutive locations in
+  // memory, but not in program order. Check to see that the collector puts them
+  // in the proper order for vectorization.
+  parseIR(C, R"IR(
+define void @foo(ptr noalias %ptr, float %val) {
+bb:
+  %ptr0 = getelementptr float, ptr %ptr, i32 0
+  %ptr1 = getelementptr float, ptr %ptr, i32 1
+  %ptr2 = getelementptr float, ptr %ptr, i32 2
+  %ptr3 = getelementptr float, ptr %ptr, i32 3
+  store float %val, ptr %ptr0
+  store float %val, ptr %ptr2
+  store float %val, ptr %ptr1
+  store float %val, ptr %ptr3
+  ret void
+}
+)IR");
+  Function &LLVMF = *M->getFunction("foo");
+  DominatorTree DT(LLVMF);
+  TargetLibraryInfoImpl TLII;
+  TargetLibraryInfo TLI(TLII);
+  DataLayout DL(M->getDataLayout());
+  LoopInfo LI(DT);
+  AssumptionCache AC(LLVMF);
+  ScalarEvolution SE(LLVMF, TLI, AC, DT, LI);
+
+  sandboxir::Context Ctx(C);
+  auto &F = *Ctx.createFunction(&LLVMF);
+  auto BB = F.begin();
+  sandboxir::SeedCollector SC(&*BB, SE);
+
+  // Find the stores (skip the four GEPs).
+  auto It = std::next(BB->begin(), 4);
+  // StX with X as the order by offset in memory
+  auto *St0 = &*It++;
+  auto *St2 = &*It++;
+  auto *St1 = &*It++;
+  auto *St3 = &*It++;
+
+  auto StoreSeedsRange = SC.getStoreSeeds();
+  // Stop here if nothing was collected: dereferencing or advancing begin() of
+  // an empty range would be invalid.
+  ASSERT_FALSE(StoreSeedsRange.begin() == StoreSeedsRange.end());
+  auto &SB = *StoreSeedsRange.begin();
+  // Expect just one vector of store seeds
+  EXPECT_TRUE(std::next(StoreSeedsRange.begin()) == StoreSeedsRange.end());
+  EXPECT_THAT(SB, testing::ElementsAre(St0, St1, St2, St3));
+}
+
+// Scalar float stores at element offsets 0, 3, 5 and 7 (i.e. with gaps between
+// them) are expected to land in a single bundle sorted by offset.
+TEST_F(SeedBundleTest, StoresWithGaps) {
+  parseIR(C, R"IR(
+define void @foo(ptr noalias %ptr, float %val) {
+bb:
+  %ptr0 = getelementptr float, ptr %ptr, i32 0
+  %ptr1 = getelementptr float, ptr %ptr, i32 3
+  %ptr2 = getelementptr float, ptr %ptr, i32 5
+  %ptr3 = getelementptr float, ptr %ptr, i32 7
+  store float %val, ptr %ptr0
+  store float %val, ptr %ptr2
+  store float %val, ptr %ptr1
+  store float %val, ptr %ptr3
+  ret void
+}
+)IR");
+  Function &LLVMF = *M->getFunction("foo");
+  DominatorTree DT(LLVMF);
+  TargetLibraryInfoImpl TLII;
+  TargetLibraryInfo TLI(TLII);
+  DataLayout DL(M->getDataLayout());
+  LoopInfo LI(DT);
+  AssumptionCache AC(LLVMF);
+  ScalarEvolution SE(LLVMF, TLI, AC, DT, LI);
+
+  sandboxir::Context Ctx(C);
+  auto &F = *Ctx.createFunction(&LLVMF);
+  auto BB = F.begin();
+  sandboxir::SeedCollector SC(&*BB, SE);
+
+  // Find the stores (skip the four GEPs).
+  auto It = std::next(BB->begin(), 4);
+  // StX with X as the order by offset in memory
+  auto *St0 = &*It++;
+  auto *St2 = &*It++;
+  auto *St1 = &*It++;
+  auto *St3 = &*It++;
+
+  auto StoreSeedsRange = SC.getStoreSeeds();
+  // Stop here if nothing was collected: dereferencing or advancing begin() of
+  // an empty range would be invalid.
+  ASSERT_FALSE(StoreSeedsRange.begin() == StoreSeedsRange.end());
+  auto &SB = *StoreSeedsRange.begin();
+  // Expect just one vector of store seeds
+  EXPECT_TRUE(std::next(StoreSeedsRange.begin()) == StoreSeedsRange.end());
+  EXPECT_THAT(SB, testing::ElementsAre(St0, St1, St2, St3));
+}
+
+// Two <2 x float> stores emitted in reverse address order are expected to
+// form a single bundle sorted by offset.
+TEST_F(SeedBundleTest, VectorStores) {
+  parseIR(C, R"IR(
+define void @foo(ptr noalias %ptr, <2 x float> %val) {
+bb:
+  %ptr0 = getelementptr float, ptr %ptr, i32 0
+  %ptr2 = getelementptr float, ptr %ptr, i32 2
+  store <2 x float> %val, ptr %ptr2
+  store <2 x float> %val, ptr %ptr0
+  ret void
+}
+)IR");
+  Function &LLVMF = *M->getFunction("foo");
+  DominatorTree DT(LLVMF);
+  TargetLibraryInfoImpl TLII;
+  TargetLibraryInfo TLI(TLII);
+  DataLayout DL(M->getDataLayout());
+  LoopInfo LI(DT);
+  AssumptionCache AC(LLVMF);
+  ScalarEvolution SE(LLVMF, TLI, AC, DT, LI);
+
+  sandboxir::Context Ctx(C);
+  auto &F = *Ctx.createFunction(&LLVMF);
+  auto BB = F.begin();
+  sandboxir::SeedCollector SC(&*BB, SE);
+
+  // Find the stores (skip the two GEPs).
+  auto It = std::next(BB->begin(), 2);
+  // StX with X as the order by offset in memory
+  auto *St2 = &*It++;
+  auto *St0 = &*It++;
+
+  auto StoreSeedsRange = SC.getStoreSeeds();
+  // Stop here if nothing was collected: dereferencing or advancing begin() of
+  // an empty range would be invalid.
+  ASSERT_FALSE(StoreSeedsRange.begin() == StoreSeedsRange.end());
+  auto &SB = *StoreSeedsRange.begin();
+  // Expect just one vector of store seeds
+  EXPECT_TRUE(std::next(StoreSeedsRange.begin()) == StoreSeedsRange.end());
+  EXPECT_THAT(SB, testing::ElementsAre(St0, St2));
+}
+
+// A <2 x float> store at element offset 1, sitting between scalar float stores
+// at offsets 0 and 3, is expected to share one bundle with them, sorted by
+// offset.
+TEST_F(SeedBundleTest, MixedScalarVectors) {
+  parseIR(C, R"IR(
+define void @foo(ptr noalias %ptr, float %v, <2 x float> %val) {
+bb:
+  %ptr0 = getelementptr float, ptr %ptr, i32 0
+  %ptr1 = getelementptr float, ptr %ptr, i32 1
+  %ptr3 = getelementptr float, ptr %ptr, i32 3
+  store float %v, ptr %ptr0
+  store float %v, ptr %ptr3
+  store <2 x float> %val, ptr %ptr1
+  ret void
+}
+)IR");
+  Function &LLVMF = *M->getFunction("foo");
+  DominatorTree DT(LLVMF);
+  TargetLibraryInfoImpl TLII;
+  TargetLibraryInfo TLI(TLII);
+  DataLayout DL(M->getDataLayout());
+  LoopInfo LI(DT);
+  AssumptionCache AC(LLVMF);
+  ScalarEvolution SE(LLVMF, TLI, AC, DT, LI);
+
+  sandboxir::Context Ctx(C);
+  auto &F = *Ctx.createFunction(&LLVMF);
+  auto BB = F.begin();
+  sandboxir::SeedCollector SC(&*BB, SE);
+
+  // Find the stores (skip the three GEPs).
+  auto It = std::next(BB->begin(), 3);
+  // StX with X as the order by offset in memory
+  auto *St0 = &*It++;
+  auto *St3 = &*It++;
+  auto *St1 = &*It++;
+
+  // Fetch the range once, as the sibling tests do.
+  auto StoreSeedsRange = SC.getStoreSeeds();
+  // Stop here if nothing was collected: dereferencing or advancing begin() of
+  // an empty range would be invalid.
+  ASSERT_FALSE(StoreSeedsRange.begin() == StoreSeedsRange.end());
+  auto &SB = *StoreSeedsRange.begin();
+  // Expect just one vector of store seeds
+  EXPECT_TRUE(std::next(StoreSeedsRange.begin()) == StoreSeedsRange.end());
+  EXPECT_THAT(SB, testing::ElementsAre(St0, St1, St3));
+}