-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[SandboxVectorizer] New class to actually collect and manage seeds #112979
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
d11ad2d
93a1016
23cc5e7
d4f53de
de1203a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -60,6 +60,16 @@ class Utils { | |
| getUnderlyingObject(LSI->getPointerOperand()->Val)); | ||
| } | ||
|
|
||
| /// \Returns the lane count of \p Ty: the number of elements when \p Ty is a | ||
| /// vector type, or 1 when it is a scalar. A vector \p Ty is unconditionally | ||
| /// cast to FixedVectorType, so scalable vectors are not supported here. | ||
| // TODO: Move this and getElementType() to the vectorizer's VecUtils.h once | ||
| // that header exists (requested in review). | ||
|
||
| static int getNumElements(Type *Ty) { | ||
|   if (!Ty->isVectorTy()) | ||
|     return 1; | ||
|   return cast<FixedVectorType>(Ty)->getNumElements(); | ||
| } | ||
| /// \Returns the scalar type underlying \p Ty: the element type when \p Ty is | ||
| /// a vector (cast to FixedVectorType), otherwise \p Ty itself. | ||
| static Type *getElementType(Type *Ty) { | ||
|   if (!Ty->isVectorTy()) | ||
|     return Ty; | ||
|   return cast<FixedVectorType>(Ty)->getElementType(); | ||
| } | ||
|
|
||
| /// \Returns the number of bits required to represent the operands or return | ||
| /// value of \p V in \p DL. | ||
| static unsigned getNumBits(Value *V, const DataLayout &DL) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -284,6 +284,36 @@ class SeedContainer { | |
| #endif // NDEBUG | ||
| }; | ||
|
|
||
| /// Collects the store and load seeds of a single basic block. The constructor | ||
| /// walks the block and files the seeds into two separate SeedContainers. | ||
| class SeedCollector { | ||
| SeedContainer StoreSeeds; | ||
| SeedContainer LoadSeeds; | ||
| // NOTE: The constructor initializes Ctx from BB->getContext(), so BB has to | ||
| // stay declared before Ctx (members are initialized in declaration order). | ||
| BasicBlock *BB; | ||
| Context &Ctx; | ||
|
|
||
| /// \Returns the number of SeedBundle groups for all seed types. | ||
| /// This is to be used for limiting compilation time. | ||
| unsigned totalNumSeedGroups() const { | ||
| return StoreSeeds.size() + LoadSeeds.size(); | ||
| } | ||
|
|
||
| public: | ||
| /// Collects the seeds found in \p SBBB. \p SE is forwarded to both seed | ||
| /// containers. | ||
| SeedCollector(BasicBlock *SBBB, ScalarEvolution &SE); | ||
| ~SeedCollector(); | ||
|
|
||
| /// \Returns the basic block this collector was constructed with. | ||
| BasicBlock *getBasicBlock() { return BB; } | ||
|
||
|
|
||
| /// \Returns a range over the contents of the store seed container. | ||
| iterator_range<SeedContainer::iterator> getStoreSeeds() { | ||
| return {StoreSeeds.begin(), StoreSeeds.end()}; | ||
| } | ||
| /// \Returns a range over the contents of the load seed container. | ||
| iterator_range<SeedContainer::iterator> getLoadSeeds() { | ||
| return {LoadSeeds.begin(), LoadSeeds.end()}; | ||
| } | ||
| #ifndef NDEBUG | ||
| // Debug-only printing helpers (compiled out when NDEBUG is defined). | ||
| void print(raw_ostream &OS) const; | ||
| LLVM_DUMP_METHOD void dump() const; | ||
| #endif | ||
| }; | ||
|
|
||
| } // namespace llvm::sandboxir | ||
|
|
||
| #endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SEEDCOLLECTOR_H | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,6 +22,23 @@ namespace llvm::sandboxir { | |
| cl::opt<unsigned> SeedBundleSizeLimit( | ||
| "sbvec-seed-bundle-size-limit", cl::init(32), cl::Hidden, | ||
| cl::desc("Limit the size of the seed bundle to cap compilation time.")); | ||
| // Store seeds are collected unless this flag is set (it defaults to false). | ||
| cl::opt<bool> | ||
| DisableStoreSeeds("sbvec-disable-store-seeds", cl::init(false), cl::Hidden, | ||
| cl::desc("Don't collect store seed instructions.")); | ||
| // Load seeds are NOT collected by default: this flag defaults to true. | ||
| cl::opt<bool> | ||
| DisableLoadSeeds("sbvec-disable-load-seeds", cl::init(true), cl::Hidden, | ||
| cl::desc("Don't collect load seed instructions.")); | ||
|
|
||
| // Accepted values of -sbvec-force-seeds. The SeedCollector constructor | ||
| // compares the option against these and rejects any other non-empty value. | ||
| #define LoadSeedsDef "loads" | ||
| #define StoreSeedsDef "stores" | ||
| // When non-empty, this overrides both Disable*Seeds flags in the SeedCollector | ||
| // constructor so that only the named seed kind is collected. | ||
| cl::opt<std::string> | ||
| ForceSeed("sbvec-force-seeds", cl::init(""), cl::Hidden, | ||
| cl::desc("Enable only this type of seeds. This can be one " | ||
| "of: '" LoadSeedsDef "','" StoreSeedsDef "'.")); | ||
|
||
| // The SeedCollector constructor stops scanning the block once the total | ||
| // number of seed groups exceeds this value. | ||
| cl::opt<unsigned> SeedGroupsLimit( | ||
| "sbvec-seed-groups-limit", cl::init(256), cl::Hidden, | ||
| cl::desc("Limit the number of collected seeds groups in a BB to " | ||
| "cap compilation time.")); | ||
|
|
||
| MutableArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx, | ||
| unsigned MaxVecRegBits, | ||
|
|
@@ -131,4 +148,74 @@ void SeedContainer::print(raw_ostream &OS) const { | |
| LLVM_DUMP_METHOD void SeedContainer::dump() const { print(dbgs()); } | ||
| #endif // NDEBUG | ||
|
|
||
| /// \Returns true if the load or store \p LSI may be used as a vectorization | ||
| /// seed: the access must be simple, and its expected type must be a scalar or | ||
| /// fixed-width vector whose element type is a valid vector element type. | ||
| template <typename LoadOrStoreT> static bool isValidMemSeed(LoadOrStoreT *LSI) { | ||
|   // Non-simple accesses are never seeds. This must be an early reject rather | ||
|   // than an early accept, so that simple accesses still go through the type | ||
|   // checks below. | ||
|   if (!LSI->isSimple()) | ||
|     return false; | ||
|   auto *Ty = Utils::getExpectedType(LSI); | ||
|   // Omit types that are architecturally unvectorizable | ||
|   if (Ty->isX86_FP80Ty() || Ty->isPPC_FP128Ty()) | ||
|     return false; | ||
|   // Omit vector types without compile-time-known lane counts | ||
|   if (isa<ScalableVectorType>(Ty)) | ||
|     return false; | ||
|   // For fixed vectors the element type decides; scalars are checked directly. | ||
|   if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) | ||
|     return VectorType::isValidElementType(VTy->getElementType()); | ||
|   return VectorType::isValidElementType(Ty); | ||
| } | ||
|
|
||
| // Explicit instantiations for the two memory instruction kinds the collector | ||
| // handles, both written with an explicit template argument for consistency. | ||
| template bool isValidMemSeed<LoadInst>(LoadInst *LSI); | ||
| template bool isValidMemSeed<StoreInst>(StoreInst *LSI); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not make both instantiations similar, like
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
|
|
||
| /// Scans \p SBBB in program order and files every qualifying store and load | ||
| /// into the matching seed container. \p SE is handed to both containers. | ||
| SeedCollector::SeedCollector(BasicBlock *SBBB, ScalarEvolution &SE) | ||
|     : StoreSeeds(SE), LoadSeeds(SE), BB(SBBB), Ctx(BB->getContext()) { | ||
|   // TODO: Register a callback for updating the Collector datastructures upon | ||
|   // instr removal | ||
|
|
||
|   // By default each seed kind is collected unless its disable flag is set. | ||
|   bool CollectStores = !DisableStoreSeeds; | ||
|   bool CollectLoads = !DisableLoadSeeds; | ||
|   if (LLVM_UNLIKELY(!ForceSeed.empty())) { | ||
|     // A forced seed kind overrides both flags: only the named kind is | ||
|     // collected, and any unrecognized value is reported and ends the process. | ||
|     const bool ForceStores = ForceSeed == StoreSeedsDef; | ||
|     const bool ForceLoads = ForceSeed == LoadSeedsDef; | ||
|     if (!ForceStores && !ForceLoads) { | ||
|       errs() << "Bad argument '" << ForceSeed << "' in -" << ForceSeed.ArgStr | ||
|              << "='" << ForceSeed << "'.\n"; | ||
|       errs() << "Description: " << ForceSeed.HelpStr << "\n"; | ||
|       exit(1); | ||
|     } | ||
|     CollectStores = ForceStores; | ||
|     CollectLoads = ForceLoads; | ||
|   } | ||
|   // Actually collect the seeds. | ||
|   for (auto &I : *BB) { | ||
|     auto *SI = dyn_cast<StoreInst>(&I); | ||
|     if (SI && CollectStores && isValidMemSeed(SI)) | ||
|       StoreSeeds.insert(SI); | ||
|     auto *LI = dyn_cast<LoadInst>(&I); | ||
|     if (LI && CollectLoads && isValidMemSeed(LI)) | ||
|       LoadSeeds.insert(LI); | ||
|     // Cap compilation time. | ||
|     if (totalNumSeedGroups() > SeedGroupsLimit) | ||
|       break; | ||
|   } | ||
| } | ||
|
|
||
| // The destructor currently does no work of its own; its body only records the | ||
| // cleanup that will be needed once the removal callback exists. | ||
| SeedCollector::~SeedCollector() { | ||
| // TODO: Unregister the callback for updating the seed datastructures upon | ||
| // instr removal | ||
| } | ||
|
|
||
| #ifndef NDEBUG | ||
| // Prints the store seed container followed by the load seed container to | ||
| // \p OS, each under its own "=== ... ===" header line. | ||
| void SeedCollector::print(raw_ostream &OS) const { | ||
| OS << "=== StoreSeeds ===\n"; | ||
| StoreSeeds.print(OS); | ||
| OS << "=== LoadSeeds ===\n"; | ||
| LoadSeeds.print(OS); | ||
| } | ||
|
|
||
| // Convenience wrapper: prints to the dbgs() stream. | ||
| void SeedCollector::dump() const { print(dbgs()); } | ||
| #endif | ||
|
|
||
| } // namespace llvm::sandboxir | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -268,3 +268,171 @@ define void @foo(ptr %ptrA, float %val, ptr %ptrB) { | |
| } | ||
| EXPECT_EQ(Cnt, 0u); | ||
| } | ||
|
|
||
| TEST_F(SeedBundleTest, ConsecutiveStores) { | ||
| // Where "Consecutive" means the stores address consecutive locations in | ||
| // memory, but not in program order. Check to see that the collector puts them | ||
| // in the proper order for vectorization. | ||
| parseIR(C, R"IR( | ||
| define void @foo(ptr noalias %ptr, float %val) { | ||
| bb: | ||
| %ptr0 = getelementptr float, ptr %ptr, i32 0 | ||
| %ptr1 = getelementptr float, ptr %ptr, i32 1 | ||
| %ptr2 = getelementptr float, ptr %ptr, i32 2 | ||
| %ptr3 = getelementptr float, ptr %ptr, i32 3 | ||
| store float %val, ptr %ptr0 | ||
| store float %val, ptr %ptr2 | ||
| store float %val, ptr %ptr1 | ||
| store float %val, ptr %ptr3 | ||
| ret void | ||
| } | ||
| )IR"); | ||
| // Build the analyses that ScalarEvolution is constructed from. | ||
| Function &LLVMF = *M->getFunction("foo"); | ||
| DominatorTree DT(LLVMF); | ||
| TargetLibraryInfoImpl TLII; | ||
| TargetLibraryInfo TLI(TLII); | ||
| DataLayout DL(M->getDataLayout()); | ||
| LoopInfo LI(DT); | ||
| AssumptionCache AC(LLVMF); | ||
| ScalarEvolution SE(LLVMF, TLI, AC, DT, LI); | ||
|
|
||
| sandboxir::Context Ctx(C); | ||
| auto &F = *Ctx.createFunction(&LLVMF); | ||
| auto BB = F.begin(); | ||
| sandboxir::SeedCollector SC(&*BB, SE); | ||
|
|
||
| // Find the stores: they come right after the four getelementptr instructions. | ||
| auto It = std::next(BB->begin(), 4); | ||
| // StX with X as the order by offset in memory | ||
| auto *St0 = &*It++; | ||
| auto *St2 = &*It++; | ||
| auto *St1 = &*It++; | ||
| auto *St3 = &*It++; | ||
|
|
||
| auto StoreSeedsRange = SC.getStoreSeeds(); | ||
| auto &SB = *StoreSeedsRange.begin(); | ||
| // Expect just one vector of store seeds | ||
| EXPECT_TRUE(std::next(StoreSeedsRange.begin()) == StoreSeedsRange.end()); | ||
|
||
| EXPECT_THAT(SB, testing::ElementsAre(St0, St1, St2, St3)); | ||
| } | ||
|
|
||
| TEST_F(SeedBundleTest, StoresWithGaps) { | ||
| parseIR(C, R"IR( | ||
| define void @foo(ptr noalias %ptr, float %val) { | ||
| bb: | ||
| %ptr0 = getelementptr float, ptr %ptr, i32 0 | ||
| %ptr1 = getelementptr float, ptr %ptr, i32 3 | ||
| %ptr2 = getelementptr float, ptr %ptr, i32 5 | ||
| %ptr3 = getelementptr float, ptr %ptr, i32 7 | ||
| store float %val, ptr %ptr0 | ||
| store float %val, ptr %ptr2 | ||
| store float %val, ptr %ptr1 | ||
| store float %val, ptr %ptr3 | ||
| ret void | ||
| } | ||
| )IR"); | ||
| Function &LLVMF = *M->getFunction("foo"); | ||
| DominatorTree DT(LLVMF); | ||
| TargetLibraryInfoImpl TLII; | ||
| TargetLibraryInfo TLI(TLII); | ||
| DataLayout DL(M->getDataLayout()); | ||
| LoopInfo LI(DT); | ||
| AssumptionCache AC(LLVMF); | ||
| ScalarEvolution SE(LLVMF, TLI, AC, DT, LI); | ||
|
|
||
| sandboxir::Context Ctx(C); | ||
| auto &F = *Ctx.createFunction(&LLVMF); | ||
| auto BB = F.begin(); | ||
| sandboxir::SeedCollector SC(&*BB, SE); | ||
|
|
||
| // Find the stores | ||
| auto It = std::next(BB->begin(), 4); | ||
| // StX with X as the order by offset in memory | ||
| auto *St0 = &*It++; | ||
| auto *St2 = &*It++; | ||
| auto *St1 = &*It++; | ||
| auto *St3 = &*It++; | ||
|
|
||
| auto StoreSeedsRange = SC.getStoreSeeds(); | ||
| auto &SB = *StoreSeedsRange.begin(); | ||
| // Expect just one vector of store seeds | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought that the bundle contained stores to consecutive memory addresses. Or isn't this the case? If not, is there an API to get the consecutive ones?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bundles contain stores in increasing offsets from a symbol, but those offsets may or may not be consecutive in memory. If instructions A and B are consecutive in the bundle but are not consecutive in memory, then there is no instruction C in the bundle that is consecutive in memory with A. We can't check for consecutiveness until after all seeds have been collected. A series of program-consecutive stores might not be in memory-consecutive order, but still store to a contiguous block of memory. We can either have "getSlice" check for in-memory consecutiveness, or leave that as part of the additional legality checking that will also need to be done. I'm not sure which is better.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right,
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This actually brings up another problem, in that a slice could contain two writes to the exact same memory location, which would confuse things like lane-counting and power-of-two calculations. I suppose one of the stores must be dead in that case, but getSlice doesn't know anything about that. Can we assume the scheduler and legality checking obviate that problem? Or should this be a TODO?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So if I understand correctly if there are four stores to addresses A[0],A[1] they will end up in the same bundle. We will eventually need some way to access them separately. This won't be a legality issue though. |
||
| EXPECT_TRUE(std::next(StoreSeedsRange.begin()) == StoreSeedsRange.end()); | ||
|
||
| EXPECT_THAT(SB, testing::ElementsAre(St0, St1, St2, St3)); | ||
| } | ||
|
|
||
| TEST_F(SeedBundleTest, VectorStores) { | ||
| // Two <2 x float> stores issued in reverse address order (offset 2 first, | ||
| // then offset 0). Expect a single store bundle that lists St0 before St2. | ||
| parseIR(C, R"IR( | ||
| define void @foo(ptr noalias %ptr, <2 x float> %val) { | ||
| bb: | ||
| %ptr0 = getelementptr float, ptr %ptr, i32 0 | ||
| %ptr2 = getelementptr float, ptr %ptr, i32 2 | ||
|
||
| store <2 x float> %val, ptr %ptr2 | ||
| store <2 x float> %val, ptr %ptr0 | ||
| ret void | ||
| } | ||
| )IR"); | ||
| // Build the analyses that ScalarEvolution is constructed from. | ||
| Function &LLVMF = *M->getFunction("foo"); | ||
| DominatorTree DT(LLVMF); | ||
| TargetLibraryInfoImpl TLII; | ||
| TargetLibraryInfo TLI(TLII); | ||
| DataLayout DL(M->getDataLayout()); | ||
| LoopInfo LI(DT); | ||
| AssumptionCache AC(LLVMF); | ||
| ScalarEvolution SE(LLVMF, TLI, AC, DT, LI); | ||
|
|
||
| sandboxir::Context Ctx(C); | ||
| auto &F = *Ctx.createFunction(&LLVMF); | ||
| auto BB = F.begin(); | ||
| sandboxir::SeedCollector SC(&*BB, SE); | ||
|
|
||
| // Find the stores: they come right after the two getelementptr instructions. | ||
| auto It = std::next(BB->begin(), 2); | ||
| // StX with X as the order by offset in memory | ||
| auto *St2 = &*It++; | ||
| auto *St0 = &*It++; | ||
|
|
||
| auto StoreSeedsRange = SC.getStoreSeeds(); | ||
| auto &SB = *StoreSeedsRange.begin(); | ||
| // Expect just one vector of store seeds | ||
| EXPECT_TRUE(std::next(StoreSeedsRange.begin()) == StoreSeedsRange.end()); | ||
|
||
| EXPECT_THAT(SB, testing::ElementsAre(St0, St2)); | ||
| } | ||
|
|
||
| TEST_F(SeedBundleTest, MixedScalarVectors) { | ||
| // Scalar float stores at element offsets 0 and 3 plus one <2 x float> store | ||
| // at offset 1. Expect a single store bundle ordered St0, St1, St3. | ||
| parseIR(C, R"IR( | ||
| define void @foo(ptr noalias %ptr, float %v, <2 x float> %val) { | ||
| bb: | ||
| %ptr0 = getelementptr float, ptr %ptr, i32 0 | ||
| %ptr1 = getelementptr float, ptr %ptr, i32 1 | ||
| %ptr3 = getelementptr float, ptr %ptr, i32 3 | ||
| store float %v, ptr %ptr0 | ||
| store float %v, ptr %ptr3 | ||
| store <2 x float> %val, ptr %ptr1 | ||
| ret void | ||
| } | ||
| )IR"); | ||
| // Build the analyses that ScalarEvolution is constructed from. | ||
| Function &LLVMF = *M->getFunction("foo"); | ||
| DominatorTree DT(LLVMF); | ||
| TargetLibraryInfoImpl TLII; | ||
| TargetLibraryInfo TLI(TLII); | ||
| DataLayout DL(M->getDataLayout()); | ||
| LoopInfo LI(DT); | ||
| AssumptionCache AC(LLVMF); | ||
| ScalarEvolution SE(LLVMF, TLI, AC, DT, LI); | ||
|
|
||
| sandboxir::Context Ctx(C); | ||
| auto &F = *Ctx.createFunction(&LLVMF); | ||
| auto BB = F.begin(); | ||
| sandboxir::SeedCollector SC(&*BB, SE); | ||
|
|
||
| // Find the stores: they come right after the three getelementptr instructions. | ||
| auto It = std::next(BB->begin(), 3); | ||
| // StX with X as the order by offset in memory | ||
| auto *St0 = &*It++; | ||
| auto *St3 = &*It++; | ||
| auto *St1 = &*It++; | ||
|
|
||
| auto &SB = *SC.getStoreSeeds().begin(); | ||
| // Expect just one vector of store seeds | ||
| EXPECT_TRUE(std::next(SC.getStoreSeeds().begin()) == | ||
|
||
| SC.getStoreSeeds().end()); | ||
| EXPECT_THAT(SB, testing::ElementsAre(St0, St1, St3)); | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should probably move both functions to
llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h because they will most likely be used just by the vectorizer. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right now there is no VecUtils.h in the sandbox vectorizer. Let's wait to do this until there are enough functions where it makes sense.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am actually using this exact function in one of the Legality patches and I placed it in
VecUtils.h, but it's up to you if you want to create the file now or later; either way is fine with me. If you decide to keep it in Utils.h, please add a TODO so that we remember to clean it up later.