77// ===----------------------------------------------------------------------===//
88//
99// A simple pass that looks at local memory arrays that are statically
10- // sized and sets an appropriate alignment for them . This enables vectorization
10+ // sized and potentially increases their alignment. This enables vectorization
1111// of loads/stores to these arrays if not explicitly specified by the client.
1212//
1313// TODO: Ideally we should do a bin-packing of local arrays to maximize
1616// ===----------------------------------------------------------------------===//
1717
1818#include " NVPTX.h"
19+ #include " llvm/Analysis/TargetTransformInfo.h"
1920#include " llvm/IR/DataLayout.h"
2021#include " llvm/IR/Instructions.h"
2122#include " llvm/IR/Module.h"
23+ #include " llvm/IR/PassManager.h"
2224#include " llvm/Pass.h"
2325#include " llvm/Support/CommandLine.h"
2426#include " llvm/Support/MathExtras.h"
27+ #include " llvm/Support/NVPTXAddrSpace.h"
2528
2629using namespace llvm ;
2730
@@ -30,16 +33,35 @@ static cl::opt<bool>
3033 cl::init (false ), cl::Hidden,
3134 cl::desc(" Use maximum alignment for local memory" ));
3235
33- static constexpr Align MaxPTXArrayAlignment = Align::Constant<16 >();
36+ static Align getMaxLocalArrayAlignment (const TargetTransformInfo &TTI) {
37+ const unsigned MaxBitWidth =
38+ TTI.getLoadStoreVecRegBitWidth (NVPTXAS::ADDRESS_SPACE_LOCAL);
39+ return Align (MaxBitWidth / 8 );
40+ }
41+
42+ namespace {
43+ struct NVPTXIncreaseLocalAlignment {
44+ const Align MaxAlign;
45+
46+ NVPTXIncreaseLocalAlignment (const TargetTransformInfo &TTI)
47+ : MaxAlign(getMaxLocalArrayAlignment(TTI)) {}
48+
49+ bool run (Function &F);
50+ bool updateAllocaAlignment (AllocaInst *Alloca, const DataLayout &DL);
51+ Align getAggressiveArrayAlignment (unsigned ArraySize);
52+ Align getConservativeArrayAlignment (unsigned ArraySize);
53+ };
54+ } // namespace
3455
3556// / Get the maximum useful alignment for an array. This is more likely to
3657// / produce holes in the local memory.
3758// /
3859// / Choose an alignment large enough that the entire array could be loaded with
3960// / a single vector load (if possible). Cap the alignment at
4061// / MaxPTXArrayAlignment.
41- static Align getAggressiveArrayAlignment (const unsigned ArraySize) {
42- return std::min (MaxPTXArrayAlignment, Align (PowerOf2Ceil (ArraySize)));
62+ Align NVPTXIncreaseLocalAlignment::getAggressiveArrayAlignment (
63+ const unsigned ArraySize) {
64+ return std::min (MaxAlign, Align (PowerOf2Ceil (ArraySize)));
4365}
4466
4567// / Get the alignment of arrays that reduces the chances of leaving holes when
@@ -49,20 +71,18 @@ static Align getAggressiveArrayAlignment(const unsigned ArraySize) {
4971// / Choose the largest alignment such that the array size is a multiple of the
5072// / alignment. If all elements of the buffer are allocated in order of
5173// / alignment (higher to lower) no holes will be left.
52- static Align getConservativeArrayAlignment (const unsigned ArraySize) {
53- return commonAlignment (MaxPTXArrayAlignment, ArraySize);
74+ Align NVPTXIncreaseLocalAlignment::getConservativeArrayAlignment (
75+ const unsigned ArraySize) {
76+ return commonAlignment (MaxAlign, ArraySize);
5477}
5578
5679// / Find a better alignment for local arrays
57- static bool updateAllocaAlignment (const DataLayout &DL, AllocaInst *Alloca) {
80+ bool NVPTXIncreaseLocalAlignment::updateAllocaAlignment (AllocaInst *Alloca,
81+ const DataLayout &DL) {
5882 // Looking for statically sized local arrays
5983 if (!Alloca->isStaticAlloca ())
6084 return false ;
6185
62- // For now, we only support array allocas
63- if (!(Alloca->isArrayAllocation () || Alloca->getAllocatedType ()->isArrayTy ()))
64- return false ;
65-
6686 const auto ArraySize = Alloca->getAllocationSize (DL);
6787 if (!(ArraySize && ArraySize->isFixed ()))
6888 return false ;
@@ -80,14 +100,14 @@ static bool updateAllocaAlignment(const DataLayout &DL, AllocaInst *Alloca) {
80100 return false ;
81101}
82102
83- static bool runSetLocalArrayAlignment (Function &F) {
103+ bool NVPTXIncreaseLocalAlignment::run (Function &F) {
84104 bool Changed = false ;
85- const DataLayout &DL = F.getParent ()->getDataLayout ();
105+ const auto &DL = F.getParent ()->getDataLayout ();
86106
87107 BasicBlock &EntryBB = F.getEntryBlock ();
88108 for (Instruction &I : EntryBB)
89109 if (AllocaInst *Alloca = dyn_cast<AllocaInst>(&I))
90- Changed |= updateAllocaAlignment (DL, Alloca );
110+ Changed |= updateAllocaAlignment (Alloca, DL );
91111
92112 return Changed;
93113}
@@ -98,6 +118,9 @@ struct NVPTXIncreaseLocalAlignmentLegacyPass : public FunctionPass {
98118 NVPTXIncreaseLocalAlignmentLegacyPass () : FunctionPass(ID) {}
99119
100120 bool runOnFunction (Function &F) override ;
121+ void getAnalysisUsage (AnalysisUsage &AU) const override {
122+ AU.addRequired <TargetTransformInfoWrapperPass>();
123+ }
101124 StringRef getPassName () const override {
102125 return " NVPTX Increase Local Alignment" ;
103126 }
@@ -115,12 +138,15 @@ FunctionPass *llvm::createNVPTXIncreaseLocalAlignmentPass() {
115138}
116139
117140bool NVPTXIncreaseLocalAlignmentLegacyPass::runOnFunction (Function &F) {
118- return runSetLocalArrayAlignment (F);
141+ const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI (F);
142+ return NVPTXIncreaseLocalAlignment (TTI).run (F);
119143}
120144
121145PreservedAnalyses
122- NVPTXIncreaseLocalAlignmentPass::run (Function &F, FunctionAnalysisManager &AM) {
123- bool Changed = runSetLocalArrayAlignment (F);
146+ NVPTXIncreaseLocalAlignmentPass::run (Function &F,
147+ FunctionAnalysisManager &FAM) {
148+ const auto &TTI = FAM.getResult <TargetIRAnalysis>(F);
149+ bool Changed = NVPTXIncreaseLocalAlignment (TTI).run (F);
124150
125151 if (!Changed)
126152 return PreservedAnalyses::all ();
0 commit comments