//
//===----------------------------------------------------------------------===//
//
// A simple pass that looks at local memory allocas that are statically
// sized and potentially increases their alignment. This enables vectorization
// of loads/stores to these allocas if not explicitly specified by the client.
//
// TODO: Ideally we should do a bin-packing of local allocas to maximize
// alignments while minimizing holes.
//
//===----------------------------------------------------------------------===//
2828
2929using namespace llvm ;
3030
31- static cl::opt<bool >
32- MaxLocalArrayAlignment ( " nvptx-use-max -local-array- alignment" ,
33- cl::init ( false ), cl::Hidden,
34- cl::desc( " Use maximum alignment for local memory " ));
31+ static cl::opt<unsigned > MinLocalArrayAlignment (
32+ " nvptx-ensure-minimum -local-alignment" , cl::init( 16 ), cl::Hidden ,
33+ cl::desc(
34+ " Ensure local memory objects are at least this aligned (default 16) " ));
3535
3636static Align getMaxLocalArrayAlignment (const TargetTransformInfo &TTI) {
3737 const unsigned MaxBitWidth =
@@ -41,45 +41,46 @@ static Align getMaxLocalArrayAlignment(const TargetTransformInfo &TTI) {
4141
4242namespace {
4343struct NVPTXIncreaseLocalAlignment {
44- const Align MaxAlign ;
44+ const Align MaxUsableAlign ;
4545
4646 NVPTXIncreaseLocalAlignment (const TargetTransformInfo &TTI)
47- : MaxAlign (getMaxLocalArrayAlignment(TTI)) {}
47+ : MaxUsableAlign (getMaxLocalArrayAlignment(TTI)) {}
4848
4949 bool run (Function &F);
5050 bool updateAllocaAlignment (AllocaInst *Alloca, const DataLayout &DL);
51- Align getAggressiveArrayAlignment (unsigned ArraySize);
52- Align getConservativeArrayAlignment (unsigned ArraySize);
51+ Align getMaxUsefulArrayAlignment (unsigned ArraySize);
52+ Align getMaxSafeLocalAlignment (unsigned ArraySize);
5353};
5454} // namespace
5555
56- // / Get the maximum useful alignment for an array . This is more likely to
56+ // / Get the maximum useful alignment for an allocation . This is more likely to
5757// / produce holes in the local memory.
5858// /
59- // / Choose an alignment large enough that the entire array could be loaded with
60- // / a single vector load (if possible). Cap the alignment at
61- // / MaxPTXArrayAlignment .
62- Align NVPTXIncreaseLocalAlignment::getAggressiveArrayAlignment (
59+ // / Choose an alignment large enough that the entire alloca could be loaded
60+ // / with a single vector load (if possible). Cap the alignment at
61+ // / MinLocalArrayAlignment and MaxUsableAlign .
62+ Align NVPTXIncreaseLocalAlignment::getMaxUsefulArrayAlignment (
6363 const unsigned ArraySize) {
64- return std::min (MaxAlign, Align (PowerOf2Ceil (ArraySize)));
64+ const Align UpperLimit =
65+ std::min (MaxUsableAlign, Align (MinLocalArrayAlignment));
66+ return std::min (UpperLimit, Align (PowerOf2Ceil (ArraySize)));
6567}
6668
67- // / Get the alignment of arrays that reduces the chances of leaving holes when
68- // / arrays are allocated within a contiguous memory buffer (like shared memory
69- // / and stack). Holes are still possible before and after the array allocation.
69+ // / Get the alignment of allocas that reduces the chances of leaving holes when
70+ // / they are allocated within a contiguous memory buffer (like the stack).
71+ // / Holes are still possible before and after the allocation.
7072// /
71- // / Choose the largest alignment such that the array size is a multiple of the
72- // / alignment. If all elements of the buffer are allocated in order of
73+ // / Choose the largest alignment such that the allocation size is a multiple of
74+ // / the alignment. If all elements of the buffer are allocated in order of
7375// / alignment (higher to lower) no holes will be left.
74- Align NVPTXIncreaseLocalAlignment::getConservativeArrayAlignment (
76+ Align NVPTXIncreaseLocalAlignment::getMaxSafeLocalAlignment (
7577 const unsigned ArraySize) {
76- return commonAlignment (MaxAlign , ArraySize);
78+ return commonAlignment (MaxUsableAlign , ArraySize);
7779}
7880
79- // / Find a better alignment for local arrays
81+ // / Find a better alignment for local allocas.
8082bool NVPTXIncreaseLocalAlignment::updateAllocaAlignment (AllocaInst *Alloca,
8183 const DataLayout &DL) {
82- // Looking for statically sized local arrays
8384 if (!Alloca->isStaticAlloca ())
8485 return false ;
8586
@@ -88,12 +89,15 @@ bool NVPTXIncreaseLocalAlignment::updateAllocaAlignment(AllocaInst *Alloca,
8889 return false ;
8990
9091 const auto ArraySizeValue = ArraySize->getFixedValue ();
91- const Align PreferredAlignment =
92- MaxLocalArrayAlignment ? getAggressiveArrayAlignment (ArraySizeValue)
93- : getConservativeArrayAlignment (ArraySizeValue);
92+ if (ArraySizeValue == 0 )
93+ return false ;
94+
95+ const Align NewAlignment =
96+ std::max (getMaxSafeLocalAlignment (ArraySizeValue),
97+ getMaxUsefulArrayAlignment (ArraySizeValue));
9498
95- if (PreferredAlignment > Alloca->getAlign ()) {
96- Alloca->setAlignment (PreferredAlignment );
99+ if (NewAlignment > Alloca->getAlign ()) {
100+ Alloca->setAlignment (NewAlignment );
97101 return true ;
98102 }
99103
@@ -130,8 +134,7 @@ struct NVPTXIncreaseLocalAlignmentLegacyPass : public FunctionPass {
130134char NVPTXIncreaseLocalAlignmentLegacyPass::ID = 0 ;
131135INITIALIZE_PASS (NVPTXIncreaseLocalAlignmentLegacyPass,
132136 " nvptx-increase-local-alignment" ,
133- " Increase alignment for statically sized alloca arrays" , false ,
134- false )
137+ " Increase alignment for statically sized allocas" , false , false )
135138
136139FunctionPass *llvm::createNVPTXIncreaseLocalAlignmentPass() {
137140 return new NVPTXIncreaseLocalAlignmentLegacyPass ();
0 commit comments