@@ -42,25 +42,25 @@ namespace {
4242
4343class AMDGPULateCodeGenPrepare
4444 : public InstVisitor<AMDGPULateCodeGenPrepare, bool > {
45- Module *Mod = nullptr ;
46- const DataLayout *DL = nullptr ;
45+ Function &F ;
46+ const DataLayout &DL ;
4747 const GCNSubtarget &ST;
4848
49- AssumptionCache *AC = nullptr ;
50- UniformityInfo *UA = nullptr ;
49+ AssumptionCache *const AC ;
50+ UniformityInfo &UA ;
5151
5252 SmallVector<WeakTrackingVH, 8 > DeadInsts;
5353
5454public:
55- AMDGPULateCodeGenPrepare (Module &M , const GCNSubtarget &ST,
56- AssumptionCache *AC, UniformityInfo * UA)
57- : Mod(&M ), DL(&M .getDataLayout()), ST(ST), AC(AC), UA(UA) {}
58- bool run (Function &F );
55+ AMDGPULateCodeGenPrepare (Function &F , const GCNSubtarget &ST,
56+ AssumptionCache *AC, UniformityInfo & UA)
57+ : F(F ), DL(F .getDataLayout()), ST(ST), AC(AC), UA(UA) {}
58+ bool run ();
5959 bool visitInstruction (Instruction &) { return false ; }
6060
6161 // Check if the specified value is at least DWORD aligned.
6262 bool isDWORDAligned (const Value *V) const {
63- KnownBits Known = computeKnownBits (V, * DL, 0 , AC);
63+ KnownBits Known = computeKnownBits (V, DL, 0 , AC);
6464 return Known.countMinTrailingZeros () >= 2 ;
6565 }
6666
@@ -72,11 +72,11 @@ using ValueToValueMap = DenseMap<const Value *, Value *>;
7272
7373class LiveRegOptimizer {
7474private:
75- Module * Mod = nullptr ;
76- const DataLayout *DL = nullptr ;
77- const GCNSubtarget * ST;
75+ Module & Mod;
76+ const DataLayout &DL ;
77+ const GCNSubtarget & ST;
7878 /// The scalar type to convert to
79- Type *ConvertToScalar;
79+ Type *const ConvertToScalar;
8080 /// The set of visited Instructions
8181 SmallPtrSet<Instruction *, 4 > Visited;
8282 /// Map of Value -> Converted Value
@@ -110,7 +110,7 @@ class LiveRegOptimizer {
110110 if (!VTy)
111111 return false ;
112112
113- const auto *TLI = ST-> getTargetLowering ();
113+ const auto *TLI = ST. getTargetLowering ();
114114
115115 Type *EltTy = VTy->getElementType ();
116116 // If the element size is not less than the convert to scalar size, then we
@@ -125,23 +125,22 @@ class LiveRegOptimizer {
125125 return LK.first != TargetLoweringBase::TypeLegal;
126126 }
127127
128- LiveRegOptimizer (Module *Mod, const GCNSubtarget *ST) : Mod(Mod), ST(ST) {
129- DL = &Mod->getDataLayout ();
130- ConvertToScalar = Type::getInt32Ty (Mod->getContext ());
131- }
128+ LiveRegOptimizer (Module &Mod, const GCNSubtarget &ST)
129+ : Mod(Mod), DL(Mod.getDataLayout()), ST(ST),
130+ ConvertToScalar (Type::getInt32Ty(Mod.getContext())) {}
132131};
133132
134133} // end anonymous namespace
135134
136- bool AMDGPULateCodeGenPrepare::run (Function &F ) {
135+ bool AMDGPULateCodeGenPrepare::run () {
137136 // "Optimize" the virtual regs that cross basic block boundaries. When
138137 // building the SelectionDAG, vectors of illegal types that cross basic blocks
139138 // will be scalarized and widened, with each scalar living in its
140139 // own register. To work around this, this optimization converts the
141140 // vectors to equivalent vectors of legal type (which are converted back
142141 // before uses in subsequent blocks), to pack the bits into fewer physical
143142 // registers (used in CopyToReg/CopyFromReg pairs).
144- LiveRegOptimizer LRO (Mod, & ST);
143+ LiveRegOptimizer LRO (*F. getParent (), ST);
145144
146145 bool Changed = false ;
147146
@@ -163,15 +162,15 @@ Type *LiveRegOptimizer::calculateConvertType(Type *OriginalType) {
163162
164163 FixedVectorType *VTy = cast<FixedVectorType>(OriginalType);
165164
166- TypeSize OriginalSize = DL-> getTypeSizeInBits (VTy);
167- TypeSize ConvertScalarSize = DL-> getTypeSizeInBits (ConvertToScalar);
165+ TypeSize OriginalSize = DL. getTypeSizeInBits (VTy);
166+ TypeSize ConvertScalarSize = DL. getTypeSizeInBits (ConvertToScalar);
168167 unsigned ConvertEltCount =
169168 (OriginalSize + ConvertScalarSize - 1 ) / ConvertScalarSize;
170169
171170 if (OriginalSize <= ConvertScalarSize)
172- return IntegerType::get (Mod-> getContext (), ConvertScalarSize);
171+ return IntegerType::get (Mod. getContext (), ConvertScalarSize);
173172
174- return VectorType::get (Type::getIntNTy (Mod-> getContext (), ConvertScalarSize),
173+ return VectorType::get (Type::getIntNTy (Mod. getContext (), ConvertScalarSize),
175174 ConvertEltCount, false );
176175}
177176
@@ -180,8 +179,8 @@ Value *LiveRegOptimizer::convertToOptType(Instruction *V,
180179 FixedVectorType *VTy = cast<FixedVectorType>(V->getType ());
181180 Type *NewTy = calculateConvertType (V->getType ());
182181
183- TypeSize OriginalSize = DL-> getTypeSizeInBits (VTy);
184- TypeSize NewSize = DL-> getTypeSizeInBits (NewTy);
182+ TypeSize OriginalSize = DL. getTypeSizeInBits (VTy);
183+ TypeSize NewSize = DL. getTypeSizeInBits (NewTy);
185184
186185 IRBuilder<> Builder (V->getParent (), InsertPt);
187186 // If there is a bitsize match, we can fit the old vector into a new vector of
@@ -210,8 +209,8 @@ Value *LiveRegOptimizer::convertFromOptType(Type *ConvertType, Instruction *V,
210209 BasicBlock *InsertBB) {
211210 FixedVectorType *NewVTy = cast<FixedVectorType>(ConvertType);
212211
213- TypeSize OriginalSize = DL-> getTypeSizeInBits (V->getType ());
214- TypeSize NewSize = DL-> getTypeSizeInBits (NewVTy);
212+ TypeSize OriginalSize = DL. getTypeSizeInBits (V->getType ());
213+ TypeSize NewSize = DL. getTypeSizeInBits (NewVTy);
215214
216215 IRBuilder<> Builder (InsertBB, InsertPt);
217216 // If there is a bitsize match, we simply convert back to the original type.
@@ -224,14 +223,14 @@ Value *LiveRegOptimizer::convertFromOptType(Type *ConvertType, Instruction *V,
224223 // For wide scalars, we can just truncate the value.
225224 if (!V->getType ()->isVectorTy ()) {
226225 Instruction *Trunc = cast<Instruction>(
227- Builder.CreateTrunc (V, IntegerType::get (Mod-> getContext (), NewSize)));
226+ Builder.CreateTrunc (V, IntegerType::get (Mod. getContext (), NewSize)));
228227 return cast<Instruction>(Builder.CreateBitCast (Trunc, NewVTy));
229228 }
230229
231230 // For wider vectors, we must strip the MSBs to convert back to the original
232231 // type.
233232 VectorType *ExpandedVT = VectorType::get (
234- Type::getIntNTy (Mod-> getContext (), NewVTy->getScalarSizeInBits ()),
233+ Type::getIntNTy (Mod. getContext (), NewVTy->getScalarSizeInBits ()),
235234 (OriginalSize / NewVTy->getScalarSizeInBits ()), false );
236235 Instruction *Converted =
237236 cast<Instruction>(Builder.CreateBitCast (V, ExpandedVT));
@@ -410,15 +409,15 @@ bool AMDGPULateCodeGenPrepare::canWidenScalarExtLoad(LoadInst &LI) const {
410409 // Skip aggregate types.
411410 if (Ty->isAggregateType ())
412411 return false ;
413- unsigned TySize = DL-> getTypeStoreSize (Ty);
412+ unsigned TySize = DL. getTypeStoreSize (Ty);
414413 // Only handle sub-DWORD loads.
415414 if (TySize >= 4 )
416415 return false ;
417416 // That load must be at least naturally aligned.
418- if (LI.getAlign () < DL-> getABITypeAlign (Ty))
417+ if (LI.getAlign () < DL. getABITypeAlign (Ty))
419418 return false ;
420419 // It should be uniform, i.e. a scalar load.
421- return UA-> isUniform (&LI);
420+ return UA. isUniform (&LI);
422421}
423422
424423bool AMDGPULateCodeGenPrepare::visitLoadInst (LoadInst &LI) {
@@ -435,7 +434,7 @@ bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
435434
436435 int64_t Offset = 0 ;
437436 auto *Base =
438- GetPointerBaseWithConstantOffset (LI.getPointerOperand (), Offset, * DL);
437+ GetPointerBaseWithConstantOffset (LI.getPointerOperand (), Offset, DL);
439438 // If that base is not DWORD aligned, it's not safe to perform the following
440439 // transforms.
441440 if (!isDWORDAligned (Base))
@@ -452,7 +451,7 @@ bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
452451 IRBuilder<> IRB (&LI);
453452 IRB.SetCurrentDebugLocation (LI.getDebugLoc ());
454453
455- unsigned LdBits = DL-> getTypeStoreSizeInBits (LI.getType ());
454+ unsigned LdBits = DL. getTypeStoreSizeInBits (LI.getType ());
456455 auto *IntNTy = Type::getIntNTy (LI.getContext (), LdBits);
457456
458457 auto *NewPtr = IRB.CreateConstGEP1_64 (
@@ -480,9 +479,7 @@ AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) {
480479 AssumptionCache &AC = FAM.getResult <AssumptionAnalysis>(F);
481480 UniformityInfo &UI = FAM.getResult <UniformityInfoAnalysis>(F);
482481
483- AMDGPULateCodeGenPrepare Impl (*F.getParent (), ST, &AC, &UI);
484-
485- bool Changed = Impl.run (F);
482+ bool Changed = AMDGPULateCodeGenPrepare (F, ST, &AC, UI).run ();
486483
487484 if (!Changed)
488485 return PreservedAnalyses::all ();
@@ -524,9 +521,7 @@ bool AMDGPULateCodeGenPrepareLegacy::runOnFunction(Function &F) {
524521 UniformityInfo &UI =
525522 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo ();
526523
527- AMDGPULateCodeGenPrepare Impl (*F.getParent (), ST, &AC, &UI);
528-
529- return Impl.run (F);
524+ return AMDGPULateCodeGenPrepare (F, ST, &AC, UI).run ();
530525}
531526
532527INITIALIZE_PASS_BEGIN (AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
0 commit comments