[AMDGPU] Refine AMDGPUCodeGenPrepareImpl class. NFC. #118461
Merged
Use references instead of pointers for most state, initialize it all in the constructor, and common up some of the initialization between the legacy and new pass manager paths.
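The general shape of the refactoring is easier to see in miniature. The sketch below uses hypothetical stand-in types (Function, Subtarget, ImplBefore, ImplAfter), not the actual LLVM classes touched by this patch; it only illustrates the move from lazily-set pointer members to reference members bound once in the constructor, so that both pass-manager entry points simply construct the object and call run().

// Minimal self-contained sketch of the pattern (hypothetical names only).
#include <string>

struct Function { std::string Name; };
struct Subtarget { bool Has16BitInsts = true; };

// Before: pointer members filled in piecemeal by whichever pass-manager path
// runs, so every use has to tolerate state that might still be null.
struct ImplBefore {
  const Function *F = nullptr;
  const Subtarget *ST = nullptr;
  bool run(Function &Fn) {
    F = &Fn;                          // per-call re-initialization
    return ST && ST->Has16BitInsts;   // must guard against a null subtarget
  }
};

// After: reference members bound exactly once in the constructor; the object
// is fully initialized on construction and run() takes no arguments.
struct ImplAfter {
  Function &F;
  const Subtarget &ST;
  ImplAfter(Function &F, const Subtarget &ST) : F(F), ST(ST) {}
  bool run() const { return ST.Has16BitInsts; }
};

int main() {
  Function F{"kernel"};
  Subtarget ST;
  return ImplAfter(F, ST).run() ? 0 : 1;  // construct-and-run, as in the patch
}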
@llvm/pr-subscribers-backend-amdgpu

Author: Jay Foad (jayfoad)

Changes: Use references instead of pointers for most state, initialize it all in the constructor, and common up some of the initialization between the legacy and new pass manager paths.

Patch is 20.54 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/118461.diff

1 file affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 7257b53afe69d0..5c92428c5f1859 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -95,32 +95,45 @@ static cl::opt<bool> DisableFDivExpand(
cl::ReallyHidden,
cl::init(false));
+static bool hasUnsafeFPMath(const Function &F) {
+ return F.getFnAttribute("unsafe-fp-math").getValueAsBool();
+}
+
class AMDGPUCodeGenPrepareImpl
: public InstVisitor<AMDGPUCodeGenPrepareImpl, bool> {
public:
- const GCNSubtarget *ST = nullptr;
- const AMDGPUTargetMachine *TM = nullptr;
- const TargetLibraryInfo *TLInfo = nullptr;
- AssumptionCache *AC = nullptr;
- DominatorTree *DT = nullptr;
- UniformityInfo *UA = nullptr;
- Module *Mod = nullptr;
- const DataLayout *DL = nullptr;
- bool HasUnsafeFPMath = false;
- bool HasFP32DenormalFlush = false;
+ Function &F;
+ const GCNSubtarget &ST;
+ const AMDGPUTargetMachine &TM;
+ const TargetLibraryInfo *TLI;
+ AssumptionCache *AC;
+ const DominatorTree *DT;
+ const UniformityInfo &UA;
+ const DataLayout &DL;
+ bool HasUnsafeFPMath;
+ bool HasFP32DenormalFlush;
bool FlowChanged = false;
mutable Function *SqrtF32 = nullptr;
mutable Function *LdexpF32 = nullptr;
DenseMap<const PHINode *, bool> BreakPhiNodesCache;
+ AMDGPUCodeGenPrepareImpl(Function &F, const AMDGPUTargetMachine &TM,
+ const TargetLibraryInfo *TLI, AssumptionCache *AC,
+ const DominatorTree *DT, const UniformityInfo &UA)
+ : F(F), ST(TM.getSubtarget<GCNSubtarget>(F)), TM(TM), TLI(TLI), AC(AC),
+ DT(DT), UA(UA), DL(F.getDataLayout()),
+ HasUnsafeFPMath(hasUnsafeFPMath(F)),
+ HasFP32DenormalFlush(SIModeRegisterDefaults(F, ST).FP32Denormals ==
+ DenormalMode::getPreserveSign()) {}
+
Function *getSqrtF32() const {
if (SqrtF32)
return SqrtF32;
- LLVMContext &Ctx = Mod->getContext();
- SqrtF32 = Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::amdgcn_sqrt,
- {Type::getFloatTy(Ctx)});
+ LLVMContext &Ctx = F.getContext();
+ SqrtF32 = Intrinsic::getOrInsertDeclaration(
+ F.getParent(), Intrinsic::amdgcn_sqrt, {Type::getFloatTy(Ctx)});
return SqrtF32;
}
@@ -128,9 +141,10 @@ class AMDGPUCodeGenPrepareImpl
if (LdexpF32)
return LdexpF32;
- LLVMContext &Ctx = Mod->getContext();
+ LLVMContext &Ctx = F.getContext();
LdexpF32 = Intrinsic::getOrInsertDeclaration(
- Mod, Intrinsic::ldexp, {Type::getFloatTy(Ctx), Type::getInt32Ty(Ctx)});
+ F.getParent(), Intrinsic::ldexp,
+ {Type::getFloatTy(Ctx), Type::getInt32Ty(Ctx)});
return LdexpF32;
}
@@ -166,8 +180,7 @@ class AMDGPUCodeGenPrepareImpl
/// Wrapper to pass all the arguments to computeKnownFPClass
KnownFPClass computeKnownFPClass(const Value *V, FPClassTest Interested,
const Instruction *CtxI) const {
- return llvm::computeKnownFPClass(V, *DL, Interested, 0, TLInfo, AC, CtxI,
- DT);
+ return llvm::computeKnownFPClass(V, DL, Interested, 0, TLI, AC, CtxI, DT);
}
bool canIgnoreDenormalInput(const Value *V, const Instruction *CtxI) const {
@@ -317,13 +330,10 @@ class AMDGPUCodeGenPrepareImpl
bool visitBitreverseIntrinsicInst(IntrinsicInst &I);
bool visitMinNum(IntrinsicInst &I);
bool visitSqrt(IntrinsicInst &I);
- bool run(Function &F);
+ bool run();
};
class AMDGPUCodeGenPrepare : public FunctionPass {
-private:
- AMDGPUCodeGenPrepareImpl Impl;
-
public:
static char ID;
AMDGPUCodeGenPrepare() : FunctionPass(ID) {
@@ -339,13 +349,12 @@ class AMDGPUCodeGenPrepare : public FunctionPass {
AU.setPreservesAll();
}
bool runOnFunction(Function &F) override;
- bool doInitialization(Module &M) override;
StringRef getPassName() const override { return "AMDGPU IR optimizations"; }
};
} // end anonymous namespace
-bool AMDGPUCodeGenPrepareImpl::run(Function &F) {
+bool AMDGPUCodeGenPrepareImpl::run() {
BreakPhiNodesCache.clear();
bool MadeChange = false;
@@ -411,7 +420,7 @@ bool AMDGPUCodeGenPrepareImpl::needsPromotionToI32(const Type *T) const {
if (const VectorType *VT = dyn_cast<VectorType>(T)) {
// TODO: The set of packed operations is more limited, so may want to
// promote some anyway.
- if (ST->hasVOP3PInsts())
+ if (ST.hasVOP3PInsts())
return false;
return needsPromotionToI32(VT->getElementType());
@@ -422,7 +431,7 @@ bool AMDGPUCodeGenPrepareImpl::needsPromotionToI32(const Type *T) const {
bool AMDGPUCodeGenPrepareImpl::isLegalFloatingTy(const Type *Ty) const {
return Ty->isFloatTy() || Ty->isDoubleTy() ||
- (Ty->isHalfTy() && ST->has16BitInsts());
+ (Ty->isHalfTy() && ST.has16BitInsts());
}
// Return true if the op promoted to i32 should have nsw set.
@@ -455,11 +464,10 @@ static bool promotedOpIsNUW(const Instruction &I) {
bool AMDGPUCodeGenPrepareImpl::canWidenScalarExtLoad(LoadInst &I) const {
Type *Ty = I.getType();
- const DataLayout &DL = Mod->getDataLayout();
int TySize = DL.getTypeSizeInBits(Ty);
Align Alignment = DL.getValueOrABITypeAlignment(I.getAlign(), Ty);
- return I.isSimple() && TySize < 32 && Alignment >= 4 && UA->isUniform(&I);
+ return I.isSimple() && TySize < 32 && Alignment >= 4 && UA.isUniform(&I);
}
bool AMDGPUCodeGenPrepareImpl::promoteUniformOpToI32(BinaryOperator &I) const {
@@ -591,11 +599,11 @@ bool AMDGPUCodeGenPrepareImpl::promoteUniformBitreverseToI32(
}
unsigned AMDGPUCodeGenPrepareImpl::numBitsUnsigned(Value *Op) const {
- return computeKnownBits(Op, *DL, 0, AC).countMaxActiveBits();
+ return computeKnownBits(Op, DL, 0, AC).countMaxActiveBits();
}
unsigned AMDGPUCodeGenPrepareImpl::numBitsSigned(Value *Op) const {
- return ComputeMaxSignificantBits(Op, *DL, 0, AC);
+ return ComputeMaxSignificantBits(Op, DL, 0, AC);
}
static void extractValues(IRBuilder<> &Builder,
@@ -631,11 +639,11 @@ bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
Type *Ty = I.getType();
unsigned Size = Ty->getScalarSizeInBits();
- if (Size <= 16 && ST->has16BitInsts())
+ if (Size <= 16 && ST.has16BitInsts())
return false;
// Prefer scalar if this could be s_mul_i32
- if (UA->isUniform(&I))
+ if (UA.isUniform(&I))
return false;
Value *LHS = I.getOperand(0);
@@ -646,11 +654,11 @@ bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
unsigned LHSBits = 0, RHSBits = 0;
bool IsSigned = false;
- if (ST->hasMulU24() && (LHSBits = numBitsUnsigned(LHS)) <= 24 &&
+ if (ST.hasMulU24() && (LHSBits = numBitsUnsigned(LHS)) <= 24 &&
(RHSBits = numBitsUnsigned(RHS)) <= 24) {
IsSigned = false;
- } else if (ST->hasMulI24() && (LHSBits = numBitsSigned(LHS)) <= 24 &&
+ } else if (ST.hasMulI24() && (LHSBits = numBitsSigned(LHS)) <= 24 &&
(RHSBits = numBitsSigned(RHS)) <= 24) {
IsSigned = true;
@@ -730,21 +738,21 @@ bool AMDGPUCodeGenPrepareImpl::foldBinOpIntoSelect(BinaryOperator &BO) const {
if (CastOp) {
if (!CastOp->hasOneUse())
return false;
- CT = ConstantFoldCastOperand(CastOp->getOpcode(), CT, BO.getType(), *DL);
- CF = ConstantFoldCastOperand(CastOp->getOpcode(), CF, BO.getType(), *DL);
+ CT = ConstantFoldCastOperand(CastOp->getOpcode(), CT, BO.getType(), DL);
+ CF = ConstantFoldCastOperand(CastOp->getOpcode(), CF, BO.getType(), DL);
}
// TODO: Handle special 0/-1 cases DAG combine does, although we only really
// need to handle divisions here.
- Constant *FoldedT = SelOpNo ?
- ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CT, *DL) :
- ConstantFoldBinaryOpOperands(BO.getOpcode(), CT, CBO, *DL);
+ Constant *FoldedT =
+ SelOpNo ? ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CT, DL)
+ : ConstantFoldBinaryOpOperands(BO.getOpcode(), CT, CBO, DL);
if (!FoldedT || isa<ConstantExpr>(FoldedT))
return false;
- Constant *FoldedF = SelOpNo ?
- ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CF, *DL) :
- ConstantFoldBinaryOpOperands(BO.getOpcode(), CF, CBO, *DL);
+ Constant *FoldedF =
+ SelOpNo ? ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CF, DL)
+ : ConstantFoldBinaryOpOperands(BO.getOpcode(), CF, CBO, DL);
if (!FoldedF || isa<ConstantExpr>(FoldedF))
return false;
@@ -777,7 +785,7 @@ AMDGPUCodeGenPrepareImpl::getFrexpResults(IRBuilder<> &Builder,
// result? It's unspecified by the spec.
Value *FrexpExp =
- ST->hasFractBug()
+ ST.hasFractBug()
? Builder.CreateIntrinsic(Intrinsic::amdgcn_frexp_exp,
{Builder.getInt32Ty(), Ty}, Src)
: Builder.CreateExtractValue(Frexp, {1});
@@ -815,7 +823,7 @@ Value *AMDGPUCodeGenPrepareImpl::emitFrexpDiv(IRBuilder<> &Builder, Value *LHS,
// If we have have to work around the fract/frexp bug, we're worse off than
// using the fdiv.fast expansion. The full safe expansion is faster if we have
// fast FMA.
- if (HasFP32DenormalFlush && ST->hasFractBug() && !ST->hasFastFMAF32() &&
+ if (HasFP32DenormalFlush && ST.hasFractBug() && !ST.hasFastFMAF32() &&
(!FMF.noNaNs() || !FMF.noInfs()))
return nullptr;
@@ -1157,17 +1165,12 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
if (NewVal) {
FDiv.replaceAllUsesWith(NewVal);
NewVal->takeName(&FDiv);
- RecursivelyDeleteTriviallyDeadInstructions(&FDiv, TLInfo);
+ RecursivelyDeleteTriviallyDeadInstructions(&FDiv, TLI);
}
return true;
}
-static bool hasUnsafeFPMath(const Function &F) {
- Attribute Attr = F.getFnAttribute("unsafe-fp-math");
- return Attr.getValueAsBool();
-}
-
static std::pair<Value*, Value*> getMul64(IRBuilder<> &Builder,
Value *LHS, Value *RHS) {
Type *I32Ty = Builder.getInt32Ty();
@@ -1192,7 +1195,6 @@ static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) {
int AMDGPUCodeGenPrepareImpl::getDivNumBits(BinaryOperator &I, Value *Num,
Value *Den, unsigned AtLeast,
bool IsSigned) const {
- const DataLayout &DL = Mod->getDataLayout();
unsigned LHSSignBits = ComputeNumSignBits(Num, DL, 0, AC, &I);
if (LHSSignBits < AtLeast)
return -1;
@@ -1271,7 +1273,7 @@ Value *AMDGPUCodeGenPrepareImpl::expandDivRem24Impl(
Value *FQNeg = Builder.CreateFNeg(FQ);
// float fr = mad(fqneg, fb, fa);
- auto FMAD = !ST->hasMadMacF32Insts()
+ auto FMAD = !ST.hasMadMacF32Insts()
? Intrinsic::fma
: (Intrinsic::ID)Intrinsic::amdgcn_fmad_ftz;
Value *FR = Builder.CreateIntrinsic(FMAD,
@@ -1338,7 +1340,7 @@ bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I,
// If there's no wider mulhi, there's only a better expansion for powers of
// two.
// TODO: Should really know for each vector element.
- if (isKnownToBeAPowerOfTwo(C, *DL, true, 0, AC, &I, DT))
+ if (isKnownToBeAPowerOfTwo(C, DL, true, 0, AC, &I, DT))
return true;
return false;
@@ -1348,8 +1350,8 @@ bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I,
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (BinOpDen->getOpcode() == Instruction::Shl &&
isa<Constant>(BinOpDen->getOperand(0)) &&
- isKnownToBeAPowerOfTwo(BinOpDen->getOperand(0), *DL, true,
- 0, AC, &I, DT)) {
+ isKnownToBeAPowerOfTwo(BinOpDen->getOperand(0), DL, true, 0, AC, &I,
+ DT)) {
return true;
}
}
@@ -1357,9 +1359,9 @@ bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I,
return false;
}
-static Value *getSign32(Value *V, IRBuilder<> &Builder, const DataLayout *DL) {
+static Value *getSign32(Value *V, IRBuilder<> &Builder, const DataLayout &DL) {
// Check whether the sign can be determined statically.
- KnownBits Known = computeKnownBits(V, *DL);
+ KnownBits Known = computeKnownBits(V, DL);
if (Known.isNegative())
return Constant::getAllOnesValue(V->getType());
if (Known.isNonNegative())
@@ -1542,8 +1544,8 @@ bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) {
if (foldBinOpIntoSelect(I))
return true;
- if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
- UA->isUniform(&I) && promoteUniformOpToI32(I))
+ if (ST.has16BitInsts() && needsPromotionToI32(I.getType()) &&
+ UA.isUniform(&I) && promoteUniformOpToI32(I))
return true;
if (UseMul24Intrin && replaceMulWithMul24(I))
@@ -1655,11 +1657,11 @@ bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) {
};
WidenLoad->setMetadata(LLVMContext::MD_range,
- MDNode::get(Mod->getContext(), LowAndHigh));
+ MDNode::get(F.getContext(), LowAndHigh));
}
}
- int TySize = Mod->getDataLayout().getTypeSizeInBits(I.getType());
+ int TySize = DL.getTypeSizeInBits(I.getType());
Type *IntNTy = Builder.getIntNTy(TySize);
Value *ValTrunc = Builder.CreateTrunc(WidenLoad, IntNTy);
Value *ValOrig = Builder.CreateBitCast(ValTrunc, I.getType());
@@ -1674,8 +1676,8 @@ bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) {
bool AMDGPUCodeGenPrepareImpl::visitICmpInst(ICmpInst &I) {
bool Changed = false;
- if (ST->has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
- UA->isUniform(&I))
+ if (ST.has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
+ UA.isUniform(&I))
Changed |= promoteUniformOpToI32(I);
return Changed;
@@ -1688,8 +1690,8 @@ bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) {
Value *CmpVal;
FCmpInst::Predicate Pred;
- if (ST->has16BitInsts() && needsPromotionToI32(I.getType())) {
- if (UA->isUniform(&I))
+ if (ST.has16BitInsts() && needsPromotionToI32(I.getType())) {
+ if (UA.isUniform(&I))
return promoteUniformOpToI32(I);
return false;
}
@@ -1722,7 +1724,7 @@ bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) {
Fract->takeName(&I);
I.replaceAllUsesWith(Fract);
- RecursivelyDeleteTriviallyDeadInstructions(&I, TLInfo);
+ RecursivelyDeleteTriviallyDeadInstructions(&I, TLI);
return true;
}
@@ -1947,7 +1949,7 @@ bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) {
FixedVectorType *FVT = dyn_cast<FixedVectorType>(I.getType());
if (!FVT || FVT->getNumElements() == 1 ||
- DL->getTypeSizeInBits(FVT) <= BreakLargePHIsThreshold)
+ DL.getTypeSizeInBits(FVT) <= BreakLargePHIsThreshold)
return false;
if (!ForceBreakLargePHIs && !canBreakPHINode(I))
@@ -1960,7 +1962,7 @@ bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) {
unsigned Idx = 0;
// For 8/16 bits type, don't scalarize fully but break it up into as many
// 32-bit slices as we can, and scalarize the tail.
- const unsigned EltSize = DL->getTypeSizeInBits(EltTy);
+ const unsigned EltSize = DL.getTypeSizeInBits(EltTy);
const unsigned NumElts = FVT->getNumElements();
if (EltSize == 8 || EltSize == 16) {
const unsigned SubVecSize = (32 / EltSize);
@@ -2079,7 +2081,7 @@ bool AMDGPUCodeGenPrepareImpl::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
SmallVector<const Value *, 4> WorkList;
getUnderlyingObjects(I.getOperand(0), WorkList);
if (!all_of(WorkList, [&](const Value *V) {
- return isPtrKnownNeverNull(V, *DL, *TM, SrcAS);
+ return isPtrKnownNeverNull(V, DL, TM, SrcAS);
}))
return false;
@@ -2107,8 +2109,8 @@ bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) {
bool AMDGPUCodeGenPrepareImpl::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
bool Changed = false;
- if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
- UA->isUniform(&I))
+ if (ST.has16BitInsts() && needsPromotionToI32(I.getType()) &&
+ UA.isUniform(&I))
Changed |= promoteUniformBitreverseToI32(I);
return Changed;
@@ -2120,7 +2122,7 @@ bool AMDGPUCodeGenPrepareImpl::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
/// If fract is a useful instruction for the subtarget. Does not account for the
/// nan handling; the instruction has a nan check on the input value.
Value *AMDGPUCodeGenPrepareImpl::matchFractPat(IntrinsicInst &I) {
- if (ST->hasFractBug())
+ if (ST.hasFractBug())
return nullptr;
if (I.getIntrinsicID() != Intrinsic::minnum)
@@ -2177,7 +2179,7 @@ bool AMDGPUCodeGenPrepareImpl::visitMinNum(IntrinsicInst &I) {
// Match pattern for fract intrinsic in contexts where the nan check has been
// optimized out (and hope the knowledge the source can't be nan wasn't lost).
if (!I.hasNoNaNs() &&
- !isKnownNeverNaN(FractArg, /*Depth=*/0, SimplifyQuery(*DL, TLInfo)))
+ !isKnownNeverNaN(FractArg, /*Depth=*/0, SimplifyQuery(DL, TLI)))
return false;
IRBuilder<> Builder(&I);
@@ -2189,7 +2191,7 @@ bool AMDGPUCodeGenPrepareImpl::visitMinNum(IntrinsicInst &I) {
Fract->takeName(&I);
I.replaceAllUsesWith(Fract);
- RecursivelyDeleteTriviallyDeadInstructions(&I, TLInfo);
+ RecursivelyDeleteTriviallyDeadInstructions(&I, TLI);
return true;
}
@@ -2201,7 +2203,7 @@ static bool isOneOrNegOne(const Value *Val) {
// Expand llvm.sqrt.f32 calls with !fpmath metadata in a semi-fast way.
bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) {
Type *Ty = Sqrt.getType()->getScalarType();
- if (!Ty->isFloatTy() && (!Ty->isHalfTy() || ST->has16BitInsts()))
+ if (!Ty->isFloatTy() && (!Ty->isHalfTy() || ST.has16BitInsts()))
return false;
const FPMathOperator *FPOp = cast<const FPMathOperator>(&Sqrt);
@@ -2257,14 +2259,6 @@ bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) {
return true;
}
-bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
- Impl.Mod = &M;
- Impl.DL = &Impl.Mod->getDataLayout();
- Impl.SqrtF32 = nullptr;
- Impl.LdexpF32 = nullptr;
- return false;
-}
-
bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
@@ -2274,36 +2268,26 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
return false;
const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();
- Impl.TM = &TM;
- Impl.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- Impl.ST = &TM.getSubtarget<GCNSubtarget>(F);
- Impl.AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- Impl.UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ AssumptionCache *AC =
+ &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- Impl.DT = DTWP ? &DTWP->getDomTree() : nullptr;
- Impl.HasUnsafeFPMath = hasUnsafeFPMath(F);
- SIModeRegisterDefaults Mode(F, *Impl.ST);
- Impl.HasFP32DenormalFlush =
- Mode.FP32Denormals == DenormalMode::getPreserveSign();
- return Impl.run(F);
+ const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ const UniformityInfo &UA =
+ getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
+ return AMDGPUCodeGenPrepareImpl(F, TM, TLI, AC, DT, UA).run();
}
PreservedAnalyses AMDGPUCodeGenPreparePass::run(Function &F,
FunctionAnalysisManager &FAM) {
- AMDGPUCodeGenPrepareImpl Impl;
- Impl.Mod = F.getParent();
- Impl.DL = &Impl.Mod->getDataLayout();
- Impl.TM = static_cast<const AMDGPUTargetMachine *>(&TM);
- Impl.TLInfo = &FAM.getResult<TargetLibraryAnalysis>(F);
- Impl.ST = &TM.getSubtarget<GCNSubtarget>(F);
- Impl.AC = &FAM.getResult<AssumptionAnalysis>(F);
- Impl.UA = &FAM.getResult<UniformityInfoAnalysis>(F);
- Impl.DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
- Impl.HasUnsafeFPMath = hasUnsafeFPMath(F);
- SIModeRegisterDefaults Mode(F, *Impl.ST);
- Impl.HasFP32DenormalFlush =
- Mode.FP32Denormals == DenormalMode::getPreserveSign();
- if (!Impl.run(F))
+ const AMDGPUT...
[truncated]
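The new-pass-manager entry point is cut off by the truncation above. Based on the legacy runOnFunction path shown in the diff and the commit's stated goal of commoning up initialization, it presumably gathers the same analyses from the FunctionAnalysisManager and constructs the Impl once. The following is only a hedged sketch under that assumption, not the committed code.

// Sketch only (assumed shape, not the truncated lines): mirror the legacy
// path by collecting analyses and constructing AMDGPUCodeGenPrepareImpl once.
PreservedAnalyses AMDGPUCodeGenPreparePass::run(Function &F,
                                                FunctionAnalysisManager &FAM) {
  const AMDGPUTargetMachine &ATM = static_cast<const AMDGPUTargetMachine &>(TM);
  const TargetLibraryInfo *TLI = &FAM.getResult<TargetLibraryAnalysis>(F);
  AssumptionCache *AC = &FAM.getResult<AssumptionAnalysis>(F);
  const UniformityInfo &UA = FAM.getResult<UniformityInfoAnalysis>(F);
  const DominatorTree *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
  AMDGPUCodeGenPrepareImpl Impl(F, ATM, TLI, AC, DT, UA);
  if (!Impl.run())
    return PreservedAnalyses::all();
  PreservedAnalyses PA = PreservedAnalyses::none();
  if (!Impl.FlowChanged)
    PA.preserveSet<CFGAnalyses>();  // only instruction-level changes were made
  return PA;
}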
arsenm approved these changes on Dec 3, 2024.
Co-authored-by: Matt Arsenault <[email protected]>