[AMDGPU] Refine AMDGPUCodeGenPrepareImpl class. NFC. #118461
Merged
Use references instead of pointers for most state, initialize it all in the constructor, and common up some of the initialization between the legacy and new pass manager paths.
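The general shape of the refactoring is easier to see in miniature. The sketch below uses hypothetical stand-in types (Function, Subtarget, ImplBefore, ImplAfter), not the actual LLVM classes touched by this patch; it only illustrates the move from lazily-set pointer members to reference members bound once in the constructor, so that both pass-manager entry points simply construct the object and call run().

// Minimal self-contained sketch of the pattern (hypothetical names only).
#include <string>

struct Function { std::string Name; };
struct Subtarget { bool Has16BitInsts = true; };

// Before: pointer members filled in piecemeal by whichever pass-manager path
// runs, so every use has to tolerate state that might still be null.
struct ImplBefore {
  const Function *F = nullptr;
  const Subtarget *ST = nullptr;
  bool run(Function &Fn) {
    F = &Fn;                          // per-call re-initialization
    return ST && ST->Has16BitInsts;   // must guard against a null subtarget
  }
};

// After: reference members bound exactly once in the constructor; the object
// is fully initialized on construction and run() takes no arguments.
struct ImplAfter {
  Function &F;
  const Subtarget &ST;
  ImplAfter(Function &F, const Subtarget &ST) : F(F), ST(ST) {}
  bool run() const { return ST.Has16BitInsts; }
};

int main() {
  Function F{"kernel"};
  Subtarget ST;
  return ImplAfter(F, ST).run() ? 0 : 1;  // construct-and-run, as in the patch
}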
@llvm/pr-subscribers-backend-amdgpu

Author: Jay Foad (jayfoad)

Changes: Use references instead of pointers for most state, initialize it all in the constructor, and common up some of the initialization between the legacy and new pass manager paths.

Patch is 20.54 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/118461.diff

1 file affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 7257b53afe69d0..5c92428c5f1859 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -95,32 +95,45 @@ static cl::opt<bool> DisableFDivExpand(
cl::ReallyHidden,
cl::init(false));
+static bool hasUnsafeFPMath(const Function &F) {
+ return F.getFnAttribute("unsafe-fp-math").getValueAsBool();
+}
+
class AMDGPUCodeGenPrepareImpl
: public InstVisitor<AMDGPUCodeGenPrepareImpl, bool> {
public:
- const GCNSubtarget *ST = nullptr;
- const AMDGPUTargetMachine *TM = nullptr;
- const TargetLibraryInfo *TLInfo = nullptr;
- AssumptionCache *AC = nullptr;
- DominatorTree *DT = nullptr;
- UniformityInfo *UA = nullptr;
- Module *Mod = nullptr;
- const DataLayout *DL = nullptr;
- bool HasUnsafeFPMath = false;
- bool HasFP32DenormalFlush = false;
+ Function &F;
+ const GCNSubtarget &ST;
+ const AMDGPUTargetMachine &TM;
+ const TargetLibraryInfo *TLI;
+ AssumptionCache *AC;
+ const DominatorTree *DT;
+ const UniformityInfo &UA;
+ const DataLayout &DL;
+ bool HasUnsafeFPMath;
+ bool HasFP32DenormalFlush;
bool FlowChanged = false;
mutable Function *SqrtF32 = nullptr;
mutable Function *LdexpF32 = nullptr;
DenseMap<const PHINode *, bool> BreakPhiNodesCache;
+ AMDGPUCodeGenPrepareImpl(Function &F, const AMDGPUTargetMachine &TM,
+ const TargetLibraryInfo *TLI, AssumptionCache *AC,
+ const DominatorTree *DT, const UniformityInfo &UA)
+ : F(F), ST(TM.getSubtarget<GCNSubtarget>(F)), TM(TM), TLI(TLI), AC(AC),
+ DT(DT), UA(UA), DL(F.getDataLayout()),
+ HasUnsafeFPMath(hasUnsafeFPMath(F)),
+ HasFP32DenormalFlush(SIModeRegisterDefaults(F, ST).FP32Denormals ==
+ DenormalMode::getPreserveSign()) {}
+
Function *getSqrtF32() const {
if (SqrtF32)
return SqrtF32;
- LLVMContext &Ctx = Mod->getContext();
- SqrtF32 = Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::amdgcn_sqrt,
- {Type::getFloatTy(Ctx)});
+ LLVMContext &Ctx = F.getContext();
+ SqrtF32 = Intrinsic::getOrInsertDeclaration(
+ F.getParent(), Intrinsic::amdgcn_sqrt, {Type::getFloatTy(Ctx)});
return SqrtF32;
}
@@ -128,9 +141,10 @@ class AMDGPUCodeGenPrepareImpl
if (LdexpF32)
return LdexpF32;
- LLVMContext &Ctx = Mod->getContext();
+ LLVMContext &Ctx = F.getContext();
LdexpF32 = Intrinsic::getOrInsertDeclaration(
- Mod, Intrinsic::ldexp, {Type::getFloatTy(Ctx), Type::getInt32Ty(Ctx)});
+ F.getParent(), Intrinsic::ldexp,
+ {Type::getFloatTy(Ctx), Type::getInt32Ty(Ctx)});
return LdexpF32;
}
@@ -166,8 +180,7 @@ class AMDGPUCodeGenPrepareImpl
/// Wrapper to pass all the arguments to computeKnownFPClass
KnownFPClass computeKnownFPClass(const Value *V, FPClassTest Interested,
const Instruction *CtxI) const {
- return llvm::computeKnownFPClass(V, *DL, Interested, 0, TLInfo, AC, CtxI,
- DT);
+ return llvm::computeKnownFPClass(V, DL, Interested, 0, TLI, AC, CtxI, DT);
}
bool canIgnoreDenormalInput(const Value *V, const Instruction *CtxI) const {
@@ -317,13 +330,10 @@ class AMDGPUCodeGenPrepareImpl
bool visitBitreverseIntrinsicInst(IntrinsicInst &I);
bool visitMinNum(IntrinsicInst &I);
bool visitSqrt(IntrinsicInst &I);
- bool run(Function &F);
+ bool run();
};
class AMDGPUCodeGenPrepare : public FunctionPass {
-private:
- AMDGPUCodeGenPrepareImpl Impl;
-
public:
static char ID;
AMDGPUCodeGenPrepare() : FunctionPass(ID) {
@@ -339,13 +349,12 @@ class AMDGPUCodeGenPrepare : public FunctionPass {
AU.setPreservesAll();
}
bool runOnFunction(Function &F) override;
- bool doInitialization(Module &M) override;
StringRef getPassName() const override { return "AMDGPU IR optimizations"; }
};
} // end anonymous namespace
-bool AMDGPUCodeGenPrepareImpl::run(Function &F) {
+bool AMDGPUCodeGenPrepareImpl::run() {
BreakPhiNodesCache.clear();
bool MadeChange = false;
@@ -411,7 +420,7 @@ bool AMDGPUCodeGenPrepareImpl::needsPromotionToI32(const Type *T) const {
if (const VectorType *VT = dyn_cast<VectorType>(T)) {
// TODO: The set of packed operations is more limited, so may want to
// promote some anyway.
- if (ST->hasVOP3PInsts())
+ if (ST.hasVOP3PInsts())
return false;
return needsPromotionToI32(VT->getElementType());
@@ -422,7 +431,7 @@ bool AMDGPUCodeGenPrepareImpl::needsPromotionToI32(const Type *T) const {
bool AMDGPUCodeGenPrepareImpl::isLegalFloatingTy(const Type *Ty) const {
return Ty->isFloatTy() || Ty->isDoubleTy() ||
- (Ty->isHalfTy() && ST->has16BitInsts());
+ (Ty->isHalfTy() && ST.has16BitInsts());
}
// Return true if the op promoted to i32 should have nsw set.
@@ -455,11 +464,10 @@ static bool promotedOpIsNUW(const Instruction &I) {
bool AMDGPUCodeGenPrepareImpl::canWidenScalarExtLoad(LoadInst &I) const {
Type *Ty = I.getType();
- const DataLayout &DL = Mod->getDataLayout();
int TySize = DL.getTypeSizeInBits(Ty);
Align Alignment = DL.getValueOrABITypeAlignment(I.getAlign(), Ty);
- return I.isSimple() && TySize < 32 && Alignment >= 4 && UA->isUniform(&I);
+ return I.isSimple() && TySize < 32 && Alignment >= 4 && UA.isUniform(&I);
}
bool AMDGPUCodeGenPrepareImpl::promoteUniformOpToI32(BinaryOperator &I) const {
@@ -591,11 +599,11 @@ bool AMDGPUCodeGenPrepareImpl::promoteUniformBitreverseToI32(
}
unsigned AMDGPUCodeGenPrepareImpl::numBitsUnsigned(Value *Op) const {
- return computeKnownBits(Op, *DL, 0, AC).countMaxActiveBits();
+ return computeKnownBits(Op, DL, 0, AC).countMaxActiveBits();
}
unsigned AMDGPUCodeGenPrepareImpl::numBitsSigned(Value *Op) const {
- return ComputeMaxSignificantBits(Op, *DL, 0, AC);
+ return ComputeMaxSignificantBits(Op, DL, 0, AC);
}
static void extractValues(IRBuilder<> &Builder,
@@ -631,11 +639,11 @@ bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
Type *Ty = I.getType();
unsigned Size = Ty->getScalarSizeInBits();
- if (Size <= 16 && ST->has16BitInsts())
+ if (Size <= 16 && ST.has16BitInsts())
return false;
// Prefer scalar if this could be s_mul_i32
- if (UA->isUniform(&I))
+ if (UA.isUniform(&I))
return false;
Value *LHS = I.getOperand(0);
@@ -646,11 +654,11 @@ bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
unsigned LHSBits = 0, RHSBits = 0;
bool IsSigned = false;
- if (ST->hasMulU24() && (LHSBits = numBitsUnsigned(LHS)) <= 24 &&
+ if (ST.hasMulU24() && (LHSBits = numBitsUnsigned(LHS)) <= 24 &&
(RHSBits = numBitsUnsigned(RHS)) <= 24) {
IsSigned = false;
- } else if (ST->hasMulI24() && (LHSBits = numBitsSigned(LHS)) <= 24 &&
+ } else if (ST.hasMulI24() && (LHSBits = numBitsSigned(LHS)) <= 24 &&
(RHSBits = numBitsSigned(RHS)) <= 24) {
IsSigned = true;
@@ -730,21 +738,21 @@ bool AMDGPUCodeGenPrepareImpl::foldBinOpIntoSelect(BinaryOperator &BO) const {
if (CastOp) {
if (!CastOp->hasOneUse())
return false;
- CT = ConstantFoldCastOperand(CastOp->getOpcode(), CT, BO.getType(), *DL);
- CF = ConstantFoldCastOperand(CastOp->getOpcode(), CF, BO.getType(), *DL);
+ CT = ConstantFoldCastOperand(CastOp->getOpcode(), CT, BO.getType(), DL);
+ CF = ConstantFoldCastOperand(CastOp->getOpcode(), CF, BO.getType(), DL);
}
// TODO: Handle special 0/-1 cases DAG combine does, although we only really
// need to handle divisions here.
- Constant *FoldedT = SelOpNo ?
- ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CT, *DL) :
- ConstantFoldBinaryOpOperands(BO.getOpcode(), CT, CBO, *DL);
+ Constant *FoldedT =
+ SelOpNo ? ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CT, DL)
+ : ConstantFoldBinaryOpOperands(BO.getOpcode(), CT, CBO, DL);
if (!FoldedT || isa<ConstantExpr>(FoldedT))
return false;
- Constant *FoldedF = SelOpNo ?
- ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CF, *DL) :
- ConstantFoldBinaryOpOperands(BO.getOpcode(), CF, CBO, *DL);
+ Constant *FoldedF =
+ SelOpNo ? ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CF, DL)
+ : ConstantFoldBinaryOpOperands(BO.getOpcode(), CF, CBO, DL);
if (!FoldedF || isa<ConstantExpr>(FoldedF))
return false;
@@ -777,7 +785,7 @@ AMDGPUCodeGenPrepareImpl::getFrexpResults(IRBuilder<> &Builder,
// result? It's unspecified by the spec.
Value *FrexpExp =
- ST->hasFractBug()
+ ST.hasFractBug()
? Builder.CreateIntrinsic(Intrinsic::amdgcn_frexp_exp,
{Builder.getInt32Ty(), Ty}, Src)
: Builder.CreateExtractValue(Frexp, {1});
@@ -815,7 +823,7 @@ Value *AMDGPUCodeGenPrepareImpl::emitFrexpDiv(IRBuilder<> &Builder, Value *LHS,
// If we have have to work around the fract/frexp bug, we're worse off than
// using the fdiv.fast expansion. The full safe expansion is faster if we have
// fast FMA.
- if (HasFP32DenormalFlush && ST->hasFractBug() && !ST->hasFastFMAF32() &&
+ if (HasFP32DenormalFlush && ST.hasFractBug() && !ST.hasFastFMAF32() &&
(!FMF.noNaNs() || !FMF.noInfs()))
return nullptr;
@@ -1157,17 +1165,12 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
if (NewVal) {
FDiv.replaceAllUsesWith(NewVal);
NewVal->takeName(&FDiv);
- RecursivelyDeleteTriviallyDeadInstructions(&FDiv, TLInfo);
+ RecursivelyDeleteTriviallyDeadInstructions(&FDiv, TLI);
}
return true;
}
-static bool hasUnsafeFPMath(const Function &F) {
- Attribute Attr = F.getFnAttribute("unsafe-fp-math");
- return Attr.getValueAsBool();
-}
-
static std::pair<Value*, Value*> getMul64(IRBuilder<> &Builder,
Value *LHS, Value *RHS) {
Type *I32Ty = Builder.getInt32Ty();
@@ -1192,7 +1195,6 @@ static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) {
int AMDGPUCodeGenPrepareImpl::getDivNumBits(BinaryOperator &I, Value *Num,
Value *Den, unsigned AtLeast,
bool IsSigned) const {
- const DataLayout &DL = Mod->getDataLayout();
unsigned LHSSignBits = ComputeNumSignBits(Num, DL, 0, AC, &I);
if (LHSSignBits < AtLeast)
return -1;
@@ -1271,7 +1273,7 @@ Value *AMDGPUCodeGenPrepareImpl::expandDivRem24Impl(
Value *FQNeg = Builder.CreateFNeg(FQ);
// float fr = mad(fqneg, fb, fa);
- auto FMAD = !ST->hasMadMacF32Insts()
+ auto FMAD = !ST.hasMadMacF32Insts()
? Intrinsic::fma
: (Intrinsic::ID)Intrinsic::amdgcn_fmad_ftz;
Value *FR = Builder.CreateIntrinsic(FMAD,
@@ -1338,7 +1340,7 @@ bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I,
// If there's no wider mulhi, there's only a better expansion for powers of
// two.
// TODO: Should really know for each vector element.
- if (isKnownToBeAPowerOfTwo(C, *DL, true, 0, AC, &I, DT))
+ if (isKnownToBeAPowerOfTwo(C, DL, true, 0, AC, &I, DT))
return true;
return false;
@@ -1348,8 +1350,8 @@ bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I,
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (BinOpDen->getOpcode() == Instruction::Shl &&
isa<Constant>(BinOpDen->getOperand(0)) &&
- isKnownToBeAPowerOfTwo(BinOpDen->getOperand(0), *DL, true,
- 0, AC, &I, DT)) {
+ isKnownToBeAPowerOfTwo(BinOpDen->getOperand(0), DL, true, 0, AC, &I,
+ DT)) {
return true;
}
}
@@ -1357,9 +1359,9 @@ bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I,
return false;
}
-static Value *getSign32(Value *V, IRBuilder<> &Builder, const DataLayout *DL) {
+static Value *getSign32(Value *V, IRBuilder<> &Builder, const DataLayout &DL) {
// Check whether the sign can be determined statically.
- KnownBits Known = computeKnownBits(V, *DL);
+ KnownBits Known = computeKnownBits(V, DL);
if (Known.isNegative())
return Constant::getAllOnesValue(V->getType());
if (Known.isNonNegative())
@@ -1542,8 +1544,8 @@ bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) {
if (foldBinOpIntoSelect(I))
return true;
- if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
- UA->isUniform(&I) && promoteUniformOpToI32(I))
+ if (ST.has16BitInsts() && needsPromotionToI32(I.getType()) &&
+ UA.isUniform(&I) && promoteUniformOpToI32(I))
return true;
if (UseMul24Intrin && replaceMulWithMul24(I))
@@ -1655,11 +1657,11 @@ bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) {
};
WidenLoad->setMetadata(LLVMContext::MD_range,
- MDNode::get(Mod->getContext(), LowAndHigh));
+ MDNode::get(F.getContext(), LowAndHigh));
}
}
- int TySize = Mod->getDataLayout().getTypeSizeInBits(I.getType());
+ int TySize = DL.getTypeSizeInBits(I.getType());
Type *IntNTy = Builder.getIntNTy(TySize);
Value *ValTrunc = Builder.CreateTrunc(WidenLoad, IntNTy);
Value *ValOrig = Builder.CreateBitCast(ValTrunc, I.getType());
@@ -1674,8 +1676,8 @@ bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) {
bool AMDGPUCodeGenPrepareImpl::visitICmpInst(ICmpInst &I) {
bool Changed = false;
- if (ST->has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
- UA->isUniform(&I))
+ if (ST.has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
+ UA.isUniform(&I))
Changed |= promoteUniformOpToI32(I);
return Changed;
@@ -1688,8 +1690,8 @@ bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) {
Value *CmpVal;
FCmpInst::Predicate Pred;
- if (ST->has16BitInsts() && needsPromotionToI32(I.getType())) {
- if (UA->isUniform(&I))
+ if (ST.has16BitInsts() && needsPromotionToI32(I.getType())) {
+ if (UA.isUniform(&I))
return promoteUniformOpToI32(I);
return false;
}
@@ -1722,7 +1724,7 @@ bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) {
Fract->takeName(&I);
I.replaceAllUsesWith(Fract);
- RecursivelyDeleteTriviallyDeadInstructions(&I, TLInfo);
+ RecursivelyDeleteTriviallyDeadInstructions(&I, TLI);
return true;
}
@@ -1947,7 +1949,7 @@ bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) {
FixedVectorType *FVT = dyn_cast<FixedVectorType>(I.getType());
if (!FVT || FVT->getNumElements() == 1 ||
- DL->getTypeSizeInBits(FVT) <= BreakLargePHIsThreshold)
+ DL.getTypeSizeInBits(FVT) <= BreakLargePHIsThreshold)
return false;
if (!ForceBreakLargePHIs && !canBreakPHINode(I))
@@ -1960,7 +1962,7 @@ bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) {
unsigned Idx = 0;
// For 8/16 bits type, don't scalarize fully but break it up into as many
// 32-bit slices as we can, and scalarize the tail.
- const unsigned EltSize = DL->getTypeSizeInBits(EltTy);
+ const unsigned EltSize = DL.getTypeSizeInBits(EltTy);
const unsigned NumElts = FVT->getNumElements();
if (EltSize == 8 || EltSize == 16) {
const unsigned SubVecSize = (32 / EltSize);
@@ -2079,7 +2081,7 @@ bool AMDGPUCodeGenPrepareImpl::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
SmallVector<const Value *, 4> WorkList;
getUnderlyingObjects(I.getOperand(0), WorkList);
if (!all_of(WorkList, [&](const Value *V) {
- return isPtrKnownNeverNull(V, *DL, *TM, SrcAS);
+ return isPtrKnownNeverNull(V, DL, TM, SrcAS);
}))
return false;
@@ -2107,8 +2109,8 @@ bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) {
bool AMDGPUCodeGenPrepareImpl::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
bool Changed = false;
- if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
- UA->isUniform(&I))
+ if (ST.has16BitInsts() && needsPromotionToI32(I.getType()) &&
+ UA.isUniform(&I))
Changed |= promoteUniformBitreverseToI32(I);
return Changed;
@@ -2120,7 +2122,7 @@ bool AMDGPUCodeGenPrepareImpl::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
/// If fract is a useful instruction for the subtarget. Does not account for the
/// nan handling; the instruction has a nan check on the input value.
Value *AMDGPUCodeGenPrepareImpl::matchFractPat(IntrinsicInst &I) {
- if (ST->hasFractBug())
+ if (ST.hasFractBug())
return nullptr;
if (I.getIntrinsicID() != Intrinsic::minnum)
@@ -2177,7 +2179,7 @@ bool AMDGPUCodeGenPrepareImpl::visitMinNum(IntrinsicInst &I) {
// Match pattern for fract intrinsic in contexts where the nan check has been
// optimized out (and hope the knowledge the source can't be nan wasn't lost).
if (!I.hasNoNaNs() &&
- !isKnownNeverNaN(FractArg, /*Depth=*/0, SimplifyQuery(*DL, TLInfo)))
+ !isKnownNeverNaN(FractArg, /*Depth=*/0, SimplifyQuery(DL, TLI)))
return false;
IRBuilder<> Builder(&I);
@@ -2189,7 +2191,7 @@ bool AMDGPUCodeGenPrepareImpl::visitMinNum(IntrinsicInst &I) {
Fract->takeName(&I);
I.replaceAllUsesWith(Fract);
- RecursivelyDeleteTriviallyDeadInstructions(&I, TLInfo);
+ RecursivelyDeleteTriviallyDeadInstructions(&I, TLI);
return true;
}
@@ -2201,7 +2203,7 @@ static bool isOneOrNegOne(const Value *Val) {
// Expand llvm.sqrt.f32 calls with !fpmath metadata in a semi-fast way.
bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) {
Type *Ty = Sqrt.getType()->getScalarType();
- if (!Ty->isFloatTy() && (!Ty->isHalfTy() || ST->has16BitInsts()))
+ if (!Ty->isFloatTy() && (!Ty->isHalfTy() || ST.has16BitInsts()))
return false;
const FPMathOperator *FPOp = cast<const FPMathOperator>(&Sqrt);
@@ -2257,14 +2259,6 @@ bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) {
return true;
}
-bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
- Impl.Mod = &M;
- Impl.DL = &Impl.Mod->getDataLayout();
- Impl.SqrtF32 = nullptr;
- Impl.LdexpF32 = nullptr;
- return false;
-}
-
bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
@@ -2274,36 +2268,26 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
return false;
const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();
- Impl.TM = &TM;
- Impl.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- Impl.ST = &TM.getSubtarget<GCNSubtarget>(F);
- Impl.AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- Impl.UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ AssumptionCache *AC =
+ &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- Impl.DT = DTWP ? &DTWP->getDomTree() : nullptr;
- Impl.HasUnsafeFPMath = hasUnsafeFPMath(F);
- SIModeRegisterDefaults Mode(F, *Impl.ST);
- Impl.HasFP32DenormalFlush =
- Mode.FP32Denormals == DenormalMode::getPreserveSign();
- return Impl.run(F);
+ const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ const UniformityInfo &UA =
+ getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
+ return AMDGPUCodeGenPrepareImpl(F, TM, TLI, AC, DT, UA).run();
}
PreservedAnalyses AMDGPUCodeGenPreparePass::run(Function &F,
FunctionAnalysisManager &FAM) {
- AMDGPUCodeGenPrepareImpl Impl;
- Impl.Mod = F.getParent();
- Impl.DL = &Impl.Mod->getDataLayout();
- Impl.TM = static_cast<const AMDGPUTargetMachine *>(&TM);
- Impl.TLInfo = &FAM.getResult<TargetLibraryAnalysis>(F);
- Impl.ST = &TM.getSubtarget<GCNSubtarget>(F);
- Impl.AC = &FAM.getResult<AssumptionAnalysis>(F);
- Impl.UA = &FAM.getResult<UniformityInfoAnalysis>(F);
- Impl.DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
- Impl.HasUnsafeFPMath = hasUnsafeFPMath(F);
- SIModeRegisterDefaults Mode(F, *Impl.ST);
- Impl.HasFP32DenormalFlush =
- Mode.FP32Denormals == DenormalMode::getPreserveSign();
- if (!Impl.run(F))
+ const AMDGPUT...
[truncated]
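The new-pass-manager entry point is cut off by the truncation above. Based on the legacy runOnFunction path shown in the diff and the commit's stated goal of commoning up initialization, it presumably gathers the same analyses from the FunctionAnalysisManager and constructs the Impl once. The following is only a hedged sketch under that assumption, not the committed code.

// Sketch only (assumed shape, not the truncated lines): mirror the legacy
// path by collecting analyses and constructing AMDGPUCodeGenPrepareImpl once.
PreservedAnalyses AMDGPUCodeGenPreparePass::run(Function &F,
                                                FunctionAnalysisManager &FAM) {
  const AMDGPUTargetMachine &ATM = static_cast<const AMDGPUTargetMachine &>(TM);
  const TargetLibraryInfo *TLI = &FAM.getResult<TargetLibraryAnalysis>(F);
  AssumptionCache *AC = &FAM.getResult<AssumptionAnalysis>(F);
  const UniformityInfo &UA = FAM.getResult<UniformityInfoAnalysis>(F);
  const DominatorTree *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
  AMDGPUCodeGenPrepareImpl Impl(F, ATM, TLI, AC, DT, UA);
  if (!Impl.run())
    return PreservedAnalyses::all();
  PreservedAnalyses PA = PreservedAnalyses::none();
  if (!Impl.FlowChanged)
    PA.preserveSet<CFGAnalyses>();  // only instruction-level changes were made
  return PA;
}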
arsenm approved these changes on Dec 3, 2024.
Co-authored-by: Matt Arsenault <[email protected]>