@@ -254,9 +254,8 @@ class AMDGPUCodeGenPrepareImpl
254254
255255 bool divHasSpecialOptimization (BinaryOperator &I,
256256 Value *Num, Value *Den) const ;
257- int getDivNumBits (BinaryOperator &I,
258- Value *Num, Value *Den,
259- unsigned AtLeast, bool Signed) const ;
257+ unsigned getDivNumBits (BinaryOperator &I, Value *Num, Value *Den,
258+ unsigned MaxDivBits, bool Signed) const ;
260259
261260 /// Expands 24 bit div or rem.
262261 Value* expandDivRem24 (IRBuilder<> &Builder, BinaryOperator &I,
@@ -1189,27 +1188,29 @@ static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) {
11891188 return getMul64 (Builder, LHS, RHS).second ;
11901189}
11911190
1192- /// Figure out how many bits are really needed for this division. \p AtLeast is
1193- /// an optimization hint to bypass the second ComputeNumSignBits call if we the
1194- /// first one is insufficient. Returns -1 on failure.
1195- int AMDGPUCodeGenPrepareImpl::getDivNumBits (BinaryOperator &I, Value *Num,
1196- Value *Den, unsigned AtLeast,
1197- bool IsSigned) const {
1191+ /// Figure out how many bits are really needed for this division.
1192+ /// \p MaxDivBits is an optimization hint to bypass the second
1193+ /// ComputeNumSignBits/computeKnownBits call if the first one is
1194+ /// insufficient.
1195+ unsigned AMDGPUCodeGenPrepareImpl::getDivNumBits (BinaryOperator &I, Value *Num,
1196+ Value *Den,
1197+ unsigned MaxDivBits,
1198+ bool IsSigned) const {
11981199 assert (Num->getType ()->getScalarSizeInBits () ==
11991200 Den->getType ()->getScalarSizeInBits ());
12001201 unsigned SSBits = Num->getType ()->getScalarSizeInBits ();
12011202 if (IsSigned) {
12021203 unsigned RHSSignBits = ComputeNumSignBits (Den, DL, 0 , AC, &I);
1203- if (RHSSignBits < AtLeast)
1204- return -1 ;
1204+ // A sign bit needs to be reserved for shrinking.
1205+ unsigned DivBits = SSBits - RHSSignBits + 1 ;
1206+ if (DivBits > MaxDivBits)
1207+ return SSBits;
12051208
12061209 unsigned LHSSignBits = ComputeNumSignBits (Num, DL, 0 , AC, &I);
1207- if (LHSSignBits < AtLeast)
1208- return -1 ;
12091210
12101211 unsigned SignBits = std::min (LHSSignBits, RHSSignBits);
1211- unsigned DivBits = SSBits - SignBits + 1 ;
1212- return DivBits; // a SignBit needs to be reserved for shrinking
1212+ DivBits = SSBits - SignBits + 1 ;
1213+ return DivBits;
12131214 }
12141215
12151216 // All bits are used for unsigned division for Num or Den in range
@@ -1218,14 +1219,17 @@ int AMDGPUCodeGenPrepareImpl::getDivNumBits(BinaryOperator &I, Value *Num,
12181219 if (Known.isNegative () || !Known.isNonNegative ())
12191220 return SSBits;
12201221 unsigned RHSSignBits = Known.countMinLeadingZeros ();
1222+ unsigned DivBits = SSBits - RHSSignBits;
1223+ if (DivBits > MaxDivBits)
1224+ return SSBits;
12211225
12221226 Known = computeKnownBits (Num, DL, 0 , AC, &I);
12231227 if (Known.isNegative () || !Known.isNonNegative ())
12241228 return SSBits;
12251229 unsigned LHSSignBits = Known.countMinLeadingZeros ();
12261230
12271231 unsigned SignBits = std::min (LHSSignBits, RHSSignBits);
1228- unsigned DivBits = SSBits - SignBits;
1232+ DivBits = SSBits - SignBits;
12291233 return DivBits;
12301234}
12311235
@@ -1235,11 +1239,8 @@ Value *AMDGPUCodeGenPrepareImpl::expandDivRem24(IRBuilder<> &Builder,
12351239 BinaryOperator &I, Value *Num,
12361240 Value *Den, bool IsDiv,
12371241 bool IsSigned) const {
1238- unsigned SSBits = Num->getType ()->getScalarSizeInBits ();
1239- // If Num bits <= 24, assume 0 signbits.
1240- unsigned AtLeast = (SSBits <= 24 ) ? 0 : (SSBits - 24 + IsSigned);
1241- int DivBits = getDivNumBits (I, Num, Den, AtLeast, IsSigned);
1242- if (DivBits == -1 || DivBits > 24 )
1242+ unsigned DivBits = getDivNumBits (I, Num, Den, 24 , IsSigned);
1243+ if (DivBits > 24 )
12431244 return nullptr ;
12441245 return expandDivRem24Impl (Builder, I, Num, Den, DivBits, IsDiv, IsSigned);
12451246}
@@ -1523,9 +1524,8 @@ Value *AMDGPUCodeGenPrepareImpl::shrinkDivRem64(IRBuilder<> &Builder,
15231524 bool IsDiv = Opc == Instruction::SDiv || Opc == Instruction::UDiv;
15241525 bool IsSigned = Opc == Instruction::SDiv || Opc == Instruction::SRem;
15251526
1526- unsigned BitWidth = Num->getType ()->getScalarSizeInBits ();
1527- int NumDivBits = getDivNumBits (I, Num, Den, BitWidth - 32 , IsSigned);
1528- if (NumDivBits == -1 )
1527+ unsigned NumDivBits = getDivNumBits (I, Num, Den, 32 , IsSigned);
1528+ if (NumDivBits > 32 )
15291529 return nullptr ;
15301530
15311531 Value *Narrowed = nullptr ;
0 commit comments