@@ -254,9 +254,8 @@ class AMDGPUCodeGenPrepareImpl
254
254
255
255
bool divHasSpecialOptimization (BinaryOperator &I,
256
256
Value *Num, Value *Den) const ;
257
- int getDivNumBits (BinaryOperator &I,
258
- Value *Num, Value *Den,
259
- unsigned AtLeast, bool Signed) const ;
257
+ unsigned getDivNumBits (BinaryOperator &I, Value *Num, Value *Den,
258
+ unsigned MaxDivBits, bool Signed) const ;
260
259
261
260
/// Expands 24 bit div or rem.
262
261
Value* expandDivRem24 (IRBuilder<> &Builder, BinaryOperator &I,
@@ -1189,27 +1188,29 @@ static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) {
1189
1188
return getMul64 (Builder, LHS, RHS).second ;
1190
1189
}
1191
1190
1192
- /// Figure out how many bits are really needed for this division. \p AtLeast is
1193
- /// an optimization hint to bypass the second ComputeNumSignBits call if we the
1194
- /// first one is insufficient. Returns -1 on failure.
1195
- int AMDGPUCodeGenPrepareImpl::getDivNumBits (BinaryOperator &I, Value *Num,
1196
- Value *Den, unsigned AtLeast,
1197
- bool IsSigned) const {
1191
+ /// Figure out how many bits are really needed for this division.
1192
+ /// \p MaxDivBits is an optimization hint to bypass the second
1193
+ /// ComputeNumSignBits/computeKnownBits call if the first one is
1194
+ /// insufficient.
1195
+ unsigned AMDGPUCodeGenPrepareImpl::getDivNumBits (BinaryOperator &I, Value *Num,
1196
+ Value *Den,
1197
+ unsigned MaxDivBits,
1198
+ bool IsSigned) const {
1198
1199
assert (Num->getType ()->getScalarSizeInBits () ==
1199
1200
Den->getType ()->getScalarSizeInBits ());
1200
1201
unsigned SSBits = Num->getType ()->getScalarSizeInBits ();
1201
1202
if (IsSigned) {
1202
1203
unsigned RHSSignBits = ComputeNumSignBits (Den, DL, 0 , AC, &I);
1203
- if (RHSSignBits < AtLeast)
1204
- return -1 ;
1204
+ // A sign bit needs to be reserved for shrinking.
1205
+ unsigned DivBits = SSBits - RHSSignBits + 1 ;
1206
+ if (DivBits > MaxDivBits)
1207
+ return SSBits;
1205
1208
1206
1209
unsigned LHSSignBits = ComputeNumSignBits (Num, DL, 0 , AC, &I);
1207
- if (LHSSignBits < AtLeast)
1208
- return -1 ;
1209
1210
1210
1211
unsigned SignBits = std::min (LHSSignBits, RHSSignBits);
1211
- unsigned DivBits = SSBits - SignBits + 1 ;
1212
- return DivBits; // a SignBit needs to be reserved for shrinking
1212
+ DivBits = SSBits - SignBits + 1 ;
1213
+ return DivBits;
1213
1214
}
1214
1215
1215
1216
// All bits are used for unsigned division for Num or Den in range
@@ -1218,14 +1219,17 @@ int AMDGPUCodeGenPrepareImpl::getDivNumBits(BinaryOperator &I, Value *Num,
1218
1219
if (Known.isNegative () || !Known.isNonNegative ())
1219
1220
return SSBits;
1220
1221
unsigned RHSSignBits = Known.countMinLeadingZeros ();
1222
+ unsigned DivBits = SSBits - RHSSignBits;
1223
+ if (DivBits > MaxDivBits)
1224
+ return SSBits;
1221
1225
1222
1226
Known = computeKnownBits (Num, DL, 0 , AC, &I);
1223
1227
if (Known.isNegative () || !Known.isNonNegative ())
1224
1228
return SSBits;
1225
1229
unsigned LHSSignBits = Known.countMinLeadingZeros ();
1226
1230
1227
1231
unsigned SignBits = std::min (LHSSignBits, RHSSignBits);
1228
- unsigned DivBits = SSBits - SignBits;
1232
+ DivBits = SSBits - SignBits;
1229
1233
return DivBits;
1230
1234
}
1231
1235
@@ -1235,11 +1239,8 @@ Value *AMDGPUCodeGenPrepareImpl::expandDivRem24(IRBuilder<> &Builder,
1235
1239
BinaryOperator &I, Value *Num,
1236
1240
Value *Den, bool IsDiv,
1237
1241
bool IsSigned) const {
1238
- unsigned SSBits = Num->getType ()->getScalarSizeInBits ();
1239
- // If Num bits <= 24, assume 0 signbits.
1240
- unsigned AtLeast = (SSBits <= 24 ) ? 0 : (SSBits - 24 + IsSigned);
1241
- int DivBits = getDivNumBits (I, Num, Den, AtLeast, IsSigned);
1242
- if (DivBits == -1 || DivBits > 24 )
1242
+ unsigned DivBits = getDivNumBits (I, Num, Den, 24 , IsSigned);
1243
+ if (DivBits > 24 )
1243
1244
return nullptr ;
1244
1245
return expandDivRem24Impl (Builder, I, Num, Den, DivBits, IsDiv, IsSigned);
1245
1246
}
@@ -1523,9 +1524,8 @@ Value *AMDGPUCodeGenPrepareImpl::shrinkDivRem64(IRBuilder<> &Builder,
1523
1524
bool IsDiv = Opc == Instruction::SDiv || Opc == Instruction::UDiv;
1524
1525
bool IsSigned = Opc == Instruction::SDiv || Opc == Instruction::SRem;
1525
1526
1526
- unsigned BitWidth = Num->getType ()->getScalarSizeInBits ();
1527
- int NumDivBits = getDivNumBits (I, Num, Den, BitWidth - 32 , IsSigned);
1528
- if (NumDivBits == -1 )
1527
+ unsigned NumDivBits = getDivNumBits (I, Num, Den, 32 , IsSigned);
1528
+ if (NumDivBits > 32 )
1529
1529
return nullptr ;
1530
1530
1531
1531
Value *Narrowed = nullptr ;
0 commit comments