Skip to content

Commit 496fcb2

Browse files
committed
[AMDGPU] Rework getDivNumBits API
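The rework involves three things: (1) return an unsigned value instead of an int with -1 as the failure sentinel; (2) change the hint from AtLeast (a minimum number of sign bits) to MaxDivBits (a maximum number of division bits); (3) use the MaxDivBits hint for the unsigned case as well.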
1 parent 8d2e611 commit 496fcb2

File tree

1 file changed

+30
-24
lines changed

1 file changed

+30
-24
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -254,9 +254,8 @@ class AMDGPUCodeGenPrepareImpl
254254

255255
bool divHasSpecialOptimization(BinaryOperator &I,
256256
Value *Num, Value *Den) const;
257-
int getDivNumBits(BinaryOperator &I,
258-
Value *Num, Value *Den,
259-
unsigned AtLeast, bool Signed) const;
257+
unsigned getDivNumBits(BinaryOperator &I, Value *Num, Value *Den,
258+
unsigned AtLeast, bool Signed) const;
260259

261260
/// Expands 24 bit div or rem.
262261
Value* expandDivRem24(IRBuilder<> &Builder, BinaryOperator &I,
@@ -1189,27 +1188,32 @@ static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) {
11891188
return getMul64(Builder, LHS, RHS).second;
11901189
}
11911190

1192-
/// Figure out how many bits are really needed for this division. \p AtLeast is
1193-
/// an optimization hint to bypass the second ComputeNumSignBits call if we the
1194-
/// first one is insufficient. Returns -1 on failure.
1195-
int AMDGPUCodeGenPrepareImpl::getDivNumBits(BinaryOperator &I, Value *Num,
1196-
Value *Den, unsigned AtLeast,
1197-
bool IsSigned) const {
1191+
/// Figure out how many bits are really needed for this division.
1192+
/// \p MaxDivBits is an optimization hint to bypass the second
1193+
/// ComputeNumSignBits/computeKnownBits call if the first one is
1194+
/// insufficient.
1195+
unsigned AMDGPUCodeGenPrepareImpl::getDivNumBits(BinaryOperator &I, Value *Num,
1196+
Value *Den,
1197+
unsigned MaxDivBits,
1198+
bool IsSigned) const {
11981199
assert(Num->getType()->getScalarSizeInBits() ==
11991200
Den->getType()->getScalarSizeInBits());
12001201
unsigned SSBits = Num->getType()->getScalarSizeInBits();
12011202
if (IsSigned) {
12021203
unsigned RHSSignBits = ComputeNumSignBits(Den, DL, 0, AC, &I);
1203-
if (RHSSignBits < AtLeast)
1204-
return -1;
1204+
// a SignBit needs to be reserved for shrinking
1205+
unsigned DivBits = SSBits - RHSSignBits + 1;
1206+
if (DivBits > MaxDivBits)
1207+
return DivBits;
12051208

12061209
unsigned LHSSignBits = ComputeNumSignBits(Num, DL, 0, AC, &I);
1207-
if (LHSSignBits < AtLeast)
1208-
return -1;
1210+
DivBits = SSBits - LHSSignBits + 1;
1211+
if (DivBits > MaxDivBits)
1212+
return DivBits;
12091213

12101214
unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
1211-
unsigned DivBits = SSBits - SignBits + 1;
1212-
return DivBits; // a SignBit needs to be reserved for shrinking
1215+
DivBits = SSBits - SignBits + 1;
1216+
return DivBits;
12131217
}
12141218

12151219
// All bits are used for unsigned division for Num or Den in range
@@ -1218,14 +1222,20 @@ int AMDGPUCodeGenPrepareImpl::getDivNumBits(BinaryOperator &I, Value *Num,
12181222
if (Known.isNegative() || !Known.isNonNegative())
12191223
return SSBits;
12201224
unsigned RHSSignBits = Known.countMinLeadingZeros();
1225+
unsigned DivBits = SSBits - RHSSignBits;
1226+
if (DivBits > MaxDivBits)
1227+
return DivBits;
12211228

12221229
Known = computeKnownBits(Num, DL, 0, AC, &I);
12231230
if (Known.isNegative() || !Known.isNonNegative())
12241231
return SSBits;
12251232
unsigned LHSSignBits = Known.countMinLeadingZeros();
1233+
DivBits = SSBits - LHSSignBits;
1234+
if (DivBits > MaxDivBits)
1235+
return DivBits;
12261236

12271237
unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
1228-
unsigned DivBits = SSBits - SignBits;
1238+
DivBits = SSBits - SignBits;
12291239
return DivBits;
12301240
}
12311241

@@ -1235,11 +1245,8 @@ Value *AMDGPUCodeGenPrepareImpl::expandDivRem24(IRBuilder<> &Builder,
12351245
BinaryOperator &I, Value *Num,
12361246
Value *Den, bool IsDiv,
12371247
bool IsSigned) const {
1238-
unsigned SSBits = Num->getType()->getScalarSizeInBits();
1239-
// If Num bits <= 24, assume 0 signbits.
1240-
unsigned AtLeast = (SSBits <= 24) ? 0 : (SSBits - 24 + IsSigned);
1241-
int DivBits = getDivNumBits(I, Num, Den, AtLeast, IsSigned);
1242-
if (DivBits == -1 || DivBits > 24)
1248+
unsigned DivBits = getDivNumBits(I, Num, Den, 24, IsSigned);
1249+
if (DivBits > 24)
12431250
return nullptr;
12441251
return expandDivRem24Impl(Builder, I, Num, Den, DivBits, IsDiv, IsSigned);
12451252
}
@@ -1523,9 +1530,8 @@ Value *AMDGPUCodeGenPrepareImpl::shrinkDivRem64(IRBuilder<> &Builder,
15231530
bool IsDiv = Opc == Instruction::SDiv || Opc == Instruction::UDiv;
15241531
bool IsSigned = Opc == Instruction::SDiv || Opc == Instruction::SRem;
15251532

1526-
unsigned BitWidth = Num->getType()->getScalarSizeInBits();
1527-
int NumDivBits = getDivNumBits(I, Num, Den, BitWidth - 32, IsSigned);
1528-
if (NumDivBits == -1)
1533+
unsigned NumDivBits = getDivNumBits(I, Num, Den, 32, IsSigned);
1534+
if (NumDivBits > 32)
15291535
return nullptr;
15301536

15311537
Value *Narrowed = nullptr;

0 commit comments

Comments
 (0)