Skip to content

Commit de66d5d

Browse files
committed
Push isCtpopFast check down, merge with adjustIsPower2Test
1 parent 2ce5f4a commit de66d5d

File tree

1 file changed

+43
-57
lines changed

1 file changed

+43
-57
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 43 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -474,7 +474,7 @@ class CodeGenPrepare {
474474
bool optimizeURem(Instruction *Rem);
475475
bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
476476
bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
477-
bool unfoldPow2Test(CmpInst *Cmp);
477+
bool unfoldPowerOf2Test(CmpInst *Cmp);
478478
void verifyBFIUpdates(Function &F);
479479
bool _run(Function &F);
480480
};
@@ -1764,7 +1764,9 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
17641764
}
17651765

17661766
// Decanonicalizes icmp+ctpop power-of-two test if ctpop is slow.
1767-
bool CodeGenPrepare::unfoldPow2Test(CmpInst *Cmp) {
1767+
// The same transformation exists in DAG combiner, but we repeat it here because
1768+
// DAG builder can break the pattern by moving icmp into a successor block.
1769+
bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
17681770
CmpPredicate Pred;
17691771
Value *X;
17701772
const APInt *C;
@@ -1774,48 +1776,60 @@ bool CodeGenPrepare::unfoldPow2Test(CmpInst *Cmp) {
17741776
m_APIntAllowPoison(C))))
17751777
return false;
17761778

1777-
// This transformation increases the number of instructions, don't do it if
1778-
// ctpop is fast.
1779-
Type *OpTy = X->getType();
1780-
if (TLI->isCtpopFast(TLI->getValueType(*DL, OpTy)))
1779+
// We're only interested in "is power of 2 [or zero]" patterns.
1780+
bool IsStrictlyPowerOf2Test = ICmpInst::isEquality(Pred) && *C == 1;
1781+
bool IsPowerOf2OrZeroTest = (Pred == CmpInst::ICMP_ULT && *C == 2) ||
1782+
(Pred == CmpInst::ICMP_UGT && *C == 1);
1783+
if (!IsStrictlyPowerOf2Test && !IsPowerOf2OrZeroTest)
17811784
return false;
17821785

1783-
// ctpop(x) u< 2 -> (x & (x - 1)) == 0
1784-
// ctpop(x) u> 1 -> (x & (x - 1)) != 0
1785-
// Also handles ctpop(x) == 1 and ctpop(x) != 1 if ctpop(x) is known non-zero.
1786-
if ((Pred == CmpInst::ICMP_ULT && *C == 2) ||
1787-
(Pred == CmpInst::ICMP_UGT && *C == 1) ||
1788-
(ICmpInst::isEquality(Pred) && *C == 1 &&
1789-
isKnownNonZero(Cmp->getOperand(0), *DL))) {
1786+
// Some targets have better codegen for `ctpop(x) u</u> 2/1` than for
1787+
// `ctpop(x) ==/!= 1`. If ctpop is fast, only try changing the comparison,
1788+
// and otherwise expand ctpop into a few simple instructions.
1789+
Type *OpTy = X->getType();
1790+
if (TLI->isCtpopFast(TLI->getValueType(*DL, OpTy))) {
1791+
// Look for `ctpop(x) ==/!= 1`, where `ctpop(x)` is known to be non-zero.
1792+
if (!IsStrictlyPowerOf2Test || !isKnownNonZero(Cmp->getOperand(0), *DL))
1793+
return false;
1794+
1795+
// ctpop(x) == 1 -> ctpop(x) u< 2
1796+
// ctpop(x) != 1 -> ctpop(x) u> 1
1797+
if (Pred == ICmpInst::ICMP_EQ) {
1798+
Cmp->setOperand(1, ConstantInt::get(OpTy, 2));
1799+
Cmp->setPredicate(ICmpInst::ICMP_ULT);
1800+
} else {
1801+
Cmp->setPredicate(ICmpInst::ICMP_UGT);
1802+
}
1803+
return true;
1804+
}
1805+
1806+
Value *NewCmp;
1807+
if (IsPowerOf2OrZeroTest ||
1808+
(IsStrictlyPowerOf2Test && isKnownNonZero(Cmp->getOperand(0), *DL))) {
1809+
// ctpop(x) u< 2 -> (x & (x - 1)) == 0
1810+
// ctpop(x) u> 1 -> (x & (x - 1)) != 0
17901811
IRBuilder<> Builder(Cmp);
17911812
Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
17921813
Value *And = Builder.CreateAnd(X, Sub);
17931814
CmpInst::Predicate NewPred =
17941815
(Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_EQ)
17951816
? CmpInst::ICMP_EQ
17961817
: CmpInst::ICMP_NE;
1797-
Value *NewCmp =
1798-
Builder.CreateICmp(NewPred, And, ConstantInt::getNullValue(OpTy));
1799-
Cmp->replaceAllUsesWith(NewCmp);
1800-
RecursivelyDeleteTriviallyDeadInstructions(Cmp);
1801-
return true;
1802-
}
1803-
1804-
// ctpop(x) == 1 -> (x ^ (x - 1)) u> (x - 1)
1805-
// ctpop(x) != 1 -> (x ^ (x - 1)) u<= (x - 1)
1806-
if (ICmpInst::isEquality(Pred) && *C == 1) {
1818+
NewCmp = Builder.CreateICmp(NewPred, And, ConstantInt::getNullValue(OpTy));
1819+
} else {
1820+
// ctpop(x) == 1 -> (x ^ (x - 1)) u> (x - 1)
1821+
// ctpop(x) != 1 -> (x ^ (x - 1)) u<= (x - 1)
18071822
IRBuilder<> Builder(Cmp);
18081823
Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
18091824
Value *Xor = Builder.CreateXor(X, Sub);
18101825
CmpInst::Predicate NewPred =
18111826
Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE;
1812-
Value *NewCmp = Builder.CreateICmp(NewPred, Xor, Sub);
1813-
Cmp->replaceAllUsesWith(NewCmp);
1814-
RecursivelyDeleteTriviallyDeadInstructions(Cmp);
1815-
return true;
1827+
NewCmp = Builder.CreateICmp(NewPred, Xor, Sub);
18161828
}
18171829

1818-
return false;
1830+
Cmp->replaceAllUsesWith(NewCmp);
1831+
RecursivelyDeleteTriviallyDeadInstructions(Cmp);
1832+
return true;
18191833
}
18201834

18211835
/// Sink the given CmpInst into user blocks to reduce the number of virtual
@@ -2204,31 +2218,6 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
22042218
return false;
22052219
}
22062220

2207-
/// Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`.
2208-
/// This function converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` if the
2209-
/// result cannot be zero.
2210-
static bool adjustIsPower2Test(CmpInst *Cmp, const TargetLowering &TLI,
2211-
const TargetTransformInfo &TTI,
2212-
const DataLayout &DL) {
2213-
CmpPredicate Pred;
2214-
if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(), m_One())))
2215-
return false;
2216-
if (!ICmpInst::isEquality(Pred))
2217-
return false;
2218-
auto *II = cast<IntrinsicInst>(Cmp->getOperand(0));
2219-
2220-
if (isKnownNonZero(II, DL)) {
2221-
if (Pred == ICmpInst::ICMP_EQ) {
2222-
Cmp->setOperand(1, ConstantInt::get(II->getType(), 2));
2223-
Cmp->setPredicate(ICmpInst::ICMP_ULT);
2224-
} else {
2225-
Cmp->setPredicate(ICmpInst::ICMP_UGT);
2226-
}
2227-
return true;
2228-
}
2229-
return false;
2230-
}
2231-
22322221
bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
22332222
if (sinkCmpExpression(Cmp, *TLI))
22342223
return true;
@@ -2239,7 +2228,7 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
22392228
if (combineToUSubWithOverflow(Cmp, ModifiedDT))
22402229
return true;
22412230

2242-
if (unfoldPow2Test(Cmp))
2231+
if (unfoldPowerOf2Test(Cmp))
22432232
return true;
22442233

22452234
if (foldICmpWithDominatingICmp(Cmp, *TLI))
@@ -2251,9 +2240,6 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
22512240
if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
22522241
return true;
22532242

2254-
if (adjustIsPower2Test(Cmp, *TLI, *TTI, *DL))
2255-
return true;
2256-
22572243
return false;
22582244
}
22592245

0 commit comments

Comments
 (0)