@@ -474,7 +474,7 @@ class CodeGenPrepare {
474474 bool optimizeURem (Instruction *Rem);
475475 bool combineToUSubWithOverflow (CmpInst *Cmp, ModifyDT &ModifiedDT);
476476 bool combineToUAddWithOverflow (CmpInst *Cmp, ModifyDT &ModifiedDT);
477- bool unfoldPow2Test (CmpInst *Cmp);
477+ bool unfoldPowerOf2Test (CmpInst *Cmp);
478478 void verifyBFIUpdates (Function &F);
479479 bool _run (Function &F);
480480};
@@ -1764,7 +1764,9 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
17641764}
17651765
17661766// Decanonicalizes icmp+ctpop power-of-two test if ctpop is slow.
1767- bool CodeGenPrepare::unfoldPow2Test (CmpInst *Cmp) {
1767+ // The same transformation exists in DAG combiner, but we repeat it here because
1768+ // DAG builder can break the pattern by moving icmp into a successor block.
1769+ bool CodeGenPrepare::unfoldPowerOf2Test (CmpInst *Cmp) {
17681770 CmpPredicate Pred;
17691771 Value *X;
17701772 const APInt *C;
@@ -1774,48 +1776,60 @@ bool CodeGenPrepare::unfoldPow2Test(CmpInst *Cmp) {
17741776 m_APIntAllowPoison (C))))
17751777 return false ;
17761778
1777- // This transformation increases the number of instructions, don't do it if
1778- // ctpop is fast.
1779- Type *OpTy = X->getType ();
1780- if (TLI->isCtpopFast (TLI->getValueType (*DL, OpTy)))
1779+ // We're only interested in "is power of 2 [or zero]" patterns.
1780+ bool IsStrictlyPowerOf2Test = ICmpInst::isEquality (Pred) && *C == 1 ;
1781+ bool IsPowerOf2OrZeroTest = (Pred == CmpInst::ICMP_ULT && *C == 2 ) ||
1782+ (Pred == CmpInst::ICMP_UGT && *C == 1 );
1783+ if (!IsStrictlyPowerOf2Test && !IsPowerOf2OrZeroTest)
17811784 return false ;
17821785
1783- // ctpop(x) u< 2 -> (x & (x - 1)) == 0
1784- // ctpop(x) u> 1 -> (x & (x - 1)) != 0
1785- // Also handles ctpop(x) == 1 and ctpop(x) != 1 if ctpop(x) is known non-zero.
1786- if ((Pred == CmpInst::ICMP_ULT && *C == 2 ) ||
1787- (Pred == CmpInst::ICMP_UGT && *C == 1 ) ||
1788- (ICmpInst::isEquality (Pred) && *C == 1 &&
1789- isKnownNonZero (Cmp->getOperand (0 ), *DL))) {
1786+ // Some targets have better codegen for `ctpop(x) u</u> 2/1` than for
1787+ // `ctpop(x) ==/!= 1`. If ctpop is fast, only try changing the comparison,
1788+ // and otherwise expand ctpop into a few simple instructions.
1789+ Type *OpTy = X->getType ();
1790+ if (TLI->isCtpopFast (TLI->getValueType (*DL, OpTy))) {
1791+ // Look for `ctpop(x) ==/!= 1`, where `ctpop(x)` is known to be non-zero.
1792+ if (!IsStrictlyPowerOf2Test || !isKnownNonZero (Cmp->getOperand (0 ), *DL))
1793+ return false ;
1794+
1795+ // ctpop(x) == 1 -> ctpop(x) u< 2
1796+ // ctpop(x) != 1 -> ctpop(x) u> 1
1797+ if (Pred == ICmpInst::ICMP_EQ) {
1798+ Cmp->setOperand (1 , ConstantInt::get (OpTy, 2 ));
1799+ Cmp->setPredicate (ICmpInst::ICMP_ULT);
1800+ } else {
1801+ Cmp->setPredicate (ICmpInst::ICMP_UGT);
1802+ }
1803+ return true ;
1804+ }
1805+
1806+ Value *NewCmp;
1807+ if (IsPowerOf2OrZeroTest ||
1808+ (IsStrictlyPowerOf2Test && isKnownNonZero (Cmp->getOperand (0 ), *DL))) {
1809+ // ctpop(x) u< 2 -> (x & (x - 1)) == 0
1810+ // ctpop(x) u> 1 -> (x & (x - 1)) != 0
17901811 IRBuilder<> Builder (Cmp);
17911812 Value *Sub = Builder.CreateAdd (X, Constant::getAllOnesValue (OpTy));
17921813 Value *And = Builder.CreateAnd (X, Sub);
17931814 CmpInst::Predicate NewPred =
17941815 (Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_EQ)
17951816 ? CmpInst::ICMP_EQ
17961817 : CmpInst::ICMP_NE;
1797- Value *NewCmp =
1798- Builder.CreateICmp (NewPred, And, ConstantInt::getNullValue (OpTy));
1799- Cmp->replaceAllUsesWith (NewCmp);
1800- RecursivelyDeleteTriviallyDeadInstructions (Cmp);
1801- return true ;
1802- }
1803-
1804- // ctpop(x) == 1 -> (x ^ (x - 1)) u> (x - 1)
1805- // ctpop(x) != 1 -> (x ^ (x - 1)) u<= (x - 1)
1806- if (ICmpInst::isEquality (Pred) && *C == 1 ) {
1818+ NewCmp = Builder.CreateICmp (NewPred, And, ConstantInt::getNullValue (OpTy));
1819+ } else {
1820+ // ctpop(x) == 1 -> (x ^ (x - 1)) u> (x - 1)
1821+ // ctpop(x) != 1 -> (x ^ (x - 1)) u<= (x - 1)
18071822 IRBuilder<> Builder (Cmp);
18081823 Value *Sub = Builder.CreateAdd (X, Constant::getAllOnesValue (OpTy));
18091824 Value *Xor = Builder.CreateXor (X, Sub);
18101825 CmpInst::Predicate NewPred =
18111826 Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE;
1812- Value *NewCmp = Builder.CreateICmp (NewPred, Xor, Sub);
1813- Cmp->replaceAllUsesWith (NewCmp);
1814- RecursivelyDeleteTriviallyDeadInstructions (Cmp);
1815- return true ;
1827+ NewCmp = Builder.CreateICmp (NewPred, Xor, Sub);
18161828 }
18171829
1818- return false ;
1830+ Cmp->replaceAllUsesWith (NewCmp);
1831+ RecursivelyDeleteTriviallyDeadInstructions (Cmp);
1832+ return true ;
18191833}
18201834
18211835// / Sink the given CmpInst into user blocks to reduce the number of virtual
@@ -2204,31 +2218,6 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
22042218 return false ;
22052219}
22062220
2207- // / Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`.
2208- // / This function converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` if the
2209- // / result cannot be zero.
2210- static bool adjustIsPower2Test (CmpInst *Cmp, const TargetLowering &TLI,
2211- const TargetTransformInfo &TTI,
2212- const DataLayout &DL) {
2213- CmpPredicate Pred;
2214- if (!match (Cmp, m_ICmp (Pred, m_Intrinsic<Intrinsic::ctpop>(), m_One ())))
2215- return false ;
2216- if (!ICmpInst::isEquality (Pred))
2217- return false ;
2218- auto *II = cast<IntrinsicInst>(Cmp->getOperand (0 ));
2219-
2220- if (isKnownNonZero (II, DL)) {
2221- if (Pred == ICmpInst::ICMP_EQ) {
2222- Cmp->setOperand (1 , ConstantInt::get (II->getType (), 2 ));
2223- Cmp->setPredicate (ICmpInst::ICMP_ULT);
2224- } else {
2225- Cmp->setPredicate (ICmpInst::ICMP_UGT);
2226- }
2227- return true ;
2228- }
2229- return false ;
2230- }
2231-
22322221bool CodeGenPrepare::optimizeCmp (CmpInst *Cmp, ModifyDT &ModifiedDT) {
22332222 if (sinkCmpExpression (Cmp, *TLI))
22342223 return true ;
@@ -2239,7 +2228,7 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
22392228 if (combineToUSubWithOverflow (Cmp, ModifiedDT))
22402229 return true ;
22412230
2242- if (unfoldPow2Test (Cmp))
2231+ if (unfoldPowerOf2Test (Cmp))
22432232 return true ;
22442233
22452234 if (foldICmpWithDominatingICmp (Cmp, *TLI))
@@ -2251,9 +2240,6 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
22512240 if (foldFCmpToFPClassTest (Cmp, *TLI, *DL))
22522241 return true ;
22532242
2254- if (adjustIsPower2Test (Cmp, *TLI, *TTI, *DL))
2255- return true ;
2256-
22572243 return false ;
22582244}
22592245
0 commit comments