@@ -474,6 +474,7 @@ class CodeGenPrepare {
474474 bool optimizeURem (Instruction *Rem);
475475 bool combineToUSubWithOverflow (CmpInst *Cmp, ModifyDT &ModifiedDT);
476476 bool combineToUAddWithOverflow (CmpInst *Cmp, ModifyDT &ModifiedDT);
477+ bool unfoldPowerOf2Test (CmpInst *Cmp);
477478 void verifyBFIUpdates (Function &F);
478479 bool _run (Function &F);
479480};
@@ -1762,6 +1763,75 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
17621763 return true ;
17631764}
17641765
1766+ // Decanonicalizes icmp+ctpop power-of-two test if ctpop is slow.
1767+ // The same transformation exists in DAG combiner, but we repeat it here because
1768+ // DAG builder can break the pattern by moving icmp into a successor block.
1769+ bool CodeGenPrepare::unfoldPowerOf2Test (CmpInst *Cmp) {
1770+ CmpPredicate Pred;
1771+ Value *X;
1772+ const APInt *C;
1773+
1774+ // (icmp (ctpop x), c)
1775+ if (!match (Cmp, m_ICmp (Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value (X)),
1776+ m_APIntAllowPoison (C))))
1777+ return false ;
1778+
1779+ // We're only interested in "is power of 2 [or zero]" patterns.
1780+ bool IsStrictlyPowerOf2Test = ICmpInst::isEquality (Pred) && *C == 1 ;
1781+ bool IsPowerOf2OrZeroTest = (Pred == CmpInst::ICMP_ULT && *C == 2 ) ||
1782+ (Pred == CmpInst::ICMP_UGT && *C == 1 );
1783+ if (!IsStrictlyPowerOf2Test && !IsPowerOf2OrZeroTest)
1784+ return false ;
1785+
1786+ // Some targets have better codegen for `ctpop(x) u</u>= 2/1`than for
1787+ // `ctpop(x) ==/!= 1`. If ctpop is fast, only try changing the comparison,
1788+ // and otherwise expand ctpop into a few simple instructions.
1789+ Type *OpTy = X->getType ();
1790+ if (TLI->isCtpopFast (TLI->getValueType (*DL, OpTy))) {
1791+ // Look for `ctpop(x) ==/!= 1`, where `ctpop(x)` is known to be non-zero.
1792+ if (!IsStrictlyPowerOf2Test || !isKnownNonZero (Cmp->getOperand (0 ), *DL))
1793+ return false ;
1794+
1795+ // ctpop(x) == 1 -> ctpop(x) u< 2
1796+ // ctpop(x) != 1 -> ctpop(x) u> 1
1797+ if (Pred == ICmpInst::ICMP_EQ) {
1798+ Cmp->setOperand (1 , ConstantInt::get (OpTy, 2 ));
1799+ Cmp->setPredicate (ICmpInst::ICMP_ULT);
1800+ } else {
1801+ Cmp->setPredicate (ICmpInst::ICMP_UGT);
1802+ }
1803+ return true ;
1804+ }
1805+
1806+ Value *NewCmp;
1807+ if (IsPowerOf2OrZeroTest ||
1808+ (IsStrictlyPowerOf2Test && isKnownNonZero (Cmp->getOperand (0 ), *DL))) {
1809+ // ctpop(x) u< 2 -> (x & (x - 1)) == 0
1810+ // ctpop(x) u> 1 -> (x & (x - 1)) != 0
1811+ IRBuilder<> Builder (Cmp);
1812+ Value *Sub = Builder.CreateAdd (X, Constant::getAllOnesValue (OpTy));
1813+ Value *And = Builder.CreateAnd (X, Sub);
1814+ CmpInst::Predicate NewPred =
1815+ (Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_EQ)
1816+ ? CmpInst::ICMP_EQ
1817+ : CmpInst::ICMP_NE;
1818+ NewCmp = Builder.CreateICmp (NewPred, And, ConstantInt::getNullValue (OpTy));
1819+ } else {
1820+ // ctpop(x) == 1 -> (x ^ (x - 1)) u> (x - 1)
1821+ // ctpop(x) != 1 -> (x ^ (x - 1)) u<= (x - 1)
1822+ IRBuilder<> Builder (Cmp);
1823+ Value *Sub = Builder.CreateAdd (X, Constant::getAllOnesValue (OpTy));
1824+ Value *Xor = Builder.CreateXor (X, Sub);
1825+ CmpInst::Predicate NewPred =
1826+ Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE;
1827+ NewCmp = Builder.CreateICmp (NewPred, Xor, Sub);
1828+ }
1829+
1830+ Cmp->replaceAllUsesWith (NewCmp);
1831+ RecursivelyDeleteTriviallyDeadInstructions (Cmp);
1832+ return true ;
1833+ }
1834+
17651835// / Sink the given CmpInst into user blocks to reduce the number of virtual
17661836// / registers that must be created and coalesced. This is a clear win except on
17671837// / targets with multiple condition code registers (PowerPC), where it might
@@ -2148,31 +2218,6 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
21482218 return false ;
21492219}
21502220
2151- // / Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`.
2152- // / This function converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` if the
2153- // / result cannot be zero.
2154- static bool adjustIsPower2Test (CmpInst *Cmp, const TargetLowering &TLI,
2155- const TargetTransformInfo &TTI,
2156- const DataLayout &DL) {
2157- CmpPredicate Pred;
2158- if (!match (Cmp, m_ICmp (Pred, m_Intrinsic<Intrinsic::ctpop>(), m_One ())))
2159- return false ;
2160- if (!ICmpInst::isEquality (Pred))
2161- return false ;
2162- auto *II = cast<IntrinsicInst>(Cmp->getOperand (0 ));
2163-
2164- if (isKnownNonZero (II, DL)) {
2165- if (Pred == ICmpInst::ICMP_EQ) {
2166- Cmp->setOperand (1 , ConstantInt::get (II->getType (), 2 ));
2167- Cmp->setPredicate (ICmpInst::ICMP_ULT);
2168- } else {
2169- Cmp->setPredicate (ICmpInst::ICMP_UGT);
2170- }
2171- return true ;
2172- }
2173- return false ;
2174- }
2175-
21762221bool CodeGenPrepare::optimizeCmp (CmpInst *Cmp, ModifyDT &ModifiedDT) {
21772222 if (sinkCmpExpression (Cmp, *TLI))
21782223 return true ;
@@ -2183,6 +2228,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
21832228 if (combineToUSubWithOverflow (Cmp, ModifiedDT))
21842229 return true ;
21852230
2231+ if (unfoldPowerOf2Test (Cmp))
2232+ return true ;
2233+
21862234 if (foldICmpWithDominatingICmp (Cmp, *TLI))
21872235 return true ;
21882236
@@ -2192,9 +2240,6 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
21922240 if (foldFCmpToFPClassTest (Cmp, *TLI, *DL))
21932241 return true ;
21942242
2195- if (adjustIsPower2Test (Cmp, *TLI, *TTI, *DL))
2196- return true ;
2197-
21982243 return false ;
21992244}
22002245
0 commit comments