@@ -1487,6 +1487,7 @@ static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
14871487// / lowsum = xh*yl + xl*yh + (xl*yl>>32)
14881488// / Ladder: xh*yh + c2>>32 + c3>>32
14891489// / c2 = xh*yl + (xl*yl>>32); c3 = c2&0xffffffff + xl*yh
1490+ // / or c2 = (xl*yh&0xffffffff) + xh*yl + (xl*yl>>32); c3 = xl*yh
14901491// / Carry4: xh*yh + carry + crosssum>>32 + (xl*yl + crosssum&0xffffffff) >> 32
14911492// / crosssum = xh*yl + xl*yh
14911492// / carry = crosssum < xh*yl ? 0x100000000 : 0
@@ -1510,9 +1511,9 @@ static bool foldMulHigh(Instruction &I) {
15101511 Type *NTy = Ty->getWithNewBitWidth (BW * 2 );
15111512 Value *XExt = Builder.CreateZExt (X, NTy);
15121513 Value *YExt = Builder.CreateZExt (Y, NTy);
1513- Value *Mul = Builder.CreateMul (XExt, YExt);
1514+ Value *Mul = Builder.CreateMul (XExt, YExt, " " , true );
15141515 Value *High = Builder.CreateLShr (Mul, BW);
1515- Value *Res = Builder.CreateTrunc (High, Ty);
1516+ Value *Res = Builder.CreateTrunc (High, Ty, " " , true );
15161517 Res->takeName (&I);
15171518 I.replaceAllUsesWith (Res);
15181519 LLVM_DEBUG (dbgs () << " Created long multiply from parts of " << *X << " and "
@@ -1542,8 +1543,7 @@ static bool foldMulHigh(Instruction &I) {
15421543 m_OneUse (m_Select (
15431544 m_OneUse (m_SpecificICmp (ICmpInst::ICMP_ULT, m_Value (LowSum),
15441545 m_Value (XhYl))),
1545- m_SpecificInt (APInt::getOneBitSet (BW, BW / 2 )),
1546- m_SpecificInt (0 )))))
1546+ m_SpecificInt (APInt::getOneBitSet (BW, BW / 2 )), m_Zero ()))))
15471547 return false ;
15481548
15491549 // XhYl can be Xh*Yl or Xl*Yh
@@ -1556,7 +1556,7 @@ static bool foldMulHigh(Instruction &I) {
15561556 if (XhYl->hasNUsesOrMore (3 ))
15571557 return false ;
15581558
1559- // B = LowSum >> 16
1559+ // B = LowSum >> 32
15601560 if (!match (B,
15611561 m_OneUse (m_LShr (m_Specific (LowSum), m_SpecificInt (BW / 2 )))) ||
15621562 LowSum->hasNUsesOrMore (3 ))
@@ -1587,28 +1587,43 @@ static bool foldMulHigh(Instruction &I) {
15871587 auto FoldMulHighLadder = [&](Value *X, Value *Y, Instruction *A,
15881588 Instruction *B) {
15891589 // xh*yh + c2>>32 + c3>>32
1590- // c2 = xh*yl + (xl*yl >> 32); c3 = c2&0xffffffff + xl*yh
1591- Value *XlYh, *XhYl, *C2, *C3;
1590+ // c2 = xh*yl + (xl*yl>>32); c3 = c2&0xffffffff + xl*yh
1591+ // / or c2 = (xl*yh&0xffffffff) + xh*yl + (xl*yl>>32); c3 = xl*yh
1592+ Value *XlYh, *XhYl, *XlYl, *C2, *C3;
15921593 // Strip off the two expected shifts.
15931594 if (!match (A, m_LShr (m_Value (C2), m_SpecificInt (BW / 2 ))) ||
15941595 !match (B, m_LShr (m_Value (C3), m_SpecificInt (BW / 2 ))))
15951596 return false ;
15961597
1597- // Match c3 = c2&0xffffffff + xl*yh
1598- if (!match (C3, m_c_Add (m_And (m_Specific (C2), m_SpecificInt (LowMask)),
1599- m_Value (XhYl))))
1598+ if (match (C3, m_c_Add (m_Add (m_Value (), m_Value ()), m_Value ())))
16001599 std::swap (C2, C3);
1601- if (!match (C3,
1602- m_c_Add (m_OneUse (m_And (m_Specific (C2), m_SpecificInt (LowMask))),
1603- m_Value (XhYl))) ||
1604- !C3->hasOneUse () || C2->hasNUsesOrMore (3 ))
1605- return false ;
1600+ // Try to match c2 = (xl*yh&0xffffffff) + xh*yl + (xl*yl>>32)
1601+ if (match (C2, m_c_Add (m_c_Add (m_And (m_Specific (C3), m_SpecificInt (LowMask)),
1602+ m_Value (XlYh)),
1603+ m_LShr (m_Value (XlYl), m_SpecificInt (BW / 2 )))) ||
1604+ match (C2, m_c_Add (m_c_Add (m_And (m_Specific (C3), m_SpecificInt (LowMask)),
1605+ m_LShr (m_Value (XlYl), m_SpecificInt (BW / 2 ))),
1606+ m_Value (XlYh))) ||
1607+ match (C2, m_c_Add (m_c_Add (m_LShr (m_Value (XlYl), m_SpecificInt (BW / 2 )),
1608+ m_Value (XlYh)),
1609+ m_And (m_Specific (C3), m_SpecificInt (LowMask))))) {
1610+ XhYl = C3;
1611+ } else {
1612+ // Match c3 = c2&0xffffffff + xl*yh
1613+ if (!match (C3, m_c_Add (m_And (m_Specific (C2), m_SpecificInt (LowMask)),
1614+ m_Value (XlYh))))
1615+ std::swap (C2, C3);
1616+ if (!match (C3, m_c_Add (m_OneUse (
1617+ m_And (m_Specific (C2), m_SpecificInt (LowMask))),
1618+ m_Value (XlYh))) ||
1619+ !C3->hasOneUse () || C2->hasNUsesOrMore (3 ))
1620+ return false ;
16061621
1607- // Match c2 = xh*yl + (xl*yl >> 32)
1608- Value * XlYl;
1609- if (! match (C2, m_c_Add ( m_LShr ( m_Value (XlYl), m_SpecificInt (BW / 2 )),
1610- m_Value (XlYh))))
1611- return false ;
1622+ // Match c2 = xh*yl + (xl*yl >> 32)
1623+ if (! match (C2, m_c_Add ( m_LShr ( m_Value ( XlYl), m_SpecificInt (BW / 2 )),
1624+ m_Value (XhYl))))
1625+ return false ;
1626+ }
16121627
16131628 // Match XhYl and XlYh - they can appear either way around.
16141629 if (!CheckHiLo (XlYh, Y, X))
@@ -1696,8 +1711,7 @@ static bool foldMulHigh(Instruction &I) {
16961711 m_OneUse (m_Select (
16971712 m_OneUse (m_SpecificICmp (ICmpInst::ICMP_ULT,
16981713 m_Value (CrossSum), m_Value (XhYl))),
1699- m_SpecificInt (APInt::getOneBitSet (BW, BW / 2 )),
1700- m_SpecificInt (0 )))))
1714+ m_SpecificInt (APInt::getOneBitSet (BW, BW / 2 )), m_Zero ()))))
17011715 return false ;
17021716
17031717 if (!match (B, m_LShr (m_Specific (CrossSum), m_SpecificInt (BW / 2 ))))
@@ -1720,12 +1734,10 @@ static bool foldMulHigh(Instruction &I) {
17201734 std::swap (X, Y);
17211735 if (!CheckHiLo (XhYl, X, Y))
17221736 return false ;
1723- if (!match (CrossSum,
1724- m_c_Add (m_Specific (XhYl),
1725- m_OneUse (m_c_Mul (
1726- m_LShr (m_Specific (Y), m_SpecificInt (BW / 2 )),
1727- m_And (m_Specific (X), m_SpecificInt (LowMask)))))) ||
1728- CrossSum->hasNUsesOrMore (4 ) || XhYl->hasNUsesOrMore (3 ))
1737+ Value *XlYh;
1738+ if (!match (CrossSum, m_c_Add (m_Specific (XhYl), m_OneUse (m_Value (XlYh)))) ||
1739+ !CheckHiLo (XlYh, Y, X) || CrossSum->hasNUsesOrMore (4 ) ||
1740+ XhYl->hasNUsesOrMore (3 ))
17291741 return false ;
17301742
17311743 return CreateMulHigh (X, Y);
0 commit comments