Skip to content

Commit f06ebc3

Browse files
committed
Address Comments 2
1 parent d4ab4a3 commit f06ebc3

File tree

2 files changed

+80
-28
lines changed

2 files changed

+80
-28
lines changed

llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

Lines changed: 40 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1487,6 +1487,7 @@ static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
14871487
/// lowsum = xh*yl + xl*yh + (xl*yl>>32)
14881488
/// Ladder: xh*yh + c2>>32 + c3>>32
14891489
/// c2 = xh*yl + (xl*yl>>32); c3 = c2&0xffffffff + xl*yh
1490+
/// or c2 = (xl*yh&0xffffffff) + xh*yl + (xl*yl>>32); c3 = xl*yh
14901491
/// Carry4: xh*yh + carry + crosssum>>32 + (xl*yl + crosssum&0xffffffff) >> 32
14911492
/// crosssum = xh*yl + xl*yh
14921493
/// carry = crosssum < xh*yl ? 0x100000000 : 0
@@ -1510,9 +1511,9 @@ static bool foldMulHigh(Instruction &I) {
15101511
Type *NTy = Ty->getWithNewBitWidth(BW * 2);
15111512
Value *XExt = Builder.CreateZExt(X, NTy);
15121513
Value *YExt = Builder.CreateZExt(Y, NTy);
1513-
Value *Mul = Builder.CreateMul(XExt, YExt);
1514+
Value *Mul = Builder.CreateMul(XExt, YExt, "", true);
15141515
Value *High = Builder.CreateLShr(Mul, BW);
1515-
Value *Res = Builder.CreateTrunc(High, Ty);
1516+
Value *Res = Builder.CreateTrunc(High, Ty, "", true);
15161517
Res->takeName(&I);
15171518
I.replaceAllUsesWith(Res);
15181519
LLVM_DEBUG(dbgs() << "Created long multiply from parts of " << *X << " and "
@@ -1542,8 +1543,7 @@ static bool foldMulHigh(Instruction &I) {
15421543
m_OneUse(m_Select(
15431544
m_OneUse(m_SpecificICmp(ICmpInst::ICMP_ULT, m_Value(LowSum),
15441545
m_Value(XhYl))),
1545-
m_SpecificInt(APInt::getOneBitSet(BW, BW / 2)),
1546-
m_SpecificInt(0)))))
1546+
m_SpecificInt(APInt::getOneBitSet(BW, BW / 2)), m_Zero()))))
15471547
return false;
15481548

15491549
// XhYl can be Xh*Yl or Xl*Yh
@@ -1556,7 +1556,7 @@ static bool foldMulHigh(Instruction &I) {
15561556
if (XhYl->hasNUsesOrMore(3))
15571557
return false;
15581558

1559-
// B = LowSum >> 16
1559+
// B = LowSum >> 32
15601560
if (!match(B,
15611561
m_OneUse(m_LShr(m_Specific(LowSum), m_SpecificInt(BW / 2)))) ||
15621562
LowSum->hasNUsesOrMore(3))
@@ -1587,28 +1587,43 @@ static bool foldMulHigh(Instruction &I) {
15871587
auto FoldMulHighLadder = [&](Value *X, Value *Y, Instruction *A,
15881588
Instruction *B) {
15891589
// xh*yh + c2>>32 + c3>>32
1590-
// c2 = xh*yl + (xl*yl >> 32); c3 = c2&0xffffffff + xl*yh
1591-
Value *XlYh, *XhYl, *C2, *C3;
1590+
// c2 = xh*yl + (xl*yl>>32); c3 = c2&0xffffffff + xl*yh
1591+
// or c2 = (xl*yh&0xffffffff) + xh*yl + (xl*yl>>32); c3 = xl*yh
1592+
Value *XlYh, *XhYl, *XlYl, *C2, *C3;
15921593
// Strip off the two expected shifts.
15931594
if (!match(A, m_LShr(m_Value(C2), m_SpecificInt(BW / 2))) ||
15941595
!match(B, m_LShr(m_Value(C3), m_SpecificInt(BW / 2))))
15951596
return false;
15961597

1597-
// Match c3 = c2&0xffffffff + xl*yh
1598-
if (!match(C3, m_c_Add(m_And(m_Specific(C2), m_SpecificInt(LowMask)),
1599-
m_Value(XhYl))))
1598+
if (match(C3, m_c_Add(m_Add(m_Value(), m_Value()), m_Value())))
16001599
std::swap(C2, C3);
1601-
if (!match(C3,
1602-
m_c_Add(m_OneUse(m_And(m_Specific(C2), m_SpecificInt(LowMask))),
1603-
m_Value(XhYl))) ||
1604-
!C3->hasOneUse() || C2->hasNUsesOrMore(3))
1605-
return false;
1600+
// Try to match c2 = (xl*yh&0xffffffff) + xh*yl + (xl*yl>>32)
1601+
if (match(C2, m_c_Add(m_c_Add(m_And(m_Specific(C3), m_SpecificInt(LowMask)),
1602+
m_Value(XlYh)),
1603+
m_LShr(m_Value(XlYl), m_SpecificInt(BW / 2)))) ||
1604+
match(C2, m_c_Add(m_c_Add(m_And(m_Specific(C3), m_SpecificInt(LowMask)),
1605+
m_LShr(m_Value(XlYl), m_SpecificInt(BW / 2))),
1606+
m_Value(XlYh))) ||
1607+
match(C2, m_c_Add(m_c_Add(m_LShr(m_Value(XlYl), m_SpecificInt(BW / 2)),
1608+
m_Value(XlYh)),
1609+
m_And(m_Specific(C3), m_SpecificInt(LowMask))))) {
1610+
XhYl = C3;
1611+
} else {
1612+
// Match c3 = c2&0xffffffff + xl*yh
1613+
if (!match(C3, m_c_Add(m_And(m_Specific(C2), m_SpecificInt(LowMask)),
1614+
m_Value(XlYh))))
1615+
std::swap(C2, C3);
1616+
if (!match(C3, m_c_Add(m_OneUse(
1617+
m_And(m_Specific(C2), m_SpecificInt(LowMask))),
1618+
m_Value(XlYh))) ||
1619+
!C3->hasOneUse() || C2->hasNUsesOrMore(3))
1620+
return false;
16061621

1607-
// Match c2 = xh*yl + (xl*yl >> 32)
1608-
Value *XlYl;
1609-
if (!match(C2, m_c_Add(m_LShr(m_Value(XlYl), m_SpecificInt(BW / 2)),
1610-
m_Value(XlYh))))
1611-
return false;
1622+
// Match c2 = xh*yl + (xl*yl >> 32)
1623+
if (!match(C2, m_c_Add(m_LShr(m_Value(XlYl), m_SpecificInt(BW / 2)),
1624+
m_Value(XhYl))))
1625+
return false;
1626+
}
16121627

16131628
// Match XhYl and XlYh - they can appear either way around.
16141629
if (!CheckHiLo(XlYh, Y, X))
@@ -1696,8 +1711,7 @@ static bool foldMulHigh(Instruction &I) {
16961711
m_OneUse(m_Select(
16971712
m_OneUse(m_SpecificICmp(ICmpInst::ICMP_ULT,
16981713
m_Value(CrossSum), m_Value(XhYl))),
1699-
m_SpecificInt(APInt::getOneBitSet(BW, BW / 2)),
1700-
m_SpecificInt(0)))))
1714+
m_SpecificInt(APInt::getOneBitSet(BW, BW / 2)), m_Zero()))))
17011715
return false;
17021716

17031717
if (!match(B, m_LShr(m_Specific(CrossSum), m_SpecificInt(BW / 2))))
@@ -1720,12 +1734,10 @@ static bool foldMulHigh(Instruction &I) {
17201734
std::swap(X, Y);
17211735
if (!CheckHiLo(XhYl, X, Y))
17221736
return false;
1723-
if (!match(CrossSum,
1724-
m_c_Add(m_Specific(XhYl),
1725-
m_OneUse(m_c_Mul(
1726-
m_LShr(m_Specific(Y), m_SpecificInt(BW / 2)),
1727-
m_And(m_Specific(X), m_SpecificInt(LowMask)))))) ||
1728-
CrossSum->hasNUsesOrMore(4) || XhYl->hasNUsesOrMore(3))
1737+
Value *XlYh;
1738+
if (!match(CrossSum, m_c_Add(m_Specific(XhYl), m_OneUse(m_Value(XlYh)))) ||
1739+
!CheckHiLo(XlYh, Y, X) || CrossSum->hasNUsesOrMore(4) ||
1740+
XhYl->hasNUsesOrMore(3))
17291741
return false;
17301742

17311743
return CreateMulHigh(X, Y);

llvm/test/Transforms/AggressiveInstCombine/umulh_ladder.ll

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -816,3 +816,43 @@ define i64 @umulh_variant__mul_use__u2(i64 %x, i64 %y) {
816816
%hw64 = add nuw i64 %u2, %u1_hi
817817
ret i64 %hw64
818818
}
819+
820+
define [2 x i64] @XXH_mult64to128(i64 noundef %lhs, i64 noundef %rhs) {
821+
; CHECK-LABEL: define [2 x i64] @XXH_mult64to128(
822+
; CHECK-SAME: i64 noundef [[LHS:%.*]], i64 noundef [[RHS:%.*]]) {
823+
; CHECK-NEXT: [[ENTRY:.*:]]
824+
; CHECK-NEXT: [[TMP0:%.*]] = zext i64 [[RHS]] to i128
825+
; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[LHS]] to i128
826+
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i128 [[TMP0]], [[TMP1]]
827+
; CHECK-NEXT: [[TMP3:%.*]] = lshr i128 [[TMP2]], 64
828+
; CHECK-NEXT: [[ADD16:%.*]] = trunc nuw i128 [[TMP3]] to i64
829+
; CHECK-NEXT: [[SHR102:%.*]] = mul i64 [[LHS]], [[RHS]]
830+
; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x i64] poison, i64 [[SHR102]], 0
831+
; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x i64] [[DOTFCA_0_INSERT]], i64 [[ADD16]], 1
832+
; CHECK-NEXT: ret [2 x i64] [[DOTFCA_1_INSERT]]
833+
;
834+
entry:
835+
%and = and i64 %lhs, 4294967295
836+
%and1 = and i64 %rhs, 4294967295
837+
%mul.i = mul nuw i64 %and1, %and
838+
%shr = lshr i64 %lhs, 32
839+
%mul.i27 = mul nuw i64 %and1, %shr
840+
%shr5 = lshr i64 %rhs, 32
841+
%mul.i28 = mul nuw i64 %shr5, %and
842+
%mul.i29 = mul nuw i64 %shr5, %shr
843+
%shr10 = lshr i64 %mul.i, 32
844+
%and11 = and i64 %mul.i27, 4294967295
845+
%add = add nuw i64 %and11, %mul.i28
846+
%add12 = add nuw i64 %add, %shr10
847+
%shr13 = lshr i64 %mul.i27, 32
848+
%shr14 = lshr i64 %add12, 32
849+
%add15 = add nuw i64 %shr13, %mul.i29
850+
%add16 = add nuw i64 %add15, %shr14
851+
%shl = shl i64 %add12, 32
852+
%and17 = and i64 %mul.i, 4294967295
853+
%or = or disjoint i64 %shl, %and17
854+
%.fca.0.insert = insertvalue [2 x i64] poison, i64 %or, 0
855+
%.fca.1.insert = insertvalue [2 x i64] %.fca.0.insert, i64 %add16, 1
856+
ret [2 x i64] %.fca.1.insert
857+
}
858+

0 commit comments

Comments
 (0)