Skip to content

Commit cca766c

Browse files
pfusikvar-const
authored and committed
[DAGCombiner] Fold subtraction if above threshold to umin (llvm#134235)
Folds patterns such as:

    unsigned foo(unsigned x, unsigned y) {
      return x >= y ? x - y : x;
    }

Before, on RISC-V:

    sltu a2, a0, a1
    addi a2, a2, -1
    and a1, a1, a2
    subw a0, a0, a1

Or, with Zicond:

    sltu a2, a0, a1
    czero.nez a1, a1, a2
    subw a0, a0, a1

After, with Zbb:

    subw a1, a0, a1
    minu a0, a0, a1

Only applies to unsigned comparisons. If `x >= y` then `x - y` is less than or equal to `x`. Otherwise, `x - y` wraps and is greater than `x`.
1 parent eb5fa0f commit cca766c

File tree

3 files changed

+216
-84
lines changed

3 files changed

+216
-84
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4251,6 +4251,23 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
42514251
sd_match(N1, m_UMaxLike(m_Specific(A), m_Specific(B))))
42524252
return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
42534253

4254+
// (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y))
4255+
// (sub x, (select (uge x, y), y, 0)) -> (umin x, (sub x, y))
4256+
auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
4257+
if ((LK.first == TargetLoweringBase::TypeLegal ||
4258+
LK.first == TargetLoweringBase::TypePromoteInteger) &&
4259+
TLI.isOperationLegal(ISD::UMIN, LK.second)) {
4260+
SDValue Y;
4261+
if (sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
4262+
m_SpecificCondCode(ISD::SETULT)),
4263+
m_Zero(), m_Deferred(Y)))) ||
4264+
sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
4265+
m_SpecificCondCode(ISD::SETUGE)),
4266+
m_Deferred(Y), m_Zero()))))
4267+
return DAG.getNode(ISD::UMIN, DL, VT, N0,
4268+
DAG.getNode(ISD::SUB, DL, VT, N0, Y));
4269+
}
4270+
42544271
return SDValue();
42554272
}
42564273

llvm/test/CodeGen/RISCV/rv32zbb.ll

Lines changed: 89 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -1481,15 +1481,23 @@ entry:
14811481
}
14821482

14831483
define i8 @sub_if_uge_i8(i8 %x, i8 %y) {
1484-
; CHECK-LABEL: sub_if_uge_i8:
1485-
; CHECK: # %bb.0:
1486-
; CHECK-NEXT: zext.b a2, a1
1487-
; CHECK-NEXT: zext.b a3, a0
1488-
; CHECK-NEXT: sltu a2, a3, a2
1489-
; CHECK-NEXT: addi a2, a2, -1
1490-
; CHECK-NEXT: and a1, a2, a1
1491-
; CHECK-NEXT: sub a0, a0, a1
1492-
; CHECK-NEXT: ret
1484+
; RV32I-LABEL: sub_if_uge_i8:
1485+
; RV32I: # %bb.0:
1486+
; RV32I-NEXT: zext.b a2, a1
1487+
; RV32I-NEXT: zext.b a3, a0
1488+
; RV32I-NEXT: sltu a2, a3, a2
1489+
; RV32I-NEXT: addi a2, a2, -1
1490+
; RV32I-NEXT: and a1, a2, a1
1491+
; RV32I-NEXT: sub a0, a0, a1
1492+
; RV32I-NEXT: ret
1493+
;
1494+
; RV32ZBB-LABEL: sub_if_uge_i8:
1495+
; RV32ZBB: # %bb.0:
1496+
; RV32ZBB-NEXT: zext.b a2, a0
1497+
; RV32ZBB-NEXT: sub a0, a0, a1
1498+
; RV32ZBB-NEXT: zext.b a0, a0
1499+
; RV32ZBB-NEXT: minu a0, a2, a0
1500+
; RV32ZBB-NEXT: ret
14931501
%cmp = icmp ult i8 %x, %y
14941502
%select = select i1 %cmp, i8 0, i8 %y
14951503
%sub = sub nuw i8 %x, %select
@@ -1511,12 +1519,10 @@ define i16 @sub_if_uge_i16(i16 %x, i16 %y) {
15111519
;
15121520
; RV32ZBB-LABEL: sub_if_uge_i16:
15131521
; RV32ZBB: # %bb.0:
1514-
; RV32ZBB-NEXT: zext.h a2, a1
1515-
; RV32ZBB-NEXT: zext.h a3, a0
1516-
; RV32ZBB-NEXT: sltu a2, a3, a2
1517-
; RV32ZBB-NEXT: addi a2, a2, -1
1518-
; RV32ZBB-NEXT: and a1, a2, a1
1522+
; RV32ZBB-NEXT: zext.h a2, a0
15191523
; RV32ZBB-NEXT: sub a0, a0, a1
1524+
; RV32ZBB-NEXT: zext.h a0, a0
1525+
; RV32ZBB-NEXT: minu a0, a2, a0
15201526
; RV32ZBB-NEXT: ret
15211527
%cmp = icmp ult i16 %x, %y
15221528
%select = select i1 %cmp, i16 0, i16 %y
@@ -1525,13 +1531,19 @@ define i16 @sub_if_uge_i16(i16 %x, i16 %y) {
15251531
}
15261532

15271533
define i32 @sub_if_uge_i32(i32 %x, i32 %y) {
1528-
; CHECK-LABEL: sub_if_uge_i32:
1529-
; CHECK: # %bb.0:
1530-
; CHECK-NEXT: sltu a2, a0, a1
1531-
; CHECK-NEXT: addi a2, a2, -1
1532-
; CHECK-NEXT: and a1, a2, a1
1533-
; CHECK-NEXT: sub a0, a0, a1
1534-
; CHECK-NEXT: ret
1534+
; RV32I-LABEL: sub_if_uge_i32:
1535+
; RV32I: # %bb.0:
1536+
; RV32I-NEXT: sltu a2, a0, a1
1537+
; RV32I-NEXT: addi a2, a2, -1
1538+
; RV32I-NEXT: and a1, a2, a1
1539+
; RV32I-NEXT: sub a0, a0, a1
1540+
; RV32I-NEXT: ret
1541+
;
1542+
; RV32ZBB-LABEL: sub_if_uge_i32:
1543+
; RV32ZBB: # %bb.0:
1544+
; RV32ZBB-NEXT: sub a1, a0, a1
1545+
; RV32ZBB-NEXT: minu a0, a0, a1
1546+
; RV32ZBB-NEXT: ret
15351547
%cmp = icmp ult i32 %x, %y
15361548
%select = select i1 %cmp, i32 0, i32 %y
15371549
%sub = sub nuw i32 %x, %select
@@ -1643,25 +1655,66 @@ define i32 @sub_if_uge_multiuse_select_i32(i32 %x, i32 %y) {
16431655
}
16441656

16451657
define i32 @sub_if_uge_multiuse_cmp_i32(i32 %x, i32 %y) {
1646-
; CHECK-LABEL: sub_if_uge_multiuse_cmp_i32:
1647-
; CHECK: # %bb.0:
1648-
; CHECK-NEXT: sltu a2, a0, a1
1649-
; CHECK-NEXT: addi a2, a2, -1
1650-
; CHECK-NEXT: and a2, a2, a1
1651-
; CHECK-NEXT: sub a2, a0, a2
1652-
; CHECK-NEXT: bltu a0, a1, .LBB55_2
1653-
; CHECK-NEXT: # %bb.1:
1654-
; CHECK-NEXT: li a0, 4
1655-
; CHECK-NEXT: sll a0, a2, a0
1656-
; CHECK-NEXT: ret
1657-
; CHECK-NEXT: .LBB55_2:
1658-
; CHECK-NEXT: li a0, 2
1659-
; CHECK-NEXT: sll a0, a2, a0
1660-
; CHECK-NEXT: ret
1658+
; RV32I-LABEL: sub_if_uge_multiuse_cmp_i32:
1659+
; RV32I: # %bb.0:
1660+
; RV32I-NEXT: sltu a2, a0, a1
1661+
; RV32I-NEXT: addi a2, a2, -1
1662+
; RV32I-NEXT: and a2, a2, a1
1663+
; RV32I-NEXT: sub a2, a0, a2
1664+
; RV32I-NEXT: bltu a0, a1, .LBB55_2
1665+
; RV32I-NEXT: # %bb.1:
1666+
; RV32I-NEXT: li a0, 4
1667+
; RV32I-NEXT: sll a0, a2, a0
1668+
; RV32I-NEXT: ret
1669+
; RV32I-NEXT: .LBB55_2:
1670+
; RV32I-NEXT: li a0, 2
1671+
; RV32I-NEXT: sll a0, a2, a0
1672+
; RV32I-NEXT: ret
1673+
;
1674+
; RV32ZBB-LABEL: sub_if_uge_multiuse_cmp_i32:
1675+
; RV32ZBB: # %bb.0:
1676+
; RV32ZBB-NEXT: sub a2, a0, a1
1677+
; RV32ZBB-NEXT: minu a2, a0, a2
1678+
; RV32ZBB-NEXT: bltu a0, a1, .LBB55_2
1679+
; RV32ZBB-NEXT: # %bb.1:
1680+
; RV32ZBB-NEXT: li a0, 4
1681+
; RV32ZBB-NEXT: sll a0, a2, a0
1682+
; RV32ZBB-NEXT: ret
1683+
; RV32ZBB-NEXT: .LBB55_2:
1684+
; RV32ZBB-NEXT: li a0, 2
1685+
; RV32ZBB-NEXT: sll a0, a2, a0
1686+
; RV32ZBB-NEXT: ret
16611687
%cmp = icmp ult i32 %x, %y
16621688
%select = select i1 %cmp, i32 0, i32 %y
16631689
%sub = sub nuw i32 %x, %select
16641690
%select2 = select i1 %cmp, i32 2, i32 4
16651691
%shl = shl i32 %sub, %select2
16661692
ret i32 %shl
16671693
}
1694+
1695+
define i32 @sub_if_uge_multiuse_cmp_store_i32(i32 %x, i32 %y, ptr %z) {
1696+
; RV32I-LABEL: sub_if_uge_multiuse_cmp_store_i32:
1697+
; RV32I: # %bb.0:
1698+
; RV32I-NEXT: sltu a3, a0, a1
1699+
; RV32I-NEXT: xori a4, a3, 1
1700+
; RV32I-NEXT: addi a3, a3, -1
1701+
; RV32I-NEXT: and a1, a3, a1
1702+
; RV32I-NEXT: sub a0, a0, a1
1703+
; RV32I-NEXT: sw a4, 0(a2)
1704+
; RV32I-NEXT: ret
1705+
;
1706+
; RV32ZBB-LABEL: sub_if_uge_multiuse_cmp_store_i32:
1707+
; RV32ZBB: # %bb.0:
1708+
; RV32ZBB-NEXT: sltu a3, a0, a1
1709+
; RV32ZBB-NEXT: sub a1, a0, a1
1710+
; RV32ZBB-NEXT: xori a3, a3, 1
1711+
; RV32ZBB-NEXT: minu a0, a0, a1
1712+
; RV32ZBB-NEXT: sw a3, 0(a2)
1713+
; RV32ZBB-NEXT: ret
1714+
%cmp = icmp uge i32 %x, %y
1715+
%conv = zext i1 %cmp to i32
1716+
store i32 %conv, ptr %z, align 4
1717+
%select = select i1 %cmp, i32 %y, i32 0
1718+
%sub = sub nuw i32 %x, %select
1719+
ret i32 %sub
1720+
}

llvm/test/CodeGen/RISCV/rv64zbb.ll

Lines changed: 110 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -1682,15 +1682,23 @@ entry:
16821682
}
16831683

16841684
define i8 @sub_if_uge_i8(i8 %x, i8 %y) {
1685-
; CHECK-LABEL: sub_if_uge_i8:
1686-
; CHECK: # %bb.0:
1687-
; CHECK-NEXT: zext.b a2, a1
1688-
; CHECK-NEXT: zext.b a3, a0
1689-
; CHECK-NEXT: sltu a2, a3, a2
1690-
; CHECK-NEXT: addi a2, a2, -1
1691-
; CHECK-NEXT: and a1, a2, a1
1692-
; CHECK-NEXT: sub a0, a0, a1
1693-
; CHECK-NEXT: ret
1685+
; RV64I-LABEL: sub_if_uge_i8:
1686+
; RV64I: # %bb.0:
1687+
; RV64I-NEXT: zext.b a2, a1
1688+
; RV64I-NEXT: zext.b a3, a0
1689+
; RV64I-NEXT: sltu a2, a3, a2
1690+
; RV64I-NEXT: addi a2, a2, -1
1691+
; RV64I-NEXT: and a1, a2, a1
1692+
; RV64I-NEXT: sub a0, a0, a1
1693+
; RV64I-NEXT: ret
1694+
;
1695+
; RV64ZBB-LABEL: sub_if_uge_i8:
1696+
; RV64ZBB: # %bb.0:
1697+
; RV64ZBB-NEXT: zext.b a2, a0
1698+
; RV64ZBB-NEXT: subw a0, a0, a1
1699+
; RV64ZBB-NEXT: zext.b a0, a0
1700+
; RV64ZBB-NEXT: minu a0, a2, a0
1701+
; RV64ZBB-NEXT: ret
16941702
%cmp = icmp ult i8 %x, %y
16951703
%select = select i1 %cmp, i8 0, i8 %y
16961704
%sub = sub nuw i8 %x, %select
@@ -1712,12 +1720,10 @@ define i16 @sub_if_uge_i16(i16 %x, i16 %y) {
17121720
;
17131721
; RV64ZBB-LABEL: sub_if_uge_i16:
17141722
; RV64ZBB: # %bb.0:
1715-
; RV64ZBB-NEXT: zext.h a2, a1
1716-
; RV64ZBB-NEXT: zext.h a3, a0
1717-
; RV64ZBB-NEXT: sltu a2, a3, a2
1718-
; RV64ZBB-NEXT: addi a2, a2, -1
1719-
; RV64ZBB-NEXT: and a1, a2, a1
1720-
; RV64ZBB-NEXT: sub a0, a0, a1
1723+
; RV64ZBB-NEXT: zext.h a2, a0
1724+
; RV64ZBB-NEXT: subw a0, a0, a1
1725+
; RV64ZBB-NEXT: zext.h a0, a0
1726+
; RV64ZBB-NEXT: minu a0, a2, a0
17211727
; RV64ZBB-NEXT: ret
17221728
%cmp = icmp ult i16 %x, %y
17231729
%select = select i1 %cmp, i16 0, i16 %y
@@ -1726,29 +1732,42 @@ define i16 @sub_if_uge_i16(i16 %x, i16 %y) {
17261732
}
17271733

17281734
define i32 @sub_if_uge_i32(i32 %x, i32 %y) {
1729-
; CHECK-LABEL: sub_if_uge_i32:
1730-
; CHECK: # %bb.0:
1731-
; CHECK-NEXT: sext.w a2, a1
1732-
; CHECK-NEXT: sext.w a3, a0
1733-
; CHECK-NEXT: sltu a2, a3, a2
1734-
; CHECK-NEXT: addi a2, a2, -1
1735-
; CHECK-NEXT: and a1, a2, a1
1736-
; CHECK-NEXT: subw a0, a0, a1
1737-
; CHECK-NEXT: ret
1735+
; RV64I-LABEL: sub_if_uge_i32:
1736+
; RV64I: # %bb.0:
1737+
; RV64I-NEXT: sext.w a2, a1
1738+
; RV64I-NEXT: sext.w a3, a0
1739+
; RV64I-NEXT: sltu a2, a3, a2
1740+
; RV64I-NEXT: addi a2, a2, -1
1741+
; RV64I-NEXT: and a1, a2, a1
1742+
; RV64I-NEXT: subw a0, a0, a1
1743+
; RV64I-NEXT: ret
1744+
;
1745+
; RV64ZBB-LABEL: sub_if_uge_i32:
1746+
; RV64ZBB: # %bb.0:
1747+
; RV64ZBB-NEXT: sext.w a2, a0
1748+
; RV64ZBB-NEXT: subw a0, a0, a1
1749+
; RV64ZBB-NEXT: minu a0, a2, a0
1750+
; RV64ZBB-NEXT: ret
17381751
%cmp = icmp ult i32 %x, %y
17391752
%select = select i1 %cmp, i32 0, i32 %y
17401753
%sub = sub nuw i32 %x, %select
17411754
ret i32 %sub
17421755
}
17431756

17441757
define i64 @sub_if_uge_i64(i64 %x, i64 %y) {
1745-
; CHECK-LABEL: sub_if_uge_i64:
1746-
; CHECK: # %bb.0:
1747-
; CHECK-NEXT: sltu a2, a0, a1
1748-
; CHECK-NEXT: addi a2, a2, -1
1749-
; CHECK-NEXT: and a1, a2, a1
1750-
; CHECK-NEXT: sub a0, a0, a1
1751-
; CHECK-NEXT: ret
1758+
; RV64I-LABEL: sub_if_uge_i64:
1759+
; RV64I: # %bb.0:
1760+
; RV64I-NEXT: sltu a2, a0, a1
1761+
; RV64I-NEXT: addi a2, a2, -1
1762+
; RV64I-NEXT: and a1, a2, a1
1763+
; RV64I-NEXT: sub a0, a0, a1
1764+
; RV64I-NEXT: ret
1765+
;
1766+
; RV64ZBB-LABEL: sub_if_uge_i64:
1767+
; RV64ZBB: # %bb.0:
1768+
; RV64ZBB-NEXT: sub a1, a0, a1
1769+
; RV64ZBB-NEXT: minu a0, a0, a1
1770+
; RV64ZBB-NEXT: ret
17521771
%cmp = icmp ult i64 %x, %y
17531772
%select = select i1 %cmp, i64 0, i64 %y
17541773
%sub = sub nuw i64 %x, %select
@@ -1798,27 +1817,70 @@ define i32 @sub_if_uge_multiuse_select_i32(i32 %x, i32 %y) {
17981817
}
17991818

18001819
define i32 @sub_if_uge_multiuse_cmp_i32(i32 %x, i32 %y) {
1801-
; CHECK-LABEL: sub_if_uge_multiuse_cmp_i32:
1802-
; CHECK: # %bb.0:
1803-
; CHECK-NEXT: sext.w a2, a1
1804-
; CHECK-NEXT: sext.w a3, a0
1805-
; CHECK-NEXT: sltu a4, a3, a2
1806-
; CHECK-NEXT: addi a4, a4, -1
1807-
; CHECK-NEXT: and a1, a4, a1
1808-
; CHECK-NEXT: subw a0, a0, a1
1809-
; CHECK-NEXT: bltu a3, a2, .LBB68_2
1810-
; CHECK-NEXT: # %bb.1:
1811-
; CHECK-NEXT: li a1, 4
1812-
; CHECK-NEXT: sllw a0, a0, a1
1813-
; CHECK-NEXT: ret
1814-
; CHECK-NEXT: .LBB68_2:
1815-
; CHECK-NEXT: li a1, 2
1816-
; CHECK-NEXT: sllw a0, a0, a1
1817-
; CHECK-NEXT: ret
1820+
; RV64I-LABEL: sub_if_uge_multiuse_cmp_i32:
1821+
; RV64I: # %bb.0:
1822+
; RV64I-NEXT: sext.w a2, a1
1823+
; RV64I-NEXT: sext.w a3, a0
1824+
; RV64I-NEXT: sltu a4, a3, a2
1825+
; RV64I-NEXT: addi a4, a4, -1
1826+
; RV64I-NEXT: and a1, a4, a1
1827+
; RV64I-NEXT: subw a0, a0, a1
1828+
; RV64I-NEXT: bltu a3, a2, .LBB68_2
1829+
; RV64I-NEXT: # %bb.1:
1830+
; RV64I-NEXT: li a1, 4
1831+
; RV64I-NEXT: sllw a0, a0, a1
1832+
; RV64I-NEXT: ret
1833+
; RV64I-NEXT: .LBB68_2:
1834+
; RV64I-NEXT: li a1, 2
1835+
; RV64I-NEXT: sllw a0, a0, a1
1836+
; RV64I-NEXT: ret
1837+
;
1838+
; RV64ZBB-LABEL: sub_if_uge_multiuse_cmp_i32:
1839+
; RV64ZBB: # %bb.0:
1840+
; RV64ZBB-NEXT: sext.w a2, a1
1841+
; RV64ZBB-NEXT: sext.w a3, a0
1842+
; RV64ZBB-NEXT: subw a0, a0, a1
1843+
; RV64ZBB-NEXT: minu a0, a3, a0
1844+
; RV64ZBB-NEXT: bltu a3, a2, .LBB68_2
1845+
; RV64ZBB-NEXT: # %bb.1:
1846+
; RV64ZBB-NEXT: li a1, 4
1847+
; RV64ZBB-NEXT: sllw a0, a0, a1
1848+
; RV64ZBB-NEXT: ret
1849+
; RV64ZBB-NEXT: .LBB68_2:
1850+
; RV64ZBB-NEXT: li a1, 2
1851+
; RV64ZBB-NEXT: sllw a0, a0, a1
1852+
; RV64ZBB-NEXT: ret
18181853
%cmp = icmp ult i32 %x, %y
18191854
%select = select i1 %cmp, i32 0, i32 %y
18201855
%sub = sub nuw i32 %x, %select
18211856
%select2 = select i1 %cmp, i32 2, i32 4
18221857
%shl = shl i32 %sub, %select2
18231858
ret i32 %shl
18241859
}
1860+
1861+
define i32 @sub_if_uge_multiuse_cmp_store_i32(i32 signext %x, i32 signext %y, ptr %z) {
1862+
; RV64I-LABEL: sub_if_uge_multiuse_cmp_store_i32:
1863+
; RV64I: # %bb.0:
1864+
; RV64I-NEXT: sltu a3, a0, a1
1865+
; RV64I-NEXT: xori a4, a3, 1
1866+
; RV64I-NEXT: addi a3, a3, -1
1867+
; RV64I-NEXT: and a1, a3, a1
1868+
; RV64I-NEXT: subw a0, a0, a1
1869+
; RV64I-NEXT: sw a4, 0(a2)
1870+
; RV64I-NEXT: ret
1871+
;
1872+
; RV64ZBB-LABEL: sub_if_uge_multiuse_cmp_store_i32:
1873+
; RV64ZBB: # %bb.0:
1874+
; RV64ZBB-NEXT: sltu a3, a0, a1
1875+
; RV64ZBB-NEXT: subw a1, a0, a1
1876+
; RV64ZBB-NEXT: xori a3, a3, 1
1877+
; RV64ZBB-NEXT: minu a0, a0, a1
1878+
; RV64ZBB-NEXT: sw a3, 0(a2)
1879+
; RV64ZBB-NEXT: ret
1880+
%cmp = icmp uge i32 %x, %y
1881+
%conv = zext i1 %cmp to i32
1882+
store i32 %conv, ptr %z, align 4
1883+
%select = select i1 %cmp, i32 %y, i32 0
1884+
%sub = sub nuw i32 %x, %select
1885+
ret i32 %sub
1886+
}

0 commit comments

Comments
 (0)