@@ -1531,6 +1531,62 @@ static APInt ceilingOfQuotient(const APInt &A, const APInt &B) {
1531
1531
return Q;
1532
1532
}
1533
1533
1534
+ // / Given an affine expression of the form A*k + B, where k is an arbitrary
1535
+ // / integer, infer the possible range of k based on the known range of the
1536
+ // / affine expression. If we know A*k + B is non-negative, i.e.,
1537
+ // /
1538
+ // / A*k + B >= 0
1539
+ // /
1540
+ // / we can derive the following inequalities for k when A is positive:
1541
+ // /
1542
+ // / k >= -B / A
1543
+ // /
1544
+ // / Since k is an integer, it means k is greater than or equal to the
1545
+ // / ceil(-B / A).
1546
+ // /
1547
+ // / If the upper bound of the affine expression \p UB is passed, the following
1548
+ // / inequality can be derived as well:
1549
+ // /
1550
+ // / A*k + B <= UB
1551
+ // /
1552
+ // / which leads to:
1553
+ // /
1554
+ // / k <= (UB - B) / A
1555
+ // /
1556
+ // / Again, as k is an integer, it means k is less than or equal to the
1557
+ // / floor((UB - B) / A).
1558
+ // /
1559
+ // / The similar logic applies when A is negative, but the inequalities sign flip
1560
+ // / while working with them.
1561
+ // /
1562
+ // / Preconditions: \p A is non-zero, and we know A*k + B is non-negative.
1563
+ static std::pair<std::optional<APInt>, std::optional<APInt>>
1564
+ inferDomainOfAffine (const APInt &A, const APInt &B,
1565
+ const std::optional<APInt> &UB) {
1566
+ assert (A != 0 && " A must be non-zero" );
1567
+ std::optional<APInt> TL, TU;
1568
+ if (A.sgt (0 )) {
1569
+ TL = ceilingOfQuotient (-B, A);
1570
+ LLVM_DEBUG (dbgs () << " \t Possible TL = " << *TL << " \n " );
1571
+ // New bound check - modification to Banerjee's e3 check
1572
+ if (UB) {
1573
+ // TODO?: Overflow check for UB - B
1574
+ TU = floorOfQuotient (*UB - B, A);
1575
+ LLVM_DEBUG (dbgs () << " \t Possible TU = " << *TU << " \n " );
1576
+ }
1577
+ } else {
1578
+ TU = floorOfQuotient (-B, A);
1579
+ LLVM_DEBUG (dbgs () << " \t Possible TU = " << *TU << " \n " );
1580
+ // New bound check - modification to Banerjee's e3 check
1581
+ if (UB) {
1582
+ // TODO?: Overflow check for UB - B
1583
+ TL = ceilingOfQuotient (*UB - B, A);
1584
+ LLVM_DEBUG (dbgs () << " \t Possible TL = " << *TL << " \n " );
1585
+ }
1586
+ }
1587
+ return std::make_pair (TL, TU);
1588
+ }
1589
+
1534
1590
// exactSIVtest -
1535
1591
// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
1536
1592
// where i is an induction variable, c1 and c2 are loop invariant, and a1
@@ -1590,14 +1646,12 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
1590
1646
LLVM_DEBUG (dbgs () << " \t X = " << X << " , Y = " << Y << " \n " );
1591
1647
1592
1648
// since SCEV construction normalizes, LM = 0
1593
- APInt UM (Bits, 1 , true );
1594
- bool UMValid = false ;
1649
+ std::optional<APInt> UM;
1595
1650
// UM is perhaps unavailable, let's check
1596
1651
if (const SCEVConstant *CUB =
1597
1652
collectConstantUpperBound (CurLoop, Delta->getType ())) {
1598
1653
UM = CUB->getAPInt ();
1599
- LLVM_DEBUG (dbgs () << " \t UM = " << UM << " \n " );
1600
- UMValid = true ;
1654
+ LLVM_DEBUG (dbgs () << " \t UM = " << *UM << " \n " );
1601
1655
}
1602
1656
1603
1657
APInt TU (APInt::getSignedMaxValue (Bits));
@@ -1609,44 +1663,33 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
1609
1663
LLVM_DEBUG (dbgs () << " \t TX = " << TX << " \n " );
1610
1664
LLVM_DEBUG (dbgs () << " \t TY = " << TY << " \n " );
1611
1665
1612
- SmallVector<APInt, 2 > TLVec, TUVec;
1613
1666
APInt TB = BM.sdiv (G);
1614
- if (TB.sgt (0 )) {
1615
- TLVec.push_back (ceilingOfQuotient (-TX, TB));
1616
- LLVM_DEBUG (dbgs () << " \t Possible TL = " << TLVec.back () << " \n " );
1617
- // New bound check - modification to Banerjee's e3 check
1618
- if (UMValid) {
1619
- TUVec.push_back (floorOfQuotient (UM - TX, TB));
1620
- LLVM_DEBUG (dbgs () << " \t Possible TU = " << TUVec.back () << " \n " );
1621
- }
1622
- } else {
1623
- TUVec.push_back (floorOfQuotient (-TX, TB));
1624
- LLVM_DEBUG (dbgs () << " \t Possible TU = " << TUVec.back () << " \n " );
1625
- // New bound check - modification to Banerjee's e3 check
1626
- if (UMValid) {
1627
- TLVec.push_back (ceilingOfQuotient (UM - TX, TB));
1628
- LLVM_DEBUG (dbgs () << " \t Possible TL = " << TLVec.back () << " \n " );
1629
- }
1630
- }
1631
-
1632
1667
APInt TA = AM.sdiv (G);
1633
- if (TA.sgt (0 )) {
1634
- if (UMValid) {
1635
- TUVec.push_back (floorOfQuotient (UM - TY, TA));
1636
- LLVM_DEBUG (dbgs () << " \t Possible TU = " << TUVec.back () << " \n " );
1637
- }
1638
- // New bound check - modification to Banerjee's e3 check
1639
- TLVec.push_back (ceilingOfQuotient (-TY, TA));
1640
- LLVM_DEBUG (dbgs () << " \t Possible TL = " << TLVec.back () << " \n " );
1641
- } else {
1642
- if (UMValid) {
1643
- TLVec.push_back (ceilingOfQuotient (UM - TY, TA));
1644
- LLVM_DEBUG (dbgs () << " \t Possible TL = " << TLVec.back () << " \n " );
1645
- }
1646
- // New bound check - modification to Banerjee's e3 check
1647
- TUVec.push_back (floorOfQuotient (-TY, TA));
1648
- LLVM_DEBUG (dbgs () << " \t Possible TU = " << TUVec.back () << " \n " );
1649
- }
1668
+
1669
+ // At this point, we have the following equations:
1670
+ //
1671
+ // TA*i0 - TB*i1 = TC
1672
+ //
1673
+ // Also, we know that all pairs of (i0, i1) can be expressed as:
1674
+ //
1675
+ // (TX + k*TB, TY + k*TA)
1676
+ //
1677
+ // where k is an arbitrary integer.
1678
+ auto [TL0, TU0] = inferDomainOfAffine (TB, TX, UM);
1679
+ auto [TL1, TU1] = inferDomainOfAffine (TA, TY, UM);
1680
+
1681
+ auto CreateVec = [](const std::optional<APInt> &V0,
1682
+ const std::optional<APInt> &V1) {
1683
+ SmallVector<APInt, 2 > Vec;
1684
+ if (V0)
1685
+ Vec.push_back (*V0);
1686
+ if (V1)
1687
+ Vec.push_back (*V1);
1688
+ return Vec;
1689
+ };
1690
+
1691
+ SmallVector<APInt, 2 > TLVec = CreateVec (TL0, TL1);
1692
+ SmallVector<APInt, 2 > TUVec = CreateVec (TU0, TU1);
1650
1693
1651
1694
LLVM_DEBUG (dbgs () << " \t TA = " << TA << " \n " );
1652
1695
LLVM_DEBUG (dbgs () << " \t TB = " << TB << " \n " );
@@ -1967,24 +2010,20 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
1967
2010
LLVM_DEBUG (dbgs () << " \t X = " << X << " , Y = " << Y << " \n " );
1968
2011
1969
2012
// since SCEV construction seems to normalize, LM = 0
1970
- APInt SrcUM (Bits, 1 , true );
1971
- bool SrcUMvalid = false ;
2013
+ std::optional<APInt> SrcUM;
1972
2014
// SrcUM is perhaps unavailable, let's check
1973
2015
if (const SCEVConstant *UpperBound =
1974
2016
collectConstantUpperBound (SrcLoop, Delta->getType ())) {
1975
2017
SrcUM = UpperBound->getAPInt ();
1976
- LLVM_DEBUG (dbgs () << " \t SrcUM = " << SrcUM << " \n " );
1977
- SrcUMvalid = true ;
2018
+ LLVM_DEBUG (dbgs () << " \t SrcUM = " << *SrcUM << " \n " );
1978
2019
}
1979
2020
1980
- APInt DstUM (Bits, 1 , true );
1981
- bool DstUMvalid = false ;
2021
+ std::optional<APInt> DstUM;
1982
2022
// UM is perhaps unavailable, let's check
1983
2023
if (const SCEVConstant *UpperBound =
1984
2024
collectConstantUpperBound (DstLoop, Delta->getType ())) {
1985
2025
DstUM = UpperBound->getAPInt ();
1986
- LLVM_DEBUG (dbgs () << " \t DstUM = " << DstUM << " \n " );
1987
- DstUMvalid = true ;
2026
+ LLVM_DEBUG (dbgs () << " \t DstUM = " << *DstUM << " \n " );
1988
2027
}
1989
2028
1990
2029
APInt TU (APInt::getSignedMaxValue (Bits));
@@ -1996,47 +2035,39 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
1996
2035
LLVM_DEBUG (dbgs () << " \t TX = " << TX << " \n " );
1997
2036
LLVM_DEBUG (dbgs () << " \t TY = " << TY << " \n " );
1998
2037
1999
- SmallVector<APInt, 2 > TLVec, TUVec;
2000
2038
APInt TB = BM.sdiv (G);
2001
- if (TB.sgt (0 )) {
2002
- TLVec.push_back (ceilingOfQuotient (-TX, TB));
2003
- LLVM_DEBUG (dbgs () << " \t Possible TL = " << TLVec.back () << " \n " );
2004
- if (SrcUMvalid) {
2005
- TUVec.push_back (floorOfQuotient (SrcUM - TX, TB));
2006
- LLVM_DEBUG (dbgs () << " \t Possible TU = " << TUVec.back () << " \n " );
2007
- }
2008
- } else {
2009
- TUVec.push_back (floorOfQuotient (-TX, TB));
2010
- LLVM_DEBUG (dbgs () << " \t Possible TU = " << TUVec.back () << " \n " );
2011
- if (SrcUMvalid) {
2012
- TLVec.push_back (ceilingOfQuotient (SrcUM - TX, TB));
2013
- LLVM_DEBUG (dbgs () << " \t Possible TL = " << TLVec.back () << " \n " );
2014
- }
2015
- }
2016
-
2017
2039
APInt TA = AM.sdiv (G);
2018
- if (TA.sgt (0 )) {
2019
- TLVec.push_back (ceilingOfQuotient (-TY, TA));
2020
- LLVM_DEBUG (dbgs () << " \t Possible TL = " << TLVec.back () << " \n " );
2021
- if (DstUMvalid) {
2022
- TUVec.push_back (floorOfQuotient (DstUM - TY, TA));
2023
- LLVM_DEBUG (dbgs () << " \t Possible TU = " << TUVec.back () << " \n " );
2024
- }
2025
- } else {
2026
- TUVec.push_back (floorOfQuotient (-TY, TA));
2027
- LLVM_DEBUG (dbgs () << " \t Possible TU = " << TUVec.back () << " \n " );
2028
- if (DstUMvalid) {
2029
- TLVec.push_back (ceilingOfQuotient (DstUM - TY, TA));
2030
- LLVM_DEBUG (dbgs () << " \t Possible TL = " << TLVec.back () << " \n " );
2031
- }
2032
- }
2033
2040
2034
- if (TLVec.empty () || TUVec.empty ())
2035
- return false ;
2041
+ // At this point, we have the following equations:
2042
+ //
2043
+ // TA*i - TB*j = TC
2044
+ //
2045
+ // Also, we know that all pairs of (i, j) can be expressed as:
2046
+ //
2047
+ // (TX + k*TB, TY + k*TA)
2048
+ //
2049
+ // where k is an arbitrary integer.
2050
+ auto [TL0, TU0] = inferDomainOfAffine (TB, TX, SrcUM);
2051
+ auto [TL1, TU1] = inferDomainOfAffine (TA, TY, DstUM);
2036
2052
2037
2053
LLVM_DEBUG (dbgs () << " \t TA = " << TA << " \n " );
2038
2054
LLVM_DEBUG (dbgs () << " \t TB = " << TB << " \n " );
2039
2055
2056
+ auto CreateVec = [](const std::optional<APInt> &V0,
2057
+ const std::optional<APInt> &V1) {
2058
+ SmallVector<APInt, 2 > Vec;
2059
+ if (V0)
2060
+ Vec.push_back (*V0);
2061
+ if (V1)
2062
+ Vec.push_back (*V1);
2063
+ return Vec;
2064
+ };
2065
+
2066
+ SmallVector<APInt, 2 > TLVec = CreateVec (TL0, TL1);
2067
+ SmallVector<APInt, 2 > TUVec = CreateVec (TU0, TU1);
2068
+ if (TLVec.empty () || TUVec.empty ())
2069
+ return false ;
2070
+
2040
2071
TL = APIntOps::smax (TLVec.front (), TLVec.back ());
2041
2072
TU = APIntOps::smin (TUVec.front (), TUVec.back ());
2042
2073
LLVM_DEBUG (dbgs () << " \t TL = " << TL << " \n " );
0 commit comments