@@ -1531,6 +1531,62 @@ static APInt ceilingOfQuotient(const APInt &A, const APInt &B) {
15311531 return Q;
15321532}
15331533
1534+ // / Given an affine expression of the form A*k + B, where k is an arbitrary
1535+ // / integer, infer the possible range of k based on the known range of the
1536+ // / affine expression. If we know A*k + B is non-negative, i.e.,
1537+ // /
1538+ // / A*k + B >= 0
1539+ // /
1540+ // / we can derive the following inequalities for k when A is positive:
1541+ // /
1542+ // / k >= -B / A
1543+ // /
1544+ // / Since k is an integer, it means k is greater than or equal to the
1545+ // / ceil(-B / A).
1546+ // /
1547+ // / If the upper bound of the affine expression \p UB is passed, the following
1548+ // / inequality can be derived as well:
1549+ // /
1550+ // / A*k + B <= UB
1551+ // /
1552+ // / which leads to:
1553+ // /
1554+ // / k <= (UB - B) / A
1555+ // /
1556+ // / Again, as k is an integer, it means k is less than or equal to the
1557+ // / floor((UB - B) / A).
1558+ // /
1559+ // / The similar logic applies when A is negative, but the inequalities sign flip
1560+ // / while working with them.
1561+ // /
1562+ // / Preconditions: \p A is non-zero, and we know A*k + B is non-negative.
1563+ static std::pair<std::optional<APInt>, std::optional<APInt>>
1564+ inferDomainOfAffine (const APInt &A, const APInt &B,
1565+ const std::optional<APInt> &UB) {
1566+ assert (A != 0 && " A must be non-zero" );
1567+ std::optional<APInt> TL, TU;
1568+ if (A.sgt (0 )) {
1569+ TL = ceilingOfQuotient (-B, A);
1570+ LLVM_DEBUG (dbgs () << " \t Possible TL = " << *TL << " \n " );
1571+ // New bound check - modification to Banerjee's e3 check
1572+ if (UB) {
1573+ // TODO?: Overflow check for UB - B
1574+ TU = floorOfQuotient (*UB - B, A);
1575+ LLVM_DEBUG (dbgs () << " \t Possible TU = " << *TU << " \n " );
1576+ }
1577+ } else {
1578+ TU = floorOfQuotient (-B, A);
1579+ LLVM_DEBUG (dbgs () << " \t Possible TU = " << *TU << " \n " );
1580+ // New bound check - modification to Banerjee's e3 check
1581+ if (UB) {
1582+ // TODO?: Overflow check for UB - B
1583+ TL = ceilingOfQuotient (*UB - B, A);
1584+ LLVM_DEBUG (dbgs () << " \t Possible TL = " << *TL << " \n " );
1585+ }
1586+ }
1587+ return std::make_pair (TL, TU);
1588+ }
1589+
15341590// exactSIVtest -
15351591// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
15361592// where i is an induction variable, c1 and c2 are loop invariant, and a1
@@ -1590,14 +1646,12 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
15901646 LLVM_DEBUG (dbgs () << " \t X = " << X << " , Y = " << Y << " \n " );
15911647
15921648 // since SCEV construction normalizes, LM = 0
1593- APInt UM (Bits, 1 , true );
1594- bool UMValid = false ;
1649+ std::optional<APInt> UM;
15951650 // UM is perhaps unavailable, let's check
15961651 if (const SCEVConstant *CUB =
15971652 collectConstantUpperBound (CurLoop, Delta->getType ())) {
15981653 UM = CUB->getAPInt ();
1599- LLVM_DEBUG (dbgs () << " \t UM = " << UM << " \n " );
1600- UMValid = true ;
1654+ LLVM_DEBUG (dbgs () << " \t UM = " << *UM << " \n " );
16011655 }
16021656
16031657 APInt TU (APInt::getSignedMaxValue (Bits));
@@ -1609,44 +1663,33 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
16091663 LLVM_DEBUG (dbgs () << " \t TX = " << TX << " \n " );
16101664 LLVM_DEBUG (dbgs () << " \t TY = " << TY << " \n " );
16111665
1612- SmallVector<APInt, 2 > TLVec, TUVec;
16131666 APInt TB = BM.sdiv (G);
1614- if (TB.sgt (0 )) {
1615- TLVec.push_back (ceilingOfQuotient (-TX, TB));
1616- LLVM_DEBUG (dbgs () << " \t Possible TL = " << TLVec.back () << " \n " );
1617- // New bound check - modification to Banerjee's e3 check
1618- if (UMValid) {
1619- TUVec.push_back (floorOfQuotient (UM - TX, TB));
1620- LLVM_DEBUG (dbgs () << " \t Possible TU = " << TUVec.back () << " \n " );
1621- }
1622- } else {
1623- TUVec.push_back (floorOfQuotient (-TX, TB));
1624- LLVM_DEBUG (dbgs () << " \t Possible TU = " << TUVec.back () << " \n " );
1625- // New bound check - modification to Banerjee's e3 check
1626- if (UMValid) {
1627- TLVec.push_back (ceilingOfQuotient (UM - TX, TB));
1628- LLVM_DEBUG (dbgs () << " \t Possible TL = " << TLVec.back () << " \n " );
1629- }
1630- }
1631-
16321667 APInt TA = AM.sdiv (G);
1633- if (TA.sgt (0 )) {
1634- if (UMValid) {
1635- TUVec.push_back (floorOfQuotient (UM - TY, TA));
1636- LLVM_DEBUG (dbgs () << " \t Possible TU = " << TUVec.back () << " \n " );
1637- }
1638- // New bound check - modification to Banerjee's e3 check
1639- TLVec.push_back (ceilingOfQuotient (-TY, TA));
1640- LLVM_DEBUG (dbgs () << " \t Possible TL = " << TLVec.back () << " \n " );
1641- } else {
1642- if (UMValid) {
1643- TLVec.push_back (ceilingOfQuotient (UM - TY, TA));
1644- LLVM_DEBUG (dbgs () << " \t Possible TL = " << TLVec.back () << " \n " );
1645- }
1646- // New bound check - modification to Banerjee's e3 check
1647- TUVec.push_back (floorOfQuotient (-TY, TA));
1648- LLVM_DEBUG (dbgs () << " \t Possible TU = " << TUVec.back () << " \n " );
1649- }
1668+
1669+ // At this point, we have the following equations:
1670+ //
1671+ // TA*i0 - TB*i1 = TC
1672+ //
1673+ // Also, we know that all pairs of (i0, i1) can be expressed as:
1674+ //
1675+ // (TX + k*TB, TY + k*TA)
1676+ //
1677+ // where k is an arbitrary integer.
1678+ auto [TL0, TU0] = inferDomainOfAffine (TB, TX, UM);
1679+ auto [TL1, TU1] = inferDomainOfAffine (TA, TY, UM);
1680+
1681+ auto CreateVec = [](const std::optional<APInt> &V0,
1682+ const std::optional<APInt> &V1) {
1683+ SmallVector<APInt, 2 > Vec;
1684+ if (V0)
1685+ Vec.push_back (*V0);
1686+ if (V1)
1687+ Vec.push_back (*V1);
1688+ return Vec;
1689+ };
1690+
1691+ SmallVector<APInt, 2 > TLVec = CreateVec (TL0, TL1);
1692+ SmallVector<APInt, 2 > TUVec = CreateVec (TU0, TU1);
16501693
16511694 LLVM_DEBUG (dbgs () << " \t TA = " << TA << " \n " );
16521695 LLVM_DEBUG (dbgs () << " \t TB = " << TB << " \n " );
@@ -1967,24 +2010,20 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
19672010 LLVM_DEBUG (dbgs () << " \t X = " << X << " , Y = " << Y << " \n " );
19682011
19692012 // since SCEV construction seems to normalize, LM = 0
1970- APInt SrcUM (Bits, 1 , true );
1971- bool SrcUMvalid = false ;
2013+ std::optional<APInt> SrcUM;
19722014 // SrcUM is perhaps unavailable, let's check
19732015 if (const SCEVConstant *UpperBound =
19742016 collectConstantUpperBound (SrcLoop, Delta->getType ())) {
19752017 SrcUM = UpperBound->getAPInt ();
1976- LLVM_DEBUG (dbgs () << " \t SrcUM = " << SrcUM << " \n " );
1977- SrcUMvalid = true ;
2018+ LLVM_DEBUG (dbgs () << " \t SrcUM = " << *SrcUM << " \n " );
19782019 }
19792020
1980- APInt DstUM (Bits, 1 , true );
1981- bool DstUMvalid = false ;
2021+ std::optional<APInt> DstUM;
19822022 // DstUM is perhaps unavailable, let's check
19832023 if (const SCEVConstant *UpperBound =
19842024 collectConstantUpperBound (DstLoop, Delta->getType ())) {
19852025 DstUM = UpperBound->getAPInt ();
1986- LLVM_DEBUG (dbgs () << " \t DstUM = " << DstUM << " \n " );
1987- DstUMvalid = true ;
2026+ LLVM_DEBUG (dbgs () << " \t DstUM = " << *DstUM << " \n " );
19882027 }
19892028
19902029 APInt TU (APInt::getSignedMaxValue (Bits));
@@ -1996,47 +2035,39 @@ bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
19962035 LLVM_DEBUG (dbgs () << " \t TX = " << TX << " \n " );
19972036 LLVM_DEBUG (dbgs () << " \t TY = " << TY << " \n " );
19982037
1999- SmallVector<APInt, 2 > TLVec, TUVec;
20002038 APInt TB = BM.sdiv (G);
2001- if (TB.sgt (0 )) {
2002- TLVec.push_back (ceilingOfQuotient (-TX, TB));
2003- LLVM_DEBUG (dbgs () << " \t Possible TL = " << TLVec.back () << " \n " );
2004- if (SrcUMvalid) {
2005- TUVec.push_back (floorOfQuotient (SrcUM - TX, TB));
2006- LLVM_DEBUG (dbgs () << " \t Possible TU = " << TUVec.back () << " \n " );
2007- }
2008- } else {
2009- TUVec.push_back (floorOfQuotient (-TX, TB));
2010- LLVM_DEBUG (dbgs () << " \t Possible TU = " << TUVec.back () << " \n " );
2011- if (SrcUMvalid) {
2012- TLVec.push_back (ceilingOfQuotient (SrcUM - TX, TB));
2013- LLVM_DEBUG (dbgs () << " \t Possible TL = " << TLVec.back () << " \n " );
2014- }
2015- }
2016-
20172039 APInt TA = AM.sdiv (G);
2018- if (TA.sgt (0 )) {
2019- TLVec.push_back (ceilingOfQuotient (-TY, TA));
2020- LLVM_DEBUG (dbgs () << " \t Possible TL = " << TLVec.back () << " \n " );
2021- if (DstUMvalid) {
2022- TUVec.push_back (floorOfQuotient (DstUM - TY, TA));
2023- LLVM_DEBUG (dbgs () << " \t Possible TU = " << TUVec.back () << " \n " );
2024- }
2025- } else {
2026- TUVec.push_back (floorOfQuotient (-TY, TA));
2027- LLVM_DEBUG (dbgs () << " \t Possible TU = " << TUVec.back () << " \n " );
2028- if (DstUMvalid) {
2029- TLVec.push_back (ceilingOfQuotient (DstUM - TY, TA));
2030- LLVM_DEBUG (dbgs () << " \t Possible TL = " << TLVec.back () << " \n " );
2031- }
2032- }
20332040
2034- if (TLVec.empty () || TUVec.empty ())
2035- return false ;
2041+ // At this point, we have the following equations:
2042+ //
2043+ // TA*i - TB*j = TC
2044+ //
2045+ // Also, we know that all pairs of (i, j) can be expressed as:
2046+ //
2047+ // (TX + k*TB, TY + k*TA)
2048+ //
2049+ // where k is an arbitrary integer.
2050+ auto [TL0, TU0] = inferDomainOfAffine (TB, TX, SrcUM);
2051+ auto [TL1, TU1] = inferDomainOfAffine (TA, TY, DstUM);
20362052
20372053 LLVM_DEBUG (dbgs () << " \t TA = " << TA << " \n " );
20382054 LLVM_DEBUG (dbgs () << " \t TB = " << TB << " \n " );
20392055
2056+ auto CreateVec = [](const std::optional<APInt> &V0,
2057+ const std::optional<APInt> &V1) {
2058+ SmallVector<APInt, 2 > Vec;
2059+ if (V0)
2060+ Vec.push_back (*V0);
2061+ if (V1)
2062+ Vec.push_back (*V1);
2063+ return Vec;
2064+ };
2065+
2066+ SmallVector<APInt, 2 > TLVec = CreateVec (TL0, TL1);
2067+ SmallVector<APInt, 2 > TUVec = CreateVec (TU0, TU1);
2068+ if (TLVec.empty () || TUVec.empty ())
2069+ return false ;
2070+
20402071 TL = APIntOps::smax (TLVec.front (), TLVec.back ());
20412072 TU = APIntOps::smin (TUVec.front (), TUVec.back ());
20422073 LLVM_DEBUG (dbgs () << " \t TL = " << TL << " \n " );
0 commit comments