@@ -1917,6 +1917,74 @@ isLoopVariantIndirectAddress(ArrayRef<const Value *> UnderlyingObjects,
   });
 }
 
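+// Return true if \p Expr is an add recurrence over loop \p L, or an add
+// recurrence whose start or step is (recursively) affected by \p L.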
+static bool isAffectedByLoop(const SCEV *Expr, const Loop *L,
+                             ScalarEvolution &SE) {
+  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
+  if (!AddRec)
+    return false;
+
+  if (AddRec->getLoop() == L)
+    return true;
+
+  const SCEV *Start = AddRec->getStart();
+  const SCEV *Step = AddRec->getStepRecurrence(SE);
+  return isAffectedByLoop(Start, L, SE) || isAffectedByLoop(Step, L, SE);
+}
+
+// Consider the following case:
+//
+//   for (int j = 0; j < 256; j++)      // Loop j
+//     for (int i = j+1; i < 256; i++)  // Loop i
+//       a[i] -= aa[j][i] * a[j];
+//
+// Given that the SCEV of &a[j] is {@a,+,4}<Loop j>, a[j] is treated as a
+// scalar when vectorizing Loop i. If the access size of a[j] is no larger
+// than Dist(a[j], a[i]), the two accesses do not overlap and the loop can
+// be vectorized.
+//
+// In this case, the access size of a[j] is 4 bytes (float) and
+// Dist(a[j], a[i]) is {4,+,4}, which gives a minimum distance of 4.
+//
+// Return true if Dist is greater than or equal to the access size of Src.
+static bool isSrcNoOverlap(const SCEV *Src, Instruction *AInst,
+                           const SCEV *Dist, const Loop *InnermostLoop,
+                           ScalarEvolution &SE) {
+  // If Src is not affected by InnermostLoop, it is kept as a scalar rather
+  // than widened to a vector when vectorizing InnermostLoop; the reasoning
+  // below only applies in that case.
+  if (isAffectedByLoop(Src, InnermostLoop, SE))
+    return false;
+
+  auto *Diff = dyn_cast<SCEVAddRecExpr>(Dist);
+  if (!Diff || Diff->getLoop() != InnermostLoop)
+    return false;
+
+  if (!isa<SCEVConstant>(Diff->getStart()) ||
+      !isa<SCEVConstant>(Diff->getStepRecurrence(SE)))
+    return false;
+
+  const auto *DiffInc = cast<SCEVConstant>(Diff->getStepRecurrence(SE));
+  if (DiffInc->getAPInt().isNegative())
+    return false;
+
+  // The step of Diff is non-negative and the start of Diff is constant, so
+  // the start is the minimum distance between Src and Dst.
+  const auto *MinDiff = cast<SCEVConstant>(Diff->getStart());
+
+  // If we get here, Src won't be vectorized, so only the scalar load/store
+  // size matters. If the minimum distance between Src and Dst is greater
+  // than or equal to that size, there is no overlap.
+  return MinDiff->getAPInt().getSExtValue() >=
+         getLoadStoreType(AInst)->getScalarSizeInBits() / 8;
+}
+
 // Get the dependence distance, stride, type size and whether it is a write for
 // the dependence between A and B. Returns a DepType, if we can prove there's
 // no dependence or the analysis fails. Outlined to lambda to limit the scope
@@ -1979,6 +2047,9 @@ getDependenceDistanceStrideAndSize(
                                      InnermostLoop))
     return MemoryDepChecker::Dependence::IndirectUnsafe;
 
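+  // If Src stays scalar and its minimum distance to the sink covers the
+  // scalar access size, the two accesses cannot overlap.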
+  if (isSrcNoOverlap(Src, AInst, Dist, InnermostLoop, SE))
+    return MemoryDepChecker::Dependence::NoDep;
+
   // Need accesses with constant stride. We don't want to vectorize
   // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap
   // in the address space.
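
Not part of the patch: to make the minimum-distance argument concrete, here is
a small standalone sketch in plain C++ (no LLVM API; the array, loop bounds,
and asserts mirror the example in the comment above and assume a 4-byte
float). For i in [j+1, 256), the byte distance between &a[i] and &a[j] is
4*(i-j), i.e. the add recurrence {4,+,4}<Loop i>, whose minimum value 4 equals
the scalar access size, so a[j] never overlaps a[i].

#include <cassert>
#include <cstddef>

int main() {
  float a[256];
  for (int j = 0; j < 256; j++) {
    for (int i = j + 1; i < 256; i++) {
      // Byte distance between the widened access a[i] and the scalar access
      // a[j]: starts at 4 when i == j+1 and grows by 4 per iteration of
      // Loop i, matching the add recurrence {4,+,4}<Loop i>.
      std::ptrdiff_t DistBytes =
          reinterpret_cast<char *>(&a[i]) - reinterpret_cast<char *>(&a[j]);
      assert(DistBytes == 4 * (i - j));
      // Minimum distance (4) >= scalar access size (sizeof(float) == 4),
      // so the accesses never overlap within Loop i.
      assert(DistBytes >= static_cast<std::ptrdiff_t>(sizeof(float)));
    }
  }
  return 0;
}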