Skip to content

Commit 34ca142

Browse files
committed
[LAA] Determine Dst and Src overlapping by SCEV of Src and Dist
Consider the following case: for (int j = 0; j < 256; j++) // Loop j for (int i = j+1; i < 256; i++) // Loop i a[i] -= aa[j][i] * a[j]; Given that the SCEV of &a[j] is {@a,+,4}<Loop j>, a[j] will be treated as a scalar when vectorizing Loop i. If the access size of a[j] <= Dist(a[j], a[i]), the two accesses do not overlap and the loop can be vectorized. In this case, the access size of a[j] is 4 bytes (float) and Dist(a[j], a[i]) is {4,+,4}, which makes the minimum distance 4.
1 parent b06f1a8 commit 34ca142

File tree

2 files changed

+81
-3
lines changed

2 files changed

+81
-3
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1917,6 +1917,74 @@ isLoopVariantIndirectAddress(ArrayRef<const Value *> UnderlyingObjects,
19171917
});
19181918
}
19191919

1920+
static bool isAffectedByLoop(const SCEV *Expr, const Loop *L,
1921+
ScalarEvolution &SE) {
1922+
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
1923+
if (!AddRec)
1924+
return false;
1925+
1926+
if (AddRec->getLoop() == L)
1927+
return true;
1928+
1929+
const SCEV *Start = AddRec->getStart();
1930+
const SCEV *Step = AddRec->getStepRecurrence(SE);
1931+
return isAffectedByLoop(Start, L, SE) || isAffectedByLoop(Step, L, SE);
1932+
}
1933+
1934+
// Consider the following case:
1935+
//
1936+
// for (int j = 0; j < 256; j++) // Loop j
1937+
// for (int i = j+1; i < 256; i++)// Loop i
1938+
// a[i] -= aa[j][i] * a[j];
1939+
//
1940+
// Given that SCEV of &a[j] is {@a,+,4}<Loop j>, a[j] will be treated as scalar
1941+
// when vectorizing Loop i. If the accessing size of a[j] <= Dist(a[j], a[i]),
1942+
// there is no overlapped and can be vectorized.
1943+
//
1944+
// In this case, accessing size of a[j] is 4 byte(float) and Dist(a[j], a[i])
1945+
// is {4,+,4} which bring the minimum distance as 4.
1946+
//
1947+
// Return true if Dist is equal or greater than the accessing size of Src.
1948+
static bool isSrcNoOverlap(const SCEV *Src, Instruction *AInst,
1949+
const SCEV *Dist, const Loop *InnermostLoop,
1950+
ScalarEvolution &SE) {
1951+
// If the Src is not affected by InnermostLoop, when vectorizing
1952+
// InnermostLoop, Src will be treated as scalar instead of widening to vector.
1953+
if (isAffectedByLoop(Src, InnermostLoop, SE))
1954+
return false;
1955+
1956+
if (!isa<SCEVAddRecExpr>(Dist))
1957+
return false;
1958+
1959+
auto *Diff = cast<SCEVAddRecExpr>(Dist);
1960+
1961+
if (Diff->getLoop() != InnermostLoop)
1962+
return false;
1963+
1964+
if (!isa<SCEVConstant>(Diff->getStart()))
1965+
return false;
1966+
1967+
if (!isa<SCEVConstant>(Diff->getStepRecurrence(SE)))
1968+
return false;
1969+
1970+
const SCEVConstant *DiffInc = cast<SCEVConstant>(Diff->getStepRecurrence(SE));
1971+
if (DiffInc->getAPInt().isNegative())
1972+
return false;
1973+
1974+
// If the step of Diff is positve and the Start of diff is constant,
1975+
// we can get the minimum diff between Src and Dst.
1976+
const SCEVConstant *MinDiff = cast<SCEVConstant>(Diff->getStart());
1977+
1978+
// If we get here, Src won't be vectorized, so we only need to consider the
1979+
// scalar load/store size. If the minimum diff between Src and Dst is equal
1980+
// or greater than the load/store size, there is no overlapped.
1981+
if (MinDiff->getAPInt().getSExtValue() >=
1982+
getLoadStoreType(AInst)->getScalarSizeInBits() / 8)
1983+
return true;
1984+
1985+
return false;
1986+
}
1987+
19201988
// Get the dependence distance, stride, type size in whether i is a write for
19211989
// the dependence between A and B. Returns a DepType, if we can prove there's
19221990
// no dependence or the analysis fails. Outlined to lambda to limit the scope
@@ -1979,6 +2047,9 @@ getDependenceDistanceStrideAndSize(
19792047
InnermostLoop))
19802048
return MemoryDepChecker::Dependence::IndirectUnsafe;
19812049

2050+
if (isSrcNoOverlap(Src, AInst, Dist, InnermostLoop, SE))
2051+
return MemoryDepChecker::Dependence::NoDep;
2052+
19822053
// Need accesses with constant stride. We don't want to vectorize
19832054
// "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap
19842055
// in the address space.

llvm/test/Transforms/LoopVectorize/vectorize-s115.ll

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,18 @@
33
@aa = global [256 x [256 x float]] zeroinitializer, align 4
44
@a = global [32000 x float] zeroinitializer, align 4
55

6-
;; for (int j = 0; j < 256; j++)
7-
;; for (int i = j+1; i < 256; i++)
6+
;; Given that SCEV of &a[j] is {@a,+,4}<Loop j>, a[j] will be treated as scalar
7+
;; when vectorizing Loop i. If the accessing size of a[j] <= Dist(a[j], a[i]),
8+
;; the accesses do not overlap and the loop can be vectorized.
9+
;;
10+
;; In this case, accessing size of a[j] is 4 byte(float) and Dist(a[j], a[i])
11+
;; is {4,+,4}, which makes the minimum distance 4.
12+
;;
13+
;; for (int j = 0; j < 256; j++) // Loop j
14+
;; for (int i = j+1; i < 256; i++)// Loop i
815
;; a[i] -= aa[j][i] * a[j];
916

10-
; CHECK-NOT: vector.body:
17+
; CHECK: vector.body:
1118

1219
define signext i32 @s115() {
1320
entry:

0 commit comments

Comments
 (0)