Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
238bbca
[DA] Dependence analysis does not handle array accesses of different …
sebpop Nov 14, 2024
164a0fa
[DA] check memory offsets are multiples of elements size
sebpop Jan 17, 2025
a479bdf
fix auto type
sebpop Feb 26, 2025
9213c32
convert checkOffsets lambda to static function
sebpop Feb 26, 2025
caf4f8d
remove useless comment
sebpop Feb 26, 2025
468652d
add more comments and examples
sebpop Feb 26, 2025
108d224
add extra slash for function comments
sebpop Feb 27, 2025
c62e1f4
simplify logic with a single return
sebpop Feb 27, 2025
8dcc5a0
use auto on dyn_cast assign for readability
sebpop Feb 27, 2025
92f6b4f
simplify logic for similar code-paths
sebpop Feb 27, 2025
0eae7f0
move isKnownMultipleOf to ScalarEvolution
sebpop Feb 27, 2025
60ba4e6
record runtime assumptions for parametric expressions
sebpop Apr 8, 2025
8c69ebf
disable loop interchange, fusion, and unroll-and-jam on runtime assum…
sebpop Apr 9, 2025
a80c878
handle compile time "s % m != 0" in isKnownMultipleOf
sebpop Apr 14, 2025
bf9fcfd
add testcase from Ryotaro Kasuga's review
sebpop Apr 14, 2025
3dbba8e
record runtime predicates on each Dependence relation
sebpop Apr 14, 2025
d9846e9
add testcases
sebpop Apr 29, 2025
b954282
Revert "disable loop interchange, fusion, and unroll-and-jam on runti…
sebpop May 5, 2025
9a110df
turn off UnderRuntimeAssumptions in depends computation
sebpop May 5, 2025
297ff44
clang-format code around my changes
sebpop May 6, 2025
861ef01
update comments for last wave of reviews
sebpop May 13, 2025
4972776
also reject before or after pointer
sebpop May 13, 2025
338668f
use LocationSize.isPrecise
sebpop May 13, 2025
869b5ff
remove Assumptions.empty branch
sebpop May 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/Analysis/DependenceAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ namespace llvm {
class ScalarEvolution;
class SCEV;
class SCEVConstant;
class SCEVPredicate;
class SCEVUnionPredicate;
class raw_ostream;

/// Dependence - This class represents a dependence between two memory
Expand Down Expand Up @@ -349,12 +351,14 @@ namespace llvm {
const SCEV *getSplitIteration(const Dependence &Dep, unsigned Level);

Function *getFunction() const { return F; }
SCEVUnionPredicate getRuntimeAssumptions();

private:
AAResults *AA;
ScalarEvolution *SE;
LoopInfo *LI;
Function *F;
SmallVector<const SCEVPredicate *, 4> Assumptions;

/// Subscript - This private struct represents a pair of subscripts from
/// a pair of potentially multi-dimensional array references. We use a
Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/Analysis/ScalarEvolution.h
Original file line number Diff line number Diff line change
Expand Up @@ -1044,6 +1044,11 @@ class ScalarEvolution {
bool isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero = false,
bool OrNegative = false);

/// Check that memory access offsets in S are multiples of M. Assumptions
/// records the runtime predicates under which S is a multiple of M.
///
/// \param S the SCEV expression to test.
/// \param M the modulus; the result is false when \p M is 0 and true when
///        \p M is 1, whatever \p S is.
/// \param Assumptions in/out list of runtime predicates; a "S % M == 0"
///        predicate may be appended when divisibility is not known at
///        compile time.
/// \returns true when \p S is a multiple of \p M, possibly only under the
///          predicates recorded in \p Assumptions.
bool isKnownMultipleOf(const SCEV *S, uint64_t M,
SmallVectorImpl<const SCEVPredicate *> &Assumptions);

/// Splits SCEV expression \p S into two SCEVs. One of them is obtained from
/// \p S by substitution of all AddRec sub-expression related to loop \p L
/// with initial value of that SCEV. The second is obtained from \p S by
Expand Down
65 changes: 48 additions & 17 deletions llvm/lib/Analysis/DependenceAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,11 @@ static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA,
}
}
}
SCEVUnionPredicate Assumptions = DA->getRuntimeAssumptions();
if (!Assumptions.isAlwaysTrue()) {
OS << "Runtime Assumptions:\n";
Assumptions.print(OS, 0);
}
}

void DependenceAnalysisWrapperPass::print(raw_ostream &OS,
Expand Down Expand Up @@ -3569,6 +3574,10 @@ bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA,
Inv.invalidate<LoopAnalysis>(F, PA);
}

/// Returns a SCEVUnionPredicate built from the predicates currently stored in
/// the Assumptions member, using this analysis' ScalarEvolution instance.
SCEVUnionPredicate DependenceInfo::getRuntimeAssumptions() {
return SCEVUnionPredicate(Assumptions, *SE);
}

// depends -
// Returns NULL if there is no dependence.
// Otherwise, return a Dependence with as many details as possible.
Expand Down Expand Up @@ -3596,14 +3605,10 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst) {
return std::make_unique<Dependence>(Src, Dst);
}

assert(isLoadOrStore(Src) && "instruction is not load or store");
assert(isLoadOrStore(Dst) && "instruction is not load or store");
Value *SrcPtr = getLoadStorePointerOperand(Src);
Value *DstPtr = getLoadStorePointerOperand(Dst);
const MemoryLocation &DstLoc = MemoryLocation::get(Dst);
const MemoryLocation &SrcLoc = MemoryLocation::get(Src);

switch (underlyingObjectsAlias(AA, F->getDataLayout(),
MemoryLocation::get(Dst),
MemoryLocation::get(Src))) {
switch (underlyingObjectsAlias(AA, F->getDataLayout(), DstLoc, SrcLoc)) {
case AliasResult::MayAlias:
case AliasResult::PartialAlias:
// cannot analyse objects if we don't understand their aliasing.
Expand All @@ -3617,21 +3622,22 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst) {
break; // The underlying objects alias; test accesses for dependence.
}

// establish loop nesting levels
establishNestingLevels(Src, Dst);
LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");

FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
++TotalArrayPairs;
if (DstLoc.Size != SrcLoc.Size) {
// The dependence test gets confused if the sizes of the memory accesses
// differ.
LLVM_DEBUG(dbgs() << "can't analyze must alias with different sizes\n");
return std::make_unique<Dependence>(Src, Dst);
}

unsigned Pairs = 1;
SmallVector<Subscript, 2> Pair(Pairs);
Value *SrcPtr = getLoadStorePointerOperand(Src);
Value *DstPtr = getLoadStorePointerOperand(Dst);
const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
const SCEV *DstSCEV = SE->getSCEV(DstPtr);
LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n");
LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n");
if (SE->getPointerBase(SrcSCEV) != SE->getPointerBase(DstSCEV)) {
const SCEV *SrcBase = SE->getPointerBase(SrcSCEV);
const SCEV *DstBase = SE->getPointerBase(DstSCEV);
if (SrcBase != DstBase) {
// If two pointers have different bases, trying to analyze indexes won't
// work; we can't compare them to each other. This can happen, for example,
// if one is produced by an LCSSA PHI node.
Expand All @@ -3641,6 +3647,31 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst) {
LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n");
return std::make_unique<Dependence>(Src, Dst);
}

uint64_t EltSize = SrcLoc.Size.toRaw();
assert(EltSize == DstLoc.Size.toRaw() && "Array element size differ");

const SCEV *SrcEv = SE->getMinusSCEV(SrcSCEV, SrcBase);
const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase);

if (Src != Dst) {
// Check that memory access offsets are multiples of element sizes.
if (!SE->isKnownMultipleOf(SrcEv, EltSize, Assumptions) ||
!SE->isKnownMultipleOf(DstEv, EltSize, Assumptions)) {
LLVM_DEBUG(dbgs() << "can't analyze SCEV with different offsets\n");
return std::make_unique<Dependence>(Src, Dst);
}
}

establishNestingLevels(Src, Dst);
LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");

FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
++TotalArrayPairs;

unsigned Pairs = 1;
SmallVector<Subscript, 2> Pair(Pairs);
Pair[0].Src = SrcSCEV;
Pair[0].Dst = DstSCEV;

Expand Down
61 changes: 61 additions & 0 deletions llvm/lib/Analysis/ScalarEvolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10971,6 +10971,67 @@ bool ScalarEvolution::isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero,
return all_of(Mul->operands(), NonRecursive) && (OrZero || isKnownNonZero(S));
}

bool ScalarEvolution::isKnownMultipleOf(
const SCEV *S, uint64_t M,
SmallVectorImpl<const SCEVPredicate *> &Assumptions) {
if (M == 0)
return false;
Comment on lines +10977 to +10978
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: What if S is also 0? This is a very special case, so I don't think we need to handle such a case carefully for now, but I would prefer to leave some TODO/FIXME comments for such a case. (For example, I think it's also reasonable to append a predicate for S == 0 to Assumptions and return true here.)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

S==0 in the context of getMinusSCEV would mean an eltSize of 0, which means memory will never be accessed which means there never is a dependency. But there isn't necessarily this context for the caller of isKnownMultipleOf.

This function can always return false, since it returns true only if S is a known multiple of M. The case 0/0 is still questionable, so it is safe to return false here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But there isn't necessarily this context for the caller of isKnownMultipleOf

Yes, this is exactly what I'm concerned about.

This function can always return false, since it returns true only if S is a known multiple of M. The case 0/0 is still questionable, so it is safe to return false here.

I see, that makes sense to me.

if (M == 1)
return true;

// Recursively check AddRec operands.
if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(S))
return isKnownMultipleOf(AddRec->getStart(), M, Assumptions) &&
isKnownMultipleOf(AddRec->getStepRecurrence(*this), M, Assumptions);

// For a constant, check that "S % M == 0".
if (auto *Cst = dyn_cast<SCEVConstant>(S)) {
APInt C = Cst->getAPInt();
return C.urem(M) == 0;
}

// Basic tests have failed.
// Record "S % M == 0" in the runtime Assumptions, unless the equality is
// already known at compile time or is implied by a recorded predicate.
auto recordRuntimePredicate = [&](const SCEV *S) {
  // The remainder is built in S's own integer type. cast<> asserts on a
  // non-integer type instead of silently handing a null type to
  // ConstantInt::get and getZero, which an unchecked dyn_cast would do.
  auto *STy = cast<IntegerType>(S->getType());
  const SCEV *SmodM =
      getURemExpr(S, getConstant(ConstantInt::get(STy, M, false)));
  const SCEV *Zero = getZero(STy);

  // Nothing to record when "S % M == 0" is known at compile time.
  if (isKnownPredicate(ICmpInst::ICMP_EQ, SmodM, Zero))
    return;

  const SCEVPredicate *P =
      getComparePredicate(ICmpInst::ICMP_EQ, SmodM, Zero);

  // Only record non-redundant predicates: skip P when an assumption that is
  // already recorded implies it.
  if (any_of(Assumptions,
             [&](const SCEVPredicate *A) { return A->implies(P, *this); }))
    return;

  Assumptions.push_back(P);
};

// Expressions like "n".
if (isa<SCEVUnknown>(S)) {
recordRuntimePredicate(S);
return true;
}

// Expressions like "n + 1" and "n * 3".
if (isa<SCEVAddExpr>(S) || isa<SCEVMulExpr>(S)) {
if (SCEVExprContains(S, [](const SCEV *X) { return isa<SCEVUnknown>(X); }))
recordRuntimePredicate(S);
return true;
}

LLVM_DEBUG(dbgs() << "SCEV node not handled yet in isKnownMultipleOf: " << *S
<< "\n");
return false;
}

std::pair<const SCEV *, const SCEV *>
ScalarEvolution::SplitIntoInitAndPostInc(const Loop *L, const SCEV *S) {
// Compute SCEV on entry of loop L.
Expand Down
18 changes: 15 additions & 3 deletions llvm/lib/Transforms/Scalar/LoopFuse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1129,7 +1129,11 @@ struct LoopFuser {
}
}
}
return true;
SCEVUnionPredicate Assumptions = DI.getRuntimeAssumptions();
// Fail if the dependence analysis has runtime assumptions.
// FIXME: do loop versioning to keep the original loop, and transform the
// loop under the runtime assumptions.
return Assumptions.isAlwaysTrue();
}

// Returns true if the instruction \p I can be sunk to the top of the exit
Expand Down Expand Up @@ -1172,7 +1176,11 @@ struct LoopFuser {
}
}

return true;
SCEVUnionPredicate Assumptions = DI.getRuntimeAssumptions();
// Fail if the dependence analysis has runtime assumptions.
// FIXME: do loop versioning to keep the original loop, and transform the
// loop under the runtime assumptions.
return Assumptions.isAlwaysTrue();
}

/// Collect instructions in the \p FC1 Preheader that can be hoisted
Expand Down Expand Up @@ -1420,7 +1428,11 @@ struct LoopFuser {
return false;
}

return true;
SCEVUnionPredicate Assumptions = DI.getRuntimeAssumptions();
// Fail if the dependence analysis has runtime assumptions.
// FIXME: do loop versioning to keep the original loop, and transform the
// loop under the runtime assumptions.
return Assumptions.isAlwaysTrue();
}

/// Determine if two fusion candidates are adjacent in the CFG.
Expand Down
12 changes: 11 additions & 1 deletion llvm/lib/Transforms/Scalar/LoopInterchange.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,11 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
}
}

return true;
SCEVUnionPredicate Assumptions = DI->getRuntimeAssumptions();
// Fail if the dependence analysis has runtime assumptions.
// FIXME: do loop versioning to keep the original loop, and transform the
// loop under the runtime assumptions.
return Assumptions.isAlwaysTrue();
}

// A loop is moved from index 'from' to an index 'to'. Update the Dependence
Expand Down Expand Up @@ -1834,6 +1838,12 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
std::unique_ptr<CacheCost> CC =
CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);

SCEVUnionPredicate Assumptions = DI.getRuntimeAssumptions();
// Early fail when the dependence analysis has runtime assumptions.
// FIXME: this could be handled by versioning the loop.
if (!Assumptions.isAlwaysTrue())
return PreservedAnalyses::all();

if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
return PreservedAnalyses::all();
U.markLoopNestChanged(true);
Expand Down
7 changes: 6 additions & 1 deletion llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -800,7 +800,12 @@ checkDependencies(Loop &Root, const BasicBlockSet &SubLoopBlocks,
EarlierLoadsAndStores.append(CurrentLoadsAndStores.begin(),
CurrentLoadsAndStores.end());
}
return true;

SCEVUnionPredicate Assumptions = DI.getRuntimeAssumptions();
// Fail if the dependence analysis has runtime assumptions.
// FIXME: do loop versioning to keep the original loop, and transform the
// loop under the runtime assumptions.
return Assumptions.isAlwaysTrue();
}

static bool isEligibleLoopForm(const Loop &Root) {
Expand Down
22 changes: 22 additions & 0 deletions llvm/test/Analysis/DependenceAnalysis/DifferentAccessSize.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \
; RUN: | FileCheck %s

; The dependence test does not handle array accesses of different sizes: i32 and i64.
; Bug 16183 - https://github.com/llvm/llvm-project/issues/16183

; A 4-byte store and an 8-byte load through pointers derived from the same
; base %A. The store/load pair is expected to be reported as "confused"
; because the two accesses have different sizes.
define i64 @bug16183_alias(ptr nocapture %A) {
; CHECK-LABEL: 'bug16183_alias'
; CHECK-NEXT: Src: store i32 2, ptr %arrayidx, align 4 --> Dst: store i32 2, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: store i32 2, ptr %arrayidx, align 4 --> Dst: %0 = load i64, ptr %A, align 8
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: %0 = load i64, ptr %A, align 8 --> Dst: %0 = load i64, ptr %A, align 8
; CHECK-NEXT: da analyze - none!
;
entry:
; i32 store to element 1 of %A indexed as an i32 array.
%arrayidx = getelementptr inbounds i32, ptr %A, i64 1
store i32 2, ptr %arrayidx, align 4
; i64 load directly from %A.
%0 = load i64, ptr %A, align 8
ret i64 %0
}
67 changes: 67 additions & 0 deletions llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \
; RUN: | FileCheck %s

; The dependence test does not handle array accesses whose offsets differ by an
; amount that is not a multiple of the array element size.

; In this test, the element size is i32 = 4 bytes and the difference between the
; load and the store is 2 bytes.

; Two i32 accesses off the same base %A whose byte offsets (2 and 0) are not
; both multiples of the 4-byte element size. The store/load pair is expected
; to be reported as "confused".
define i32 @alias_with_different_offsets(ptr nocapture %A) {
; CHECK-LABEL: 'alias_with_different_offsets'
; CHECK-NEXT: Src: store i32 2, ptr %arrayidx, align 1 --> Dst: store i32 2, ptr %arrayidx, align 1
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: store i32 2, ptr %arrayidx, align 1 --> Dst: %0 = load i32, ptr %A, align 1
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: %0 = load i32, ptr %A, align 1 --> Dst: %0 = load i32, ptr %A, align 1
; CHECK-NEXT: da analyze - none!
;
entry:
; i8-indexed GEP: the store lands 2 bytes past %A.
%arrayidx = getelementptr inbounds i8, ptr %A, i64 2
store i32 2, ptr %arrayidx, align 1
; The load reads from %A itself (byte offset 0).
%0 = load i32, ptr %A, align 1
ret i32 %0
}

; The store offset is the runtime value %n (in bytes). The expected output
; reports a flow dependence together with a runtime assumption that the low
; two bits of %n are zero.
define i32 @alias_with_parametric_offset(ptr nocapture %A, i64 %n) {
; CHECK-LABEL: 'alias_with_parametric_offset'
; CHECK-NEXT: Src: store i32 2, ptr %arrayidx, align 1 --> Dst: store i32 2, ptr %arrayidx, align 1
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: store i32 2, ptr %arrayidx, align 1 --> Dst: %0 = load i32, ptr %A, align 1
; CHECK-NEXT: da analyze - flow [|<]!
; CHECK-NEXT: Src: %0 = load i32, ptr %A, align 1 --> Dst: %0 = load i32, ptr %A, align 1
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Runtime Assumptions:
; CHECK-NEXT: Equal predicate: (zext i2 (trunc i64 %n to i2) to i64) == 0
;
entry:
; i8-indexed GEP: the store lands %n bytes past %A.
%arrayidx = getelementptr inbounds i8, ptr %A, i64 %n
store i32 2, ptr %arrayidx, align 1
; The load reads from %A itself (byte offset 0).
%0 = load i32, ptr %A, align 1
ret i32 %0
}

; Both byte offsets are parametric expressions: 10 * %n + %m for the store and
; %m + 42 for the load. The expected output reports a flow dependence together
; with one runtime assumption per offset.
define i32 @alias_with_parametric_expr(ptr nocapture %A, i64 %n, i64 %m) {
; CHECK-LABEL: 'alias_with_parametric_expr'
; CHECK-NEXT: Src: store i32 2, ptr %arrayidx, align 1 --> Dst: store i32 2, ptr %arrayidx, align 1
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: store i32 2, ptr %arrayidx, align 1 --> Dst: %0 = load i32, ptr %arrayidx1, align 1
; CHECK-NEXT: da analyze - flow [|<]!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx1, align 1 --> Dst: %0 = load i32, ptr %arrayidx1, align 1
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Runtime Assumptions:
; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 %m to i2) + (-2 * (trunc i64 %n to i2))) to i64) == 0
; CHECK-NEXT: Equal predicate: (zext i2 (-2 + (trunc i64 %m to i2)) to i64) == 0
;
entry:
; Store at byte offset 10 * %n + %m from %A.
%mul = mul nsw i64 %n, 10
%add = add nsw i64 %mul, %m
%arrayidx = getelementptr inbounds i8, ptr %A, i64 %add
store i32 2, ptr %arrayidx, align 1

; Load at byte offset %m + 42 from %A.
%add1 = add nsw i64 %m, 42
%arrayidx1 = getelementptr inbounds i8, ptr %A, i64 %add1
%0 = load i32, ptr %arrayidx1, align 1
ret i32 %0
}
3 changes: 3 additions & 0 deletions llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ define void @test(ptr %A, ptr %B, i1 %arg, i32 %n, i32 %m) #0 align 2 {
; CHECK-NEXT: da analyze - input [* S S|<]!
; CHECK-NEXT: Src: %v32 = load <32 x i32>, ptr %v30, align 128 --> Dst: %v32 = load <32 x i32>, ptr %v30, align 128
; CHECK-NEXT: da analyze - consistent input [0 S S]!
; CHECK-NEXT: Runtime Assumptions:
; CHECK-NEXT: Equal predicate: (zext i7 (4 * (trunc i32 %v1 to i7) * (1 + (trunc i32 %n to i7))) to i32) == 0
; CHECK-NEXT: Equal predicate: (8 * (zext i4 (trunc i32 %v1 to i4) to i32))<nuw><nsw> == 0
;
entry:
%v1 = load i32, ptr %B, align 4
Expand Down