llvm · sebpop · May 19, 2025 · Nov 14, 2024 · Jan 17, 2025 · Feb 26, 2025
diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -52,6 +52,8 @@ namespace llvm {
   class ScalarEvolution;
   class SCEV;
   class SCEVConstant;
+  class SCEVPredicate;
+  class SCEVUnionPredicate;
   class raw_ostream;
 
   /// Dependence - This class represents a dependence between two memory
@@ -349,12 +351,14 @@ namespace llvm {
     const SCEV *getSplitIteration(const Dependence &Dep, unsigned Level);
 
     Function *getFunction() const { return F; }
+    SCEVUnionPredicate getRuntimeAssumptions();
 
   private:
     AAResults *AA;
     ScalarEvolution *SE;
     LoopInfo *LI;
     Function *F;
+    SmallVector<const SCEVPredicate *, 4> Assumptions;
 
     /// Subscript - This private struct represents a pair of subscripts from
     /// a pair of potentially multi-dimensional array references. We use a

diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1044,6 +1044,11 @@ class ScalarEvolution {
   bool isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero = false,
                               bool OrNegative = false);
 
+  /// Check that memory access offsets in S are multiples of M.  Assumptions
+  /// records the runtime predicates under which S is a multiple of M.
+  bool isKnownMultipleOf(const SCEV *S, uint64_t M,
+                         SmallVectorImpl<const SCEVPredicate *> &Assumptions);
+
   /// Splits SCEV expression \p S into two SCEVs. One of them is obtained from
   /// \p S by substitution of all AddRec sub-expression related to loop \p L
   /// with initial value of that SCEV. The second is obtained from \p S by

diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -206,6 +206,11 @@ static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA,
       }
     }
   }
+  SCEVUnionPredicate Assumptions = DA->getRuntimeAssumptions();
+  if (!Assumptions.isAlwaysTrue()) {
+    OS << "Runtime Assumptions:\n";
+    Assumptions.print(OS, 0);
+  }
 }
 
 void DependenceAnalysisWrapperPass::print(raw_ostream &OS,
@@ -3569,6 +3574,10 @@ bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA,
          Inv.invalidate<LoopAnalysis>(F, PA);
 }
 
+SCEVUnionPredicate DependenceInfo::getRuntimeAssumptions() {
+  return SCEVUnionPredicate(Assumptions, *SE);
+}
+
 // depends -
 // Returns NULL if there is no dependence.
 // Otherwise, return a Dependence with as many details as possible.
@@ -3596,14 +3605,10 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst) {
     return std::make_unique<Dependence>(Src, Dst);
   }
 
-  assert(isLoadOrStore(Src) && "instruction is not load or store");
-  assert(isLoadOrStore(Dst) && "instruction is not load or store");
-  Value *SrcPtr = getLoadStorePointerOperand(Src);
-  Value *DstPtr = getLoadStorePointerOperand(Dst);
+  const MemoryLocation &DstLoc = MemoryLocation::get(Dst);
+  const MemoryLocation &SrcLoc = MemoryLocation::get(Src);
 
-  switch (underlyingObjectsAlias(AA, F->getDataLayout(),
-                                 MemoryLocation::get(Dst),
-                                 MemoryLocation::get(Src))) {
+  switch (underlyingObjectsAlias(AA, F->getDataLayout(), DstLoc, SrcLoc)) {
   case AliasResult::MayAlias:
   case AliasResult::PartialAlias:
     // cannot analyse objects if we don't understand their aliasing.
@@ -3617,21 +3622,22 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst) {
     break; // The underlying objects alias; test accesses for dependence.
   }
 
-  // establish loop nesting levels
-  establishNestingLevels(Src, Dst);
-  LLVM_DEBUG(dbgs() << "    common nesting levels = " << CommonLevels << "\n");
-  LLVM_DEBUG(dbgs() << "    maximum nesting levels = " << MaxLevels << "\n");
-
-  FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
-  ++TotalArrayPairs;
+  if (DstLoc.Size != SrcLoc.Size) {
+    // The dependence test gets confused if the size of the memory accesses
+    // differ.
+    LLVM_DEBUG(dbgs() << "can't analyze must alias with different sizes\n");
+    return std::make_unique<Dependence>(Src, Dst);
+  }
 
-  unsigned Pairs = 1;
-  SmallVector<Subscript, 2> Pair(Pairs);
+  Value *SrcPtr = getLoadStorePointerOperand(Src);
+  Value *DstPtr = getLoadStorePointerOperand(Dst);
   const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
   const SCEV *DstSCEV = SE->getSCEV(DstPtr);
   LLVM_DEBUG(dbgs() << "    SrcSCEV = " << *SrcSCEV << "\n");
   LLVM_DEBUG(dbgs() << "    DstSCEV = " << *DstSCEV << "\n");
-  if (SE->getPointerBase(SrcSCEV) != SE->getPointerBase(DstSCEV)) {
+  const SCEV *SrcBase = SE->getPointerBase(SrcSCEV);
+  const SCEV *DstBase = SE->getPointerBase(DstSCEV);
+  if (SrcBase != DstBase) {
     // If two pointers have different bases, trying to analyze indexes won't
     // work; we can't compare them to each other. This can happen, for example,
     // if one is produced by an LCSSA PHI node.
@@ -3641,6 +3647,31 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst) {
     LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n");
     return std::make_unique<Dependence>(Src, Dst);
   }
+
+  uint64_t EltSize = SrcLoc.Size.toRaw();
+  assert(EltSize == DstLoc.Size.toRaw() && "Array element size differ");
+
+  const SCEV *SrcEv = SE->getMinusSCEV(SrcSCEV, SrcBase);
+  const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase);
+
+  if (Src != Dst) {
+    // Check that memory access offsets are multiples of element sizes.
+    if (!SE->isKnownMultipleOf(SrcEv, EltSize, Assumptions) ||
+        !SE->isKnownMultipleOf(DstEv, EltSize, Assumptions)) {
+      LLVM_DEBUG(dbgs() << "can't analyze SCEV with different offsets\n");
+      return std::make_unique<Dependence>(Src, Dst);
+    }
+  }
+
+  establishNestingLevels(Src, Dst);
+  LLVM_DEBUG(dbgs() << "    common nesting levels = " << CommonLevels << "\n");
+  LLVM_DEBUG(dbgs() << "    maximum nesting levels = " << MaxLevels << "\n");
+
+  FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
+  ++TotalArrayPairs;
+
+  unsigned Pairs = 1;
+  SmallVector<Subscript, 2> Pair(Pairs);
   Pair[0].Src = SrcSCEV;
   Pair[0].Dst = DstSCEV;
 

@@ -10971,6 +10971,67 @@ bool ScalarEvolution::isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero,
   return all_of(Mul->operands(), NonRecursive) && (OrZero || isKnownNonZero(S));
 }
 
+bool ScalarEvolution::isKnownMultipleOf(
+    const SCEV *S, uint64_t M,
+    SmallVectorImpl<const SCEVPredicate *> &Assumptions) {
+  if (M == 0)
+    return false;
+  if (M == 1)
+    return true;
+
+  // Recursively check AddRec operands.
+  if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(S))
+    return isKnownMultipleOf(AddRec->getStart(), M, Assumptions) &&
+           isKnownMultipleOf(AddRec->getStepRecurrence(*this), M, Assumptions);
+
+  // For a constant, check that "S % M == 0".
+  if (auto *Cst = dyn_cast<SCEVConstant>(S)) {
+    APInt C = Cst->getAPInt();
+    return C.urem(M) == 0;
+  }
+
+  // Basic tests have failed.
+  // Record "S % M == 0" in the runtime Assumptions.
+  auto recordRuntimePredicate = [&](const SCEV *S) -> void {
+    auto *STy = dyn_cast<IntegerType>(S->getType());
+    const SCEV *SmodM =
+        getURemExpr(S, getConstant(ConstantInt::get(STy, M, false)));
+    const SCEV *Zero = getZero(STy);
+
+    // Check whether "S % M == 0" is known at compile time.
+    if (isKnownPredicate(ICmpInst::ICMP_EQ, SmodM, Zero))
+      return;
+
+    const SCEVPredicate *P =
+        getComparePredicate(ICmpInst::ICMP_EQ, SmodM, Zero);
+
+    // Detect redundant predicates.
+    for (auto *A : Assumptions)
+      if (A->implies(P, *this))
+        return;
+
+    Assumptions.push_back(P);
+    return;
+  };
+
+  // Expressions like "n".
+  if (isa<SCEVUnknown>(S)) {
+    recordRuntimePredicate(S);
+    return true;
+  }
+
+  // Expressions like "n + 1" and "n * 3".
+  if (isa<SCEVAddExpr>(S) || isa<SCEVMulExpr>(S)) {
+    if (SCEVExprContains(S, [](const SCEV *X) { return isa<SCEVUnknown>(X); }))
+      recordRuntimePredicate(S);
+    return true;
+  }
+
+  LLVM_DEBUG(dbgs() << "SCEV node not handled yet in isKnownMultipleOf: " << *S
+                    << "\n");
+  return false;
+}
+
 std::pair<const SCEV *, const SCEV *>
 ScalarEvolution::SplitIntoInitAndPostInc(const Loop *L, const SCEV *S) {
   // Compute SCEV on entry of loop L.

diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -1129,7 +1129,11 @@ struct LoopFuser {
         }
       }
     }
-    return true;
+    SCEVUnionPredicate Assumptions = DI.getRuntimeAssumptions();
+    // Fail if the dependence analysis has runtime assumptions.
+    // FIXME: do loop versioning to keep the original loop, and transform the
+    // loop under the runtime assumptions.
+    return Assumptions.isAlwaysTrue();
   }
 
   // Returns true if the instruction \p I can be sunk to the top of the exit
@@ -1172,7 +1176,11 @@ struct LoopFuser {
       }
     }
 
-    return true;
+    SCEVUnionPredicate Assumptions = DI.getRuntimeAssumptions();
+    // Fail if the dependence analysis has runtime assumptions.
+    // FIXME: do loop versioning to keep the original loop, and transform the
+    // loop under the runtime assumptions.
+    return Assumptions.isAlwaysTrue();
   }
 
   /// Collect instructions in the \p FC1 Preheader that can be hoisted
@@ -1420,7 +1428,11 @@ struct LoopFuser {
               return false;
             }
 
-    return true;
+    SCEVUnionPredicate Assumptions = DI.getRuntimeAssumptions();
+    // Fail if the dependence analysis has runtime assumptions.
+    // FIXME: do loop versioning to keep the original loop, and transform the
+    // loop under the runtime assumptions.
+    return Assumptions.isAlwaysTrue();
   }
 
   /// Determine if two fusion candidates are adjacent in the CFG.

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -220,7 +220,11 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
     }
   }
 
-  return true;
+  SCEVUnionPredicate Assumptions = DI->getRuntimeAssumptions();
+  // Fail if the dependence analysis has runtime assumptions.
+  // FIXME: do loop versioning to keep the original loop, and transform the
+  // loop under the runtime assumptions.
+  return Assumptions.isAlwaysTrue();
 }
 
 // A loop is moved from index 'from' to an index 'to'. Update the Dependence
@@ -1834,6 +1838,12 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
   std::unique_ptr<CacheCost> CC =
       CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
 
+  SCEVUnionPredicate Assumptions = DI.getRuntimeAssumptions();
+  // Early fail when the dependence analysis has runtime assumptions.
+  // FIXME: this could be handled by versioning the loop.
+  if (!Assumptions.isAlwaysTrue())
+    return PreservedAnalyses::all();
+
   if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
     return PreservedAnalyses::all();
   U.markLoopNestChanged(true);

diff --git a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -800,7 +800,12 @@ checkDependencies(Loop &Root, const BasicBlockSet &SubLoopBlocks,
     EarlierLoadsAndStores.append(CurrentLoadsAndStores.begin(),
                                  CurrentLoadsAndStores.end());
   }
-  return true;
+
+  SCEVUnionPredicate Assumptions = DI.getRuntimeAssumptions();
+  // Fail if the dependence analysis has runtime assumptions.
+  // FIXME: do loop versioning to keep the original loop, and transform the
+  // loop under the runtime assumptions.
+  return Assumptions.isAlwaysTrue();
 }
 
 static bool isEligibleLoopForm(const Loop &Root) {

diff --git a/llvm/test/Analysis/DependenceAnalysis/DifferentAccessSize.ll b/llvm/test/Analysis/DependenceAnalysis/DifferentAccessSize.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \
+; RUN: | FileCheck %s
+
+; The dependence test does not handle array accesses of different sizes: i32 and i64.
+; Bug 16183 - https://github.com/llvm/llvm-project/issues/16183
+
+define i64 @bug16183_alias(ptr nocapture %A) {
+; CHECK-LABEL: 'bug16183_alias'
+; CHECK-NEXT:  Src: store i32 2, ptr %arrayidx, align 4 --> Dst: store i32 2, ptr %arrayidx, align 4
+; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:  Src: store i32 2, ptr %arrayidx, align 4 --> Dst: %0 = load i64, ptr %A, align 8
+; CHECK-NEXT:    da analyze - confused!
+; CHECK-NEXT:  Src: %0 = load i64, ptr %A, align 8 --> Dst: %0 = load i64, ptr %A, align 8
+; CHECK-NEXT:    da analyze - none!
+;
+entry:
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 1
+  store i32 2, ptr %arrayidx, align 4
+  %0 = load i64, ptr %A, align 8
+  ret i64 %0
+}
diff --git a/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll b/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \
+; RUN: | FileCheck %s
+
+; The dependence test does not handle array accesses with difference between array accesses
+; is not a multiple of the array element size.
+
+; In this test, the element size is i32 = 4 bytes and the difference between the
+; load and the store is 2 bytes.
+
+define i32 @alias_with_different_offsets(ptr nocapture %A) {
+; CHECK-LABEL: 'alias_with_different_offsets'
+; CHECK-NEXT:  Src: store i32 2, ptr %arrayidx, align 1 --> Dst: store i32 2, ptr %arrayidx, align 1
+; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:  Src: store i32 2, ptr %arrayidx, align 1 --> Dst: %0 = load i32, ptr %A, align 1
+; CHECK-NEXT:    da analyze - confused!
+; CHECK-NEXT:  Src: %0 = load i32, ptr %A, align 1 --> Dst: %0 = load i32, ptr %A, align 1
+; CHECK-NEXT:    da analyze - none!
+;
+entry:
+  %arrayidx = getelementptr inbounds i8, ptr %A, i64 2
+  store i32 2, ptr %arrayidx, align 1
+  %0 = load i32, ptr %A, align 1
+  ret i32 %0
+}
+
+define i32 @alias_with_parametric_offset(ptr nocapture %A, i64 %n) {
+; CHECK-LABEL: 'alias_with_parametric_offset'
+; CHECK-NEXT:  Src: store i32 2, ptr %arrayidx, align 1 --> Dst: store i32 2, ptr %arrayidx, align 1
+; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:  Src: store i32 2, ptr %arrayidx, align 1 --> Dst: %0 = load i32, ptr %A, align 1
+; CHECK-NEXT:    da analyze - flow [|<]!
+; CHECK-NEXT:  Src: %0 = load i32, ptr %A, align 1 --> Dst: %0 = load i32, ptr %A, align 1
+; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:  Runtime Assumptions:
+; CHECK-NEXT:  Equal predicate: (zext i2 (trunc i64 %n to i2) to i64) == 0
+;
+entry:
+  %arrayidx = getelementptr inbounds i8, ptr %A, i64 %n
+  store i32 2, ptr %arrayidx, align 1
+  %0 = load i32, ptr %A, align 1
+  ret i32 %0
+}
+
+define i32 @alias_with_parametric_expr(ptr nocapture %A, i64 %n, i64 %m) {
+; CHECK-LABEL: 'alias_with_parametric_expr'
+; CHECK-NEXT:  Src: store i32 2, ptr %arrayidx, align 1 --> Dst: store i32 2, ptr %arrayidx, align 1
+; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:  Src: store i32 2, ptr %arrayidx, align 1 --> Dst: %0 = load i32, ptr %arrayidx1, align 1
+; CHECK-NEXT:    da analyze - flow [|<]!
+; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx1, align 1 --> Dst: %0 = load i32, ptr %arrayidx1, align 1
+; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:  Runtime Assumptions:
+; CHECK-NEXT:  Equal predicate: (zext i2 ((trunc i64 %m to i2) + (-2 * (trunc i64 %n to i2))) to i64) == 0
+; CHECK-NEXT:  Equal predicate: (zext i2 (-2 + (trunc i64 %m to i2)) to i64) == 0
+;
+entry:
+  %mul = mul nsw i64 %n, 10
+  %add = add nsw i64 %mul, %m
+  %arrayidx = getelementptr inbounds i8, ptr %A, i64 %add
+  store i32 2, ptr %arrayidx, align 1
+
+  %add1 = add nsw i64 %m, 42
+  %arrayidx1 = getelementptr inbounds i8, ptr %A, i64 %add1
+  %0 = load i32, ptr %arrayidx1, align 1
+  ret i32 %0
+}
diff --git a/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll b/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll
@@ -44,6 +44,9 @@ define void @test(ptr %A, ptr %B, i1 %arg, i32 %n, i32 %m) #0 align 2 {
 ; CHECK-NEXT:    da analyze - input [* S S|<]!
 ; CHECK-NEXT:  Src: %v32 = load <32 x i32>, ptr %v30, align 128 --> Dst: %v32 = load <32 x i32>, ptr %v30, align 128
 ; CHECK-NEXT:    da analyze - consistent input [0 S S]!
+; CHECK-NEXT:  Runtime Assumptions:
+; CHECK-NEXT:  Equal predicate: (zext i7 (4 * (trunc i32 %v1 to i7) * (1 + (trunc i32 %n to i7))) to i32) == 0
+; CHECK-NEXT:  Equal predicate: (8 * (zext i4 (trunc i32 %v1 to i4) to i32))<nuw><nsw> == 0
 ;
 entry:
   %v1 = load i32, ptr %B, align 4