diff --git a/llvm/include/llvm/Analysis/Delinearization.h b/llvm/include/llvm/Analysis/Delinearization.h
index 8fb30925b1ba7..ecac844ea7658 100644
--- a/llvm/include/llvm/Analysis/Delinearization.h
+++ b/llvm/include/llvm/Analysis/Delinearization.h
@@ -16,16 +16,23 @@
 #ifndef LLVM_ANALYSIS_DELINEARIZATION_H
 #define LLVM_ANALYSIS_DELINEARIZATION_H
 
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/Value.h"
+#include "llvm/Support/Compiler.h"
 
 namespace llvm {
+class Function;
 class raw_ostream;
 template <typename T> class SmallVectorImpl;
 class GetElementPtrInst;
 class Instruction;
+class LoopInfo;
 class ScalarEvolution;
 class SCEV;
+class SCEVUnknown;
 
 /// Compute the array dimensions Sizes from the set of Terms extracted from
 /// the memory access function of this SCEVAddRecExpr (second step of
@@ -164,6 +171,84 @@ bool getIndexExpressionsFromGEP(ScalarEvolution &SE,
                                 SmallVectorImpl<const SCEV *> &Subscripts,
                                 SmallVectorImpl<const SCEV *> &Sizes);
 
+/// BatchDelinearization - Function-wide cache of delinearization results.
+///
+/// Instead of delinearizing each pair of accesses in isolation, this class
+/// collects all memory accesses of a function, groups them by base pointer
+/// and computes the array dimensions of each base once, from the terms of
+/// all of its accesses, which leads to better precision. The results are
+/// kept for cheap lookups during dependence analysis; the role is similar to
+/// that of BatchAAResults. The Function, ScalarEvolution and LoopInfo passed
+/// to the constructor are held by reference.
+///
+/// Usage:
+///   BatchDelinearization BD(F, SE, LI);
+///   BD.populate();  // Compute and cache delinearization info.
+///   // Then pass BD to DependenceInfo or query it directly.
+class LLVM_ABI BatchDelinearization {
+public:
+  BatchDelinearization(Function &F, ScalarEvolution &SE, LoopInfo &LI)
+      : F(F), SE(SE), LI(LI) {}
+
+  /// Analyze every memory access in the function and fill the cache.
+  void populate();
+
+  /// True once populate() has been called.
+  bool isPopulated() const { return Populated; }
+
+  /// Look up the array dimension sizes recorded for \p Base.
+  /// Returns nullptr if there is no entry.
+  const SmallVector<const SCEV *, 4> *
+  getArraySizes(const SCEVUnknown *Base) const {
+    if (auto Found = ArraySizes.find(Base); Found != ArraySizes.end())
+      return &Found->second;
+    return nullptr;
+  }
+
+  /// Look up the subscripts recorded for the access performed by \p I.
+  /// Returns nullptr if there is no entry.
+  const SmallVector<const SCEV *, 4> *
+  getSubscripts(const Instruction *I) const {
+    if (auto Found = Subscripts.find(I); Found != Subscripts.end())
+      return &Found->second;
+    return nullptr;
+  }
+
+  /// Look up the element size recorded for \p Base.
+  /// Returns nullptr if there is no entry.
+  const SCEV *getElementSize(const SCEVUnknown *Base) const {
+    // lookup() yields a value-initialized (null) pointer for a missing key.
+    return ElementSizes.lookup(Base);
+  }
+
+  /// Access the ScalarEvolution instance this cache was built with.
+  ScalarEvolution &getSE() { return SE; }
+  const ScalarEvolution &getSE() const { return SE; }
+
+  /// Access the LoopInfo instance this cache was built with.
+  LoopInfo &getLI() { return LI; }
+  const LoopInfo &getLI() const { return LI; }
+
+private:
+  Function &F;
+  ScalarEvolution &SE;
+  LoopInfo &LI;
+
+  /// Array dimension sizes, keyed by base pointer.
+  SmallDenseMap<const SCEVUnknown *, SmallVector<const SCEV *, 4>, 8>
+      ArraySizes;
+
+  /// Subscripts of individual accesses, keyed by instruction.
+  SmallDenseMap<const Instruction *, SmallVector<const SCEV *, 4>, 16>
+      Subscripts;
+
+  /// Element size associated with each base pointer.
+  SmallDenseMap<const SCEVUnknown *, const SCEV *, 8> ElementSizes;
+
+  /// Set by populate() so that repeated calls do no further work.
+  bool Populated = false;
+};
+
 struct DelinearizationPrinterPass
     : public PassInfoMixin<DelinearizationPrinterPass> {
   explicit DelinearizationPrinterPass(raw_ostream &OS);
diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h
index 6dec24fc9f104..21828290fcc08 100644
--- a/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -49,6 +49,7 @@
 namespace llvm {
 class AAResults;
 template <typename T> class ArrayRef;
+class BatchDelinearization;
 class Loop;
 class LoopInfo;
 class SCEVConstant;
@@ -335,8 +336,9 @@ class LLVM_ABI FullDependence final : public Dependence {
 /// DependenceInfo - This class is the main dependence-analysis driver.
 class DependenceInfo {
 public:
-  DependenceInfo(Function *F, AAResults *AA, ScalarEvolution *SE, LoopInfo *LI)
-      : AA(AA), SE(SE), LI(LI), F(F) {}
+  DependenceInfo(Function *F, AAResults *AA, ScalarEvolution *SE, LoopInfo *LI,
+                 BatchDelinearization *BD = nullptr)
+      : AA(AA), SE(SE), LI(LI), F(F), BatchDelin(BD) {}
 
   /// Handle transitive invalidation when the cached analysis results go away.
   LLVM_ABI bool invalidate(Function &F, const PreservedAnalyses &PA,
@@ -355,11 +357,19 @@ class DependenceInfo {
 
   Function *getFunction() const { return F; }
 
+  /// setBatchDelinearization - Set the BatchDelinearization instance to use
+  /// for cached results. Not owned: it must outlive its use; nullptr unsets.
+  void setBatchDelinearization(BatchDelinearization *BD) { BatchDelin = BD; }
+
+  /// getBatchDelinearization - Get the BatchDelinearization instance.
+  BatchDelinearization *getBatchDelinearization() const { return BatchDelin; }
+
 private:
   AAResults *AA;
   ScalarEvolution *SE;
   LoopInfo *LI;
   Function *F;
+  BatchDelinearization *BatchDelin;
 
   /// Subscript - This private struct represents a pair of subscripts from
   /// a pair of potentially multi-dimensional array references. We use a
diff --git a/llvm/lib/Analysis/DDG.cpp b/llvm/lib/Analysis/DDG.cpp
index 0907a7fb021fc..d76f993ab5332 100644
--- a/llvm/lib/Analysis/DDG.cpp
+++ b/llvm/lib/Analysis/DDG.cpp
@@ -10,6 +10,7 @@
 //===----------------------------------------------------------------------===//
 #include "llvm/Analysis/DDG.h"
 #include "llvm/ADT/SCCIterator.h"
+#include "llvm/Analysis/Delinearization.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Support/CommandLine.h"
@@ -308,7 +309,12 @@ bool DDGBuilder::shouldCreatePiBlocks() const { return CreatePiBlocks; }
 DDGAnalysis::Result DDGAnalysis::run(Loop &L, LoopAnalysisManager &AM,
                                      LoopStandardAnalysisResults &AR) {
   Function *F = L.getHeader()->getParent();
+  // Deliberately no BatchDelinearization here. DependenceInfo stores only a
+  // raw pointer to it, and DI is handed to the DataDependenceGraph that this
+  // function returns. A BatchDelinearization local to this function would be
+  // destroyed on return, so anything in the graph that still consults DI
+  // afterwards would read through a dangling pointer.
   DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
   return std::make_unique<DataDependenceGraph>(L, AR.LI, DI);
 }
 AnalysisKey DDGAnalysis::Key;
diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp
index 7bf83ccf9c172..5a386840231ed 100644
--- a/llvm/lib/Analysis/Delinearization.cpp
+++ b/llvm/lib/Analysis/Delinearization.cpp
@@ -960,3 +960,173 @@ PreservedAnalyses DelinearizationPrinterPass::run(Function &F,
                        &AM.getResult<ScalarEvolutionAnalysis>(F));
   return PreservedAnalyses::all();
 }
+
+//===----------------------------------------------------------------------===//
+// BatchDelinearization Implementation
+//===----------------------------------------------------------------------===//
+
+/// Return true for a Load or Store instruction.
+static bool isLoadOrStore(const Instruction *I) {
+  return isa<LoadInst>(I) || isa<StoreInst>(I);
+}
+
+/// Walk every load and store of the function once and fill the caches.
+///
+/// Accesses are grouped by their SCEVUnknown base pointer. For a base with at
+/// least two accesses, the parametric terms of all affine accesses are pooled
+/// to find one common set of array dimensions, and subscripts are computed
+/// against those dimensions. For every other base, fixed-size
+/// delinearization is attempted per access.
+///
+/// All subscripts cached for one base are expressed against the same
+/// dimensions and element size, so that any two of them can be compared.
+/// They are not range-checked here; consumers have to validate them.
+void BatchDelinearization::populate() {
+  if (Populated)
+    return;
+
+  Populated = true;
+
+  // Step 1: Collect all memory accesses grouped by base pointer.
+  // Map from base pointer to list of (Instruction, AccessFunction) pairs.
+  SmallDenseMap<const SCEVUnknown *,
+                SmallVector<std::pair<Instruction *, const SCEV *>, 4>, 8>
+      AccessesByBase;
+
+  for (Instruction &I : instructions(F)) {
+    if (!isLoadOrStore(&I))
+      continue;
+
+    Value *Ptr = getLoadStorePointerOperand(&I);
+    Loop *L = LI.getLoopFor(I.getParent());
+    const SCEV *AccessFn = SE.getSCEVAtScope(Ptr, L);
+    const SCEVUnknown *Base =
+        dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFn));
+
+    if (!Base)
+      continue;
+
+    // Only consider accesses where the base is loop invariant.
+    if (L && !SE.isLoopInvariant(Base, L))
+      continue;
+
+    AccessesByBase[Base].push_back({&I, AccessFn});
+  }
+
+  // Step 2: For each base pointer, collect terms from ALL accesses and
+  // compute array dimensions once.
+  for (auto &Entry : AccessesByBase) {
+    const SCEVUnknown *Base = Entry.first;
+    auto &Accesses = Entry.second;
+
+    // Skip if there's only one access - no benefit from batch processing.
+    if (Accesses.size() < 2)
+      continue;
+
+    // Determine element size - use the smallest among all accesses.
+    const SCEV *ElemSize = nullptr;
+    for (auto &Access : Accesses) {
+      const SCEV *EltSize = SE.getElementSize(Access.first);
+      if (!ElemSize ||
+          SE.isKnownPredicate(ICmpInst::ICMP_ULT, EltSize, ElemSize))
+        ElemSize = EltSize;
+    }
+
+    if (!ElemSize)
+      continue;
+
+    // Compute the offset of every access from the base exactly once (nullptr
+    // if it is not an affine recurrence) and pool the parametric terms of
+    // all accesses to this base.
+    SmallVector<const SCEVAddRecExpr *, 4> Offsets;
+    SmallVector<const SCEV *, 8> Terms;
+    for (auto &Access : Accesses) {
+      const auto *AR =
+          dyn_cast<SCEVAddRecExpr>(SE.getMinusSCEV(Access.second, Base));
+      if (AR && !AR->isAffine())
+        AR = nullptr;
+      Offsets.push_back(AR);
+      if (AR)
+        collectParametricTerms(SE, AR, Terms);
+    }
+
+    // Find array dimensions using all collected terms.
+    SmallVector<const SCEV *, 4> Sizes;
+    findArrayDimensions(SE, Terms, Sizes, ElemSize);
+
+    // Skip if we couldn't determine dimensions. Nothing has been recorded for
+    // the base at this point, so the caches stay consistent with each other.
+    if (Sizes.size() < 2)
+      continue;
+
+    // Pre-compute subscripts for each access using parametric sizes. Accesses
+    // whose element size differs from the one the dimensions were derived
+    // with are left out, as their subscripts would not be comparable.
+    for (unsigned Idx = 0, End = Accesses.size(); Idx != End; ++Idx) {
+      Instruction *Inst = Accesses[Idx].first;
+      const SCEVAddRecExpr *AR = Offsets[Idx];
+      if (!AR || SE.getElementSize(Inst) != ElemSize)
+        continue;
+
+      SmallVector<const SCEV *, 4> Subs;
+      computeAccessFunctions(SE, AR, Subs, Sizes);
+
+      if (Subs.size() >= 2)
+        Subscripts[Inst] = std::move(Subs);
+    }
+
+    ElementSizes[Base] = ElemSize;
+    ArraySizes[Base] = std::move(Sizes);
+  }
+
+  // Step 3: Try fixed-size array delinearization for the bases that did not
+  // get parametric dimensions. This handles arrays with known compile-time
+  // dimensions. Bases handled in step 2 are skipped, so subscripts derived
+  // from two different sets of dimensions never coexist for one base.
+  for (auto &Entry : AccessesByBase) {
+    if (ArraySizes.count(Entry.first))
+      continue;
+
+    // Results are committed only if all delinearized accesses of the base
+    // agree on the element size and on the inferred dimensions.
+    SmallVector<std::pair<Instruction *, SmallVector<const SCEV *, 4>>, 4>
+        Pending;
+    SmallVector<const SCEV *, 4> CommonSizes;
+    const SCEV *CommonElemSize = nullptr;
+    bool Consistent = true;
+
+    for (auto &Access : Entry.second) {
+      Instruction *Inst = Access.first;
+      const SCEV *ElemSize = SE.getElementSize(Inst);
+      SmallVector<const SCEV *, 4> Subs, Sizes;
+
+      if (!delinearizeFixedSizeArray(SE, SE.removePointerBase(Access.second),
+                                     Subs, Sizes, ElemSize) ||
+          Subs.size() < 2)
+        continue;
+
+      if (Pending.empty()) {
+        CommonElemSize = ElemSize;
+        CommonSizes = Sizes;
+      } else if (ElemSize != CommonElemSize || Sizes != CommonSizes) {
+        Consistent = false;
+        break;
+      }
+      Pending.emplace_back(Inst, std::move(Subs));
+    }
+
+    if (!Consistent)
+      continue;
+
+    for (auto &Result : Pending)
+      Subscripts[Result.first] = std::move(Result.second);
+  }
+
+  LLVM_DEBUG({
+    dbgs() << "Batch delinearization cache populated:\n";
+    dbgs() << "  Base pointers with cached dimensions: " << ArraySizes.size()
+           << "\n";
+    dbgs() << "  Instructions with cached subscripts: " << Subscripts.size()
+           << "\n";
+  });
+}
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 9b9c80a9b3266..a72ae360de464 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -3255,6 +3255,40 @@ bool DependenceInfo::tryDelinearizeFixedSize(
            "expected src and dst scev unknowns to be equal");
   });
 
+  // Try to use cached subscripts from BatchDelinearization. The cache keeps
+  // subscripts per instruction, without the dimensions they were computed
+  // against and without range-checking them. They are therefore accepted only
+  // if dimensions are cached for the common base, the element size matches
+  // and both subscript lists pass the usual validation. In every other case
+  // fall through to the uncached computation below.
+  if (BatchDelin && BatchDelin->isPopulated()) {
+    Value *SrcPtr = getLoadStorePointerOperand(Src);
+    Value *DstPtr = getLoadStorePointerOperand(Dst);
+    const auto *Base =
+        dyn_cast<SCEVUnknown>(SE->getPointerBase(SE->getSCEV(SrcPtr)));
+    const auto *CachedSizes = Base ? BatchDelin->getArraySizes(Base) : nullptr;
+    const auto *SrcSubs = BatchDelin->getSubscripts(Src);
+    const auto *DstSubs = BatchDelin->getSubscripts(Dst);
+    if (CachedSizes && SrcSubs && DstSubs && SrcSubs->size() >= 2 &&
+        SrcSubs->size() == DstSubs->size() &&
+        BatchDelin->getElementSize(Base) == SE->getElementSize(Src)) {
+      // Validate local copies so that the out-parameters are written only
+      // once the cached result has been accepted.
+      SmallVector<const SCEV *, 4> Sizes(CachedSizes->begin(),
+                                         CachedSizes->end());
+      SmallVector<const SCEV *, 4> SrcCopy(SrcSubs->begin(), SrcSubs->end());
+      SmallVector<const SCEV *, 4> DstCopy(DstSubs->begin(), DstSubs->end());
+      if (DisableDelinearizationChecks ||
+          (validateDelinearizationResult(*SE, Sizes, SrcCopy, SrcPtr) &&
+           validateDelinearizationResult(*SE, Sizes, DstCopy, DstPtr))) {
+        SrcSubscripts.assign(SrcCopy.begin(), SrcCopy.end());
+        DstSubscripts.assign(DstCopy.begin(), DstCopy.end());
+        LLVM_DEBUG(dbgs() << "Using cached batch delinearization results\n");
+        return true;
+      }
+    }
+  }
+
   const SCEV *ElemSize = SE->getElementSize(Src);
   assert(ElemSize == SE->getElementSize(Dst) && "Different element sizes");
   SmallVector<const SCEV *, 4> SrcSizes, DstSizes;
@@ -3328,16 +3341,70 @@ bool DependenceInfo::tryDelinearizeParametricSize(
   if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())
     return false;
 
+  SmallVector<const SCEV *, 4> Sizes;
+
+  // Decide whether the subscripts currently held in SrcSubscripts and
+  // DstSubscripts are a usable delinearization with respect to Sizes.
+  auto SubscriptsAreUsable = [&]() {
+    if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 ||
+        SrcSubscripts.size() != DstSubscripts.size())
+      return false;
+    return DisableDelinearizationChecks ||
+           (validateDelinearizationResult(*SE, Sizes, SrcSubscripts, SrcPtr) &&
+            validateDelinearizationResult(*SE, Sizes, DstSubscripts, DstPtr));
+  };
+
+  // Try to use cached results from BatchDelinearization first. They are
+  // based on the terms of all accesses to the base, which can give better
+  // precision. A cached result that cannot be used or fails validation is not
+  // final: the pairwise delinearization below still gets its chance, so
+  // attaching a cache never makes a query fail that would otherwise succeed.
+  const SmallVector<const SCEV *, 4> *CachedSizes = nullptr;
+  if (BatchDelin && BatchDelin->isPopulated())
+    CachedSizes = BatchDelin->getArraySizes(SrcBase);
+
+  // The cached dimensions are only meaningful for the element size they were
+  // computed with.
+  const SCEV *CachedElemSize =
+      CachedSizes ? BatchDelin->getElementSize(SrcBase) : nullptr;
+  if (CachedElemSize && CachedElemSize == ElementSize) {
+    Sizes.assign(CachedSizes->begin(), CachedSizes->end());
+
+    // First choice: the subscripts pre-computed by populate().
+    const auto *SrcSubs = BatchDelin->getSubscripts(Src);
+    const auto *DstSubs = BatchDelin->getSubscripts(Dst);
+    if (SrcSubs && DstSubs) {
+      SrcSubscripts.assign(SrcSubs->begin(), SrcSubs->end());
+      DstSubscripts.assign(DstSubs->begin(), DstSubs->end());
+      if (SubscriptsAreUsable()) {
+        LLVM_DEBUG(dbgs() << "Using cached delinearization results\n");
+        return true;
+      }
+    }
+
+    // Second choice: derive the subscripts from the cached dimensions.
+    LLVM_DEBUG(dbgs() << "Using cached array sizes for delinearization\n");
+    SrcSubscripts.clear();
+    DstSubscripts.clear();
+    computeAccessFunctions(*SE, SrcAR, SrcSubscripts, Sizes);
+    computeAccessFunctions(*SE, DstAR, DstSubscripts, Sizes);
+    if (SubscriptsAreUsable())
+      return true;
+  }
+
+  // Fall back to pairwise delinearization.
   // First step: collect parametric terms in both array references.
   SmallVector<const SCEV *, 4> Terms;
   collectParametricTerms(*SE, SrcAR, Terms);
   collectParametricTerms(*SE, DstAR, Terms);
 
   // Second step: find subscript sizes.
-  SmallVector<const SCEV *, 4> Sizes;
+  Sizes.clear();
   findArrayDimensions(*SE, Terms, Sizes, ElementSize);
 
   // Third step: compute the access functions for each subscript.
+  SrcSubscripts.clear();
+  DstSubscripts.clear();
   computeAccessFunctions(*SE, SrcAR, SrcSubscripts, Sizes);
   computeAccessFunctions(*SE, DstAR, DstSubscripts, Sizes);
 
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 9ffa602416b05..be74482aedfa0 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -47,6 +47,7 @@
 #include "llvm/Transforms/Scalar/LoopFuse.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/Delinearization.h"
 #include "llvm/Analysis/DependenceAnalysis.h"
 #include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -2143,6 +2144,22 @@ PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) {
   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
   auto &DI = AM.getResult<DependenceAnalysis>(F);
   auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+  // DI is a result owned by the analysis manager, so it can outlive this call,
+  // while BD lives on this stack frame. Attach BD through a scope object that
+  // detaches it again on every exit path; otherwise DI would be left holding
+  // a dangling pointer.
+  // NOTE(review): BD is populated once, before any fusion. Confirm that its
+  // cached entries are never consulted for code changed by an earlier fusion.
+  BatchDelinearization BD(F, SE, LI);
+  BD.populate();
+  struct BatchDelinearizationScope {
+    DependenceInfo &Info;
+    BatchDelinearizationScope(DependenceInfo &Info,
+                              BatchDelinearization &Cache)
+        : Info(Info) {
+      Info.setBatchDelinearization(&Cache);
+    }
+    ~BatchDelinearizationScope() { Info.setBatchDelinearization(nullptr); }
+  } BDScope(DI, BD);
   auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
   auto &AC = AM.getResult<AssumptionAnalysis>(F);
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 330b4abb9942f..bef285332a6bd 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/Delinearization.h"
 #include "llvm/Analysis/DependenceAnalysis.h"
 #include "llvm/Analysis/LoopCacheAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -2139,7 +2140,9 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
            << "Computed dependence info, invoking the transform.";
   });
 
-  DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
+  BatchDelinearization BD(F, AR.SE, AR.LI);
+  BD.populate();
+  DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI, &BD);
   if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &AR, &ORE).run(LN))
     return PreservedAnalyses::all();
   U.markLoopNestChanged(true);
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 4fe74c7c3bbcd..34e5b443608b1 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -17,6 +17,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/Delinearization.h"
 #include "llvm/Analysis/DependenceAnalysis.h"
 #include "llvm/Analysis/LoopAnalysisManager.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -457,7 +458,9 @@ PreservedAnalyses LoopUnrollAndJamPass::run(LoopNest &LN,
                                             LPMUpdater &U) {
   Function &F = *LN.getParent();
 
-  DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
+  BatchDelinearization BD(F, AR.SE, AR.LI);
+  BD.populate();
+  DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI, &BD);
   OptimizationRemarkEmitter ORE(&F);
 
   bool AnyLoopRemoved = false;
diff --git a/llvm/test/Analysis/DDG/basic-loopnest.ll b/llvm/test/Analysis/DDG/basic-loopnest.ll
index 75efff570048b..61003298438f6 100644
--- a/llvm/test/Analysis/DDG/basic-loopnest.ll
+++ b/llvm/test/Analysis/DDG/basic-loopnest.ll
@@ -1,7 +1,5 @@
 ; RUN: opt < %s -disable-output "-passes=print<ddg>" 2>&1 | FileCheck %s
 
-; XFAIL: *
-; At the moment, DependenceAnalysis cannot infer `n` to be positive.
 
 
 ; CHECK-LABEL: 'DDG' for loop 'test1.for.cond1.preheader':
diff --git a/llvm/test/Analysis/DependenceAnalysis/BatchDelinearization.ll b/llvm/test/Analysis/DependenceAnalysis/BatchDelinearization.ll
new file mode 100644
index 0000000000000..27ef4c6db3f27
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/BatchDelinearization.ll
@@ -0,0 +1,147 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \
+; RUN: | FileCheck %s
+
+; Test case for batch delinearization. When multiple accesses to the same
+; base pointer are analyzed together, terms from all accesses are collected
+; to determine array dimensions, leading to better precision.
+;
+; This test has three accesses to array A:
+;   A[i*m + j]      (first load)
+;   A[i*m + j]      (store)
+;   A[(i+1)*m + j]  (second load, one row further; provides additional context)
+;
+; The third access helps provide more terms for delinearization,
+; which can improve precision when analyzing the first two accesses.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Three accesses to the same 2D array A[n][m].
+; Batch delinearization collects terms from all accesses.
+define void @batch_delin_test(i64 %n, i64 %m, ptr nocapture %A) {
+; CHECK-LABEL: 'batch_delin_test'
+; CHECK-NEXT:  Src: %load1 = load double, ptr %arrayidx1, align 8 --> Dst: %load1 = load double, ptr %arrayidx1, align 8
+; CHECK-NEXT:    da analyze - input [* *]!
+; CHECK-NEXT:  Src: %load1 = load double, ptr %arrayidx1, align 8 --> Dst: store double %add, ptr %arrayidx1, align 8
+; CHECK-NEXT:    da analyze - anti [* *|<]!
+; CHECK-NEXT:  Src: %load1 = load double, ptr %arrayidx1, align 8 --> Dst: %load2 = load double, ptr %arrayidx2, align 8
+; CHECK-NEXT:    da analyze - input [<> *]!
+; CHECK-NEXT:  Src: store double %add, ptr %arrayidx1, align 8 --> Dst: store double %add, ptr %arrayidx1, align 8
+; CHECK-NEXT:    da analyze - output [* *]!
+; CHECK-NEXT:  Src: store double %add, ptr %arrayidx1, align 8 --> Dst: %load2 = load double, ptr %arrayidx2, align 8
+; CHECK-NEXT:    da analyze - flow [<> *]!
+; CHECK-NEXT:  Src: %load2 = load double, ptr %arrayidx2, align 8 --> Dst: %load2 = load double, ptr %arrayidx2, align 8
+; CHECK-NEXT:    da analyze - input [* *]!
+;
+entry:
+  %cmp1 = icmp sgt i64 %n, 0
+  %cmp2 = icmp sgt i64 %m, 0
+  %cond = and i1 %cmp1, %cmp2
+  br i1 %cond, label %loop.i.preheader, label %exit
+
+loop.i.preheader:
+  br label %loop.i
+
+loop.i:
+  %i = phi i64 [ 0, %loop.i.preheader ], [ %i.next, %loop.i.latch ]
+  br label %loop.j
+
+loop.j:
+  %j = phi i64 [ 0, %loop.i ], [ %j.next, %loop.j ]
+  ; Compute linear index: i*m + j
+  %mul1 = mul nsw i64 %i, %m
+  %idx1 = add nsw i64 %mul1, %j
+  %arrayidx1 = getelementptr inbounds double, ptr %A, i64 %idx1
+  ; First access: load A[i*m + j]
+  %load1 = load double, ptr %arrayidx1, align 8
+  %add = fadd double %load1, 1.0
+  ; Second access: store A[i*m + j]
+  store double %add, ptr %arrayidx1, align 8
+  ; Third access at a different index: load A[(i+1)*m + j]
+  ; This provides additional terms for delinearization.
+  %i_plus_1 = add nsw i64 %i, 1
+  %mul2 = mul nsw i64 %i_plus_1, %m
+  %idx2 = add nsw i64 %mul2, %j
+  %arrayidx2 = getelementptr inbounds double, ptr %A, i64 %idx2
+  %load2 = load double, ptr %arrayidx2, align 8
+  %j.next = add nuw nsw i64 %j, 1
+  %j.cond = icmp slt i64 %j.next, %m
+  br i1 %j.cond, label %loop.j, label %loop.i.latch
+
+loop.i.latch:
+  %i.next = add nuw nsw i64 %i, 1
+  %i.cond = icmp slt i64 %i.next, %n
+  br i1 %i.cond, label %loop.i, label %exit
+
+exit:
+  ret void
+}
+
+; Test with parametric sizes where batch delinearization helps.
+; Two separate loop nests accessing the same array.
+define void @batch_delin_two_nests(i64 %n, i64 %m, ptr nocapture %A) {
+; CHECK-LABEL: 'batch_delin_two_nests'
+; CHECK-NEXT:  Src: store double 1.000000e+00, ptr %arrayidx1, align 8 --> Dst: store double 1.000000e+00, ptr %arrayidx1, align 8
+; CHECK-NEXT:    da analyze - output [* *]!
+; CHECK-NEXT:  Src: store double 1.000000e+00, ptr %arrayidx1, align 8 --> Dst: %load = load double, ptr %arrayidx2, align 8
+; CHECK-NEXT:    da analyze - flow [|<]!
+; CHECK-NEXT:  Src: %load = load double, ptr %arrayidx2, align 8 --> Dst: %load = load double, ptr %arrayidx2, align 8
+; CHECK-NEXT:    da analyze - input [* *]!
+;
+entry:
+  %cmp1 = icmp sgt i64 %n, 0
+  %cmp2 = icmp sgt i64 %m, 0
+  %cond = and i1 %cmp1, %cmp2
+  br i1 %cond, label %nest1.i.preheader, label %exit
+
+; First loop nest: stores to A[i*m + j]
+nest1.i.preheader:
+  br label %nest1.i
+
+nest1.i:
+  %i1 = phi i64 [ 0, %nest1.i.preheader ], [ %i1.next, %nest1.i.latch ]
+  br label %nest1.j
+
+nest1.j:
+  %j1 = phi i64 [ 0, %nest1.i ], [ %j1.next, %nest1.j ]
+  %mul1 = mul nsw i64 %i1, %m
+  %idx1 = add nsw i64 %mul1, %j1
+  %arrayidx1 = getelementptr inbounds double, ptr %A, i64 %idx1
+  store double 1.0, ptr %arrayidx1, align 8
+  %j1.next = add nuw nsw i64 %j1, 1
+  %j1.cond = icmp slt i64 %j1.next, %m
+  br i1 %j1.cond, label %nest1.j, label %nest1.i.latch
+
+nest1.i.latch:
+  %i1.next = add nuw nsw i64 %i1, 1
+  %i1.cond = icmp slt i64 %i1.next, %n
+  br i1 %i1.cond, label %nest1.i, label %nest2.i.preheader
+
+; Second loop nest: reads from A[k*m + l]
+nest2.i.preheader:
+  br label %nest2.i
+
+nest2.i:
+  %i2 = phi i64 [ 0, %nest2.i.preheader ], [ %i2.next, %nest2.i.latch ]
+  br label %nest2.j
+
+nest2.j:
+  %j2 = phi i64 [ 0, %nest2.i ], [ %j2.next, %nest2.j ]
+  %mul2 = mul nsw i64 %i2, %m
+  %idx2 = add nsw i64 %mul2, %j2
+  %arrayidx2 = getelementptr inbounds double, ptr %A, i64 %idx2
+  %load = load double, ptr %arrayidx2, align 8
+  %j2.next = add nuw nsw i64 %j2, 1
+  %j2.cond = icmp slt i64 %j2.next, %m
+  br i1 %j2.cond, label %nest2.j, label %nest2.i.latch
+
+nest2.i.latch:
+  %i2.next = add nuw nsw i64 %i2, 1
+  %i2.cond = icmp slt i64 %i2.next, %n
+  br i1 %i2.cond, label %nest2.i, label %exit
+
+exit:
+  ret void
+}
+
diff --git a/llvm/test/Transforms/LICM/lnicm.ll b/llvm/test/Transforms/LICM/lnicm.ll
index e331ab7d39e83..814f964666305 100644
--- a/llvm/test/Transforms/LICM/lnicm.ll
+++ b/llvm/test/Transforms/LICM/lnicm.ll
@@ -3,9 +3,6 @@
 ; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(lnicm),loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes LNICM
 ; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(licm),loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes LICM
 
-; XFAIL: *
-; Loop interchange currently fails due to a failure in dependence analysis.
-
 ; This test represents the following function:
 ; void test(int n, int m, int x[m][n], int y[n], int *z) {
 ;   for (int k = 0; k < n; k++) {
diff --git a/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll b/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
index 14836ba73433d..a5cd1cb924e84 100644
--- a/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
+++ b/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
@@ -58,21 +58,17 @@ for.end19:
   ret void
 }
 
+; With batch delinearization, the dependences are now computed correctly.
+; The interchange is still not profitable, but for a different reason.
 ; CHECK: --- !Analysis
 ; CHECK-NEXT: Pass:            loop-interchange
 ; CHECK-NEXT: Name:            Dependence
 ; CHECK-NEXT: Function:        test01
-; CHECK-NEXT: Args:
-; CHECK-NEXT:   - String:          Computed dependence info, invoking the transform.
-; CHECK-NEXT: ...
 
 ; CHECK: --- !Missed
 ; CHECK-NEXT: Pass:            loop-interchange
-; CHECK-NEXT: Name:            Dependence
+; CHECK-NEXT: Name:            InterchangeNotProfitable
 ; CHECK-NEXT: Function:        test01
-; CHECK-NEXT: Args:
-; CHECK-NEXT:   - String:          All loops have dependencies in all directions.
-; CHECK-NEXT: ...
 
 ; DELIN: --- !Analysis
 ; DELIN-NEXT: Pass:            loop-interchange
@@ -134,21 +130,17 @@ define void @test02(i32 %k, i32 %N) {
    ret void
 }
 
+; With batch delinearization, the dependences are now computed correctly
+; and the loop can be interchanged (same behavior as DELIN).
 ; CHECK: --- !Analysis
 ; CHECK-NEXT: Pass:            loop-interchange
 ; CHECK-NEXT: Name:            Dependence
 ; CHECK-NEXT: Function:        test02
-; CHECK-NEXT: Args:
-; CHECK-NEXT:   - String:          Computed dependence info, invoking the transform.
-; CHECK-NEXT: ...
 
-; CHECK: --- !Missed
+; CHECK: --- !Passed
 ; CHECK-NEXT: Pass:            loop-interchange
-; CHECK-NEXT: Name:            Dependence
+; CHECK-NEXT: Name:            Interchanged
 ; CHECK-NEXT: Function:        test02
-; CHECK-NEXT: Args:
-; CHECK-NEXT:   - String:          All loops have dependencies in all directions.
-; CHECK-NEXT: ...
 
 ; DELIN: --- !Analysis
 ; DELIN-NEXT: Pass:            loop-interchange
@@ -285,13 +277,12 @@ for.end17:
   ret void
 }
 
+; With batch delinearization, the dependences are now computed correctly.
+; The real reason for not interchanging is that loops are not tightly nested.
 ; CHECK: --- !Missed
 ; CHECK-NEXT: Pass:            loop-interchange
-; CHECK-NEXT: Name:            Dependence
+; CHECK-NEXT: Name:            NotTightlyNested
 ; CHECK-NEXT: Function:        test04
-; CHECK-NEXT: Args:
-; CHECK-NEXT:   - String:          All loops have dependencies in all directions.
-; CHECK-NEXT: ...
 
 ; DELIN: --- !Missed
 ; DELIN-NEXT: Pass:            loop-interchange
diff --git a/llvm/test/Transforms/LoopInterchange/outer-dependency-lte.ll b/llvm/test/Transforms/LoopInterchange/outer-dependency-lte.ll
index 4aba99f35678e..c8e79dc169b1d 100644
--- a/llvm/test/Transforms/LoopInterchange/outer-dependency-lte.ll
+++ b/llvm/test/Transforms/LoopInterchange/outer-dependency-lte.ll
@@ -22,7 +22,7 @@
 ; CHECK-NEXT: Name:            Dependence
 ; CHECK-NEXT: Function:        f
 ; CHECK-NEXT: Args:
-; CHECK-NEXT:   - String:          All loops have dependencies in all directions.
+; CHECK-NEXT:   - String:          Cannot interchange loops due to dependences.
 ; CHECK-NEXT: ...