llvm · kasuga-fj · Jun 5, 2025 · May 19, 2025 · Jun 4, 2025 · Jun 4, 2025
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -209,8 +209,24 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
             Direction = '*';
           Dep.push_back(Direction);
         }
+
+        // In some cases, the Levels of Dependence may be less than the depth of
+        // the loop nest, particularly when the array's dimension is smaller
+        // than the loop nest depth. In such cases, fill the end of the
+        // direction vector with '*'
+        // TODO: This can be false dependency in some cases, e.g.,
+        //
+        //   for (int i=0; i<N; i++)
+        //     for (int j=0; j<N; j++)
+        //       for (int k=0; k<N; k++)
+        //         v[i] += a[i][j][k];
+        //
+        // In this example, a dependency exists between the load and store of
+        // v[i]. The direction vector returned by DependenceInfo would be [=],
+        // and thus it would become [= * *]. However, if the addition is
+        // associative, exchanging the j-loop and k-loop is legal.
         while (Dep.size() != Level) {
-          Dep.push_back('I');
+          Dep.push_back('*');
         }
 
         // Make sure we only add unique entries to the dependency matrix.
@@ -1214,7 +1230,7 @@ LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
 static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId) {
   for (unsigned I = 0; I != DepMatrix.size(); I++) {
     char Dir = DepMatrix[I][LoopId];
-    if (Dir != 'I' && Dir != '=')
+    if (Dir != '=')
       return false;
   }
   return true;

diff --git a/llvm/test/Transforms/LoopInterchange/dependence-levels-less-than-loop-depth.ll b/llvm/test/Transforms/LoopInterchange/dependence-levels-less-than-loop-depth.ll
@@ -0,0 +1,93 @@
+; REQUIRES: asserts
+; RUN: opt < %s -passes=loop-interchange -verify-dom-info -verify-loop-info \
+; RUN:     -disable-output -debug 2>&1 | FileCheck %s
+
+;; In the following case, p0 and p1 may alias, so the direction vector must be [* *].
+;;
+;; void may_alias(float *p0, float *p1) {
+;;   for (int i = 0; i < 4; i++)
+;;     for (int j = 0; j < 4; j++)
+;;       p0[4 * i + j] = p1[4 * j + i];
+;; }
+
+; CHECK:      Dependency matrix before interchange:
+; CHECK-NEXT: * *
+; CHECK-NEXT: Processing InnerLoopId = 1 and OuterLoopId = 0
+define void @may_alias(ptr %p0, ptr %p1) {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j
+
+for.j:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+  %idx.0 = getelementptr inbounds [4 x [4 x float]], ptr %p0, i32 0, i32 %j, i32 %i
+  %idx.1 = getelementptr inbounds [4 x [4 x float]], ptr %p1, i32 0, i32 %i, i32 %j
+  %0 = load float, ptr %idx.0, align 4
+  store float %0, ptr %idx.1, align 4
+  %j.inc = add nuw nsw i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 4
+  br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add nuw nsw i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 4
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
+
+@A = global [4 x [4 x [4 x float]]] zeroinitializer, align 4
+@V = global [4 x float] zeroinitializer, align 4
+
+;; In the following case, there is an all direction dependence for the j-loop
+;; and k-loop.
+;;
+;; for (int i = 0; i < 4; i++)
+;;   for (int j = 0; j < 4; j++)
+;;     for (int k = 0; k < 4; k++)
+;;       V[i] += A[i][j][k];
+
+; CHECK:      Dependency matrix before interchange:
+; CHECK-NEXT: = * *
+; CHECK-NEXT: Processing InnerLoopId = 2 and OuterLoopId = 1
+define void @partial_reduction() {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  br label %for.j.header
+
+for.j.header:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
+  br label %for.k
+
+for.k:
+  %k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
+  %idx.a = getelementptr inbounds [4 x [4 x [4 x float]]], ptr @A, i32 0, i32 %i, i32 %j, i32 %k
+  %idx.v = getelementptr inbounds [4 x float], ptr @V, i32 0, i32 %i
+  %0 = load float, ptr %idx.a, align 4
+  %1 = load float, ptr %idx.v, align 4
+  %add = fadd fast float %0, %1
+  store float %add, ptr %idx.v, align 4
+  %k.inc = add nuw nsw i32 %k, 1
+  %cmp.k = icmp slt i32 %k.inc, 4
+  br i1 %cmp.k, label %for.k, label %for.j.latch
+
+for.j.latch:
+  %j.inc = add nuw nsw i32 %j, 1
+  %cmp.j = icmp slt i32 %j.inc, 4
+  br i1 %cmp.j, label %for.j.header, label %for.i.latch
+
+for.i.latch:
+  %i.inc = add nuw nsw i32 %i, 1
+  %cmp.i = icmp slt i32 %i.inc, 4
+  br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll b/llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll
@@ -71,10 +71,11 @@ for.end8:                                         ; preds = %for.cond1.for.inc6_
 
 ;; Not tightly nested. Do not interchange.
 ;; Not interchanged hence the phi's in the inner loop will not be split.
+;; FIXME: Currently this loops is rejected for dependence reasons.
 
 ; CHECK: --- !Missed
 ; CHECK-NEXT: Pass:            loop-interchange
-; CHECK-NEXT: Name:            UnsupportedPHIOuter
+; CHECK-NEXT: Name:            Dependence
 ; CHECK-NEXT: Function:        reduction_03
 
 ; IR-LABEL: @reduction_03(

diff --git a/llvm/test/Transforms/LoopInterchange/multilevel-partial-reduction.ll b/llvm/test/Transforms/LoopInterchange/multilevel-partial-reduction.ll
@@ -9,14 +9,16 @@
 ; the innermost loop has a reduction operation which is however not in a form
 ; that loop interchange can handle. Interchanging the outermost and the
 ; middle loops would intervene with the reduction and cause miscompile.
+;
+; FIXME: Currently interchanges are rejected for dependence reasons.
 
 ; REMARKS: --- !Missed
 ; REMARKS-NEXT: Pass:            loop-interchange
-; REMARKS-NEXT: Name:            UnsupportedPHIInner
+; REMARKS-NEXT: Name:            Dependence
 ; REMARKS-NEXT: Function:        test7
 ; REMARKS: --- !Missed
 ; REMARKS-NEXT: Pass:            loop-interchange
-; REMARKS-NEXT: Name:            UnsupportedPHIInner
+; REMARKS-NEXT: Name:            Dependence
 ; REMARKS-NEXT: Function:        test7
 
 define i32 @test7() {

diff --git a/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll b/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll
@@ -4,7 +4,7 @@
 
 ; Triply nested loop, should be able to do interchange three times
 ; to get the ideal access pattern.
-; void f(int e[10][10][10], int f[10][10][10]) {
+; void f(int e[restrict 10][10][10], int f[restrict 10][10][10]) {
 ;   for (int a = 0; a < 10; a++) {
 ;     for (int b = 0; b < 10; b++) {
 ;       for (int c = 0; c < 10; c++) {
@@ -35,7 +35,7 @@
 ; REMARKS-NEXT: Name:            Interchanged
 ; REMARKS-NEXT: Function:        pr43326-triply-nested
 
-define void @pr43326-triply-nested(ptr %e, ptr %f) {
+define void @pr43326-triply-nested(ptr noalias %e, ptr noalias %f) {
 entry:
   br label %for.outermost.header
 

diff --git a/llvm/test/Transforms/LoopInterchange/unique-dep-matrix.ll b/llvm/test/Transforms/LoopInterchange/unique-dep-matrix.ll
@@ -2,7 +2,7 @@
 ; RUN: opt < %s -passes=loop-interchange -S -debug 2>&1 | FileCheck %s
 
 ; CHECK:       Dependency matrix before interchange:
-; CHECK-NEXT:  I I
+; CHECK-NEXT:  * *
 ; CHECK-NEXT:  = *
 ; CHECK-NEXT:  < *
 ; CHECK-NEXT:  Processing InnerLoopId