Skip to content

Conversation

@ShivaChen
Copy link
Collaborator

#47259

A confused dependency(*f) in loop should prevent interchange in loops i and j.

void test_deps() {
  for (int i = 0; i <= 3; i++)
    for (int j = 0; j <= 3; j++) {
      *f ^= 0x1000;
      c[j][i] = *f;
    }
}

llvm#47259

A confused dependency(*f) in loop should prevent interchange in
loops i and j.

void test_deps() {
  for (int i = 0; i <= 3; i++)
    for (int j = 0; j <= 3; j++) {
      *f ^= 0x1000;
      c[j][i] = *f;
    }
}
@llvmbot
Copy link
Member

llvmbot commented Jan 18, 2024

@llvm/pr-subscribers-llvm-transforms

Author: None (ShivaChen)

Changes

#47259

A confused dependency(*f) in loop should prevent interchange in loops i and j.

void test_deps() {
  for (int i = 0; i &lt;= 3; i++)
    for (int j = 0; j &lt;= 3; j++) {
      *f ^= 0x1000;
      c[j][i] = *f;
    }
}

Full diff: https://github.com/llvm/llvm-project/pull/78533.diff

6 Files Affected:

  • (modified) llvm/lib/Transforms/Scalar/LoopInterchange.cpp (+7)
  • (modified) llvm/test/Transforms/LoopInterchange/lcssa.ll (+2-2)
  • (modified) llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll (+1-1)
  • (added) llvm/test/Transforms/LoopInterchange/pr47259.ll (+55)
  • (modified) llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll (+3-3)
  • (modified) llvm/test/Transforms/LoopInterchange/vector-gep-operand.ll (+5-15)
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 277f530ee25fc1..0ea8244924c0f3 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -123,6 +123,13 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
       // Track Output, Flow, and Anti dependencies.
       if (auto D = DI->depends(Src, Dst, true)) {
         assert(D->isOrdered() && "Expected an output, flow or anti dep.");
+
+        if (D->isConfused()) {
+          LLVM_DEBUG(dbgs() << "  Confused dependency between:\n"
+                            << "  " << *Src << "\n"
+                            << "  " << *Dst << "\n");
+          return false;
+        }
         // If the direction vector is negative, normalize it to
         // make it non-negative.
         if (D->normalize(SE))
diff --git a/llvm/test/Transforms/LoopInterchange/lcssa.ll b/llvm/test/Transforms/LoopInterchange/lcssa.ll
index b41eba4ef56173..d3a0675caa7997 100644
--- a/llvm/test/Transforms/LoopInterchange/lcssa.ll
+++ b/llvm/test/Transforms/LoopInterchange/lcssa.ll
@@ -180,7 +180,7 @@ for.end16:                                        ; preds = %for.exit
 ; REMARK: Interchanged
 ; REMARK-NEXT: lcssa_05
 
-define void @lcssa_05(ptr %ptr) {
+define void @lcssa_05(ptr noalias %ptr) {
 entry:
   br label %outer.header
 
@@ -225,7 +225,7 @@ for.end16:                                        ; preds = %for.exit
 ; REMARK: UnsupportedExitPHI
 ; REMARK-NEXT: lcssa_06
 
-define void @lcssa_06(ptr %ptr, ptr %ptr1) {
+define void @lcssa_06(ptr %ptr, ptr noalias %ptr1) {
 entry:
   br label %outer.header
 
diff --git a/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll b/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll
index def68ca3cd07eb..3b31c2634be71e 100644
--- a/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll
@@ -27,7 +27,7 @@
 ; REMARKS-NEXT: Name:            Interchanged
 ; REMARKS-NEXT: Function:        pr43326-triply-nested
 
-define void @pr43326-triply-nested(ptr %e, ptr %f) {
+define void @pr43326-triply-nested(ptr noalias %e, ptr %f) {
 entry:
   br label %for.outermost.header
 
diff --git a/llvm/test/Transforms/LoopInterchange/pr47259.ll b/llvm/test/Transforms/LoopInterchange/pr47259.ll
new file mode 100644
index 00000000000000..2a365a62795f81
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/pr47259.ll
@@ -0,0 +1,55 @@
+; REQUIRES: asserts
+; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
+; RUN:     -S -debug 2>&1 | FileCheck %s
+
+;; Test that a confused dependency in loop should prevent interchange in
+;; loops i and j.
+;;
+;; void test_deps() {
+;;   for (int i = 0; i <= 3; i++)
+;;     for (int j = 0; j <= 3; j++) {
+;;       *f ^= 0x1000;
+;;       c[j][i] = *f;
+;;     }
+;; }
+
+
+; CHECK:  Confused dependency between:
+; CHECK:    store i32 %xor, ptr %arrayidx6, align 4
+; CHECK:    %1 = load i32, ptr %0, align 4
+; CHECK-NOT: Loops interchanged.
+
+@a = global i32 0, align 4
+@f = global ptr @a, align 8
+@c = global [4 x [4 x i32]] zeroinitializer, align 8
+
+define void @test_deps() {
+entry:
+  %0 = load ptr, ptr @f, align 8
+  br label %for.cond1.preheader
+
+; Loop:
+for.cond1.preheader:                              ; preds = %entry, %for.cond.cleanup3
+  %indvars.iv16 = phi i64 [ 0, %entry ], [ %indvars.iv.next17, %for.cond.cleanup3 ]
+  br label %for.body4
+
+for.cond.cleanup3:                                ; preds = %for.body4
+  %indvars.iv.next17 = add nuw nsw i64 %indvars.iv16, 1
+  %exitcond18 = icmp ne i64 %indvars.iv.next17, 4
+  br i1 %exitcond18, label %for.cond1.preheader, label %for.cond.cleanup
+
+for.body4:                                        ; preds = %for.cond1.preheader, %for.body4
+  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ]
+  %1 = load i32, ptr %0, align 4
+  %xor = xor i32 %1, 4096
+  store i32 %xor, ptr %0, align 4
+  %arrayidx6 = getelementptr inbounds [4 x [4 x i32]], ptr @c, i64 0, i64 %indvars.iv, i64 %indvars.iv16
+  store i32 %xor, ptr %arrayidx6, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 4
+  br i1 %exitcond, label %for.body4, label %for.cond.cleanup3
+
+; Exit blocks
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup3
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
index fa2021a15a0801..40cf1e9623b8c7 100644
--- a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
+++ b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
@@ -156,7 +156,7 @@ for1.loopexit:                                 ; preds = %for1.inc
 ; REMARKS-NEXT: Name:            UnsupportedPHIOuter
 ; REMARKS-NEXT: Function:        test4
 
-define i64 @test4(ptr %Arr, ptr %dst) {
+define i64 @test4(ptr %Arr, ptr noalias %dst) {
 entry:
   %gep.dst = getelementptr inbounds i64, ptr %dst, i64 42
   br label %for1.header
@@ -234,7 +234,7 @@ for1.loopexit:                                 ; preds = %for1.inc
 ; REMARKS-NEXT: Name:            Interchanged
 ; REMARKS-NEXT: Function:        test5
 
-define float @test5(ptr %Arr, ptr %Arr2) {
+define float @test5(ptr %Arr, ptr noalias %Arr2) {
 entry:
   br label %outer.header
 
@@ -274,7 +274,7 @@ for.exit:                                         ; preds = %outer.inc
 ; REMARKS-NEXT: Name:            UnsupportedPHIOuter
 ; REMARKS-NEXT: Function:        test6
 
-define float @test6(ptr %Arr, ptr %Arr2) {
+define float @test6(ptr %Arr, ptr noalias %Arr2) {
 entry:
   br label %outer.header
 
diff --git a/llvm/test/Transforms/LoopInterchange/vector-gep-operand.ll b/llvm/test/Transforms/LoopInterchange/vector-gep-operand.ll
index 03e3b4b7408b5c..b4f9d3f2a3685d 100644
--- a/llvm/test/Transforms/LoopInterchange/vector-gep-operand.ll
+++ b/llvm/test/Transforms/LoopInterchange/vector-gep-operand.ll
@@ -6,35 +6,25 @@
 define void @test(ptr noalias %src, ptr %dst) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[INNER_PREHEADER:%.*]]
-; CHECK:       outer.header.preheader:
 ; CHECK-NEXT:    br label [[OUTER_HEADER:%.*]]
 ; CHECK:       outer.header:
-; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[OUTER_LATCH:%.*]] ], [ 0, [[OUTER_HEADER_PREHEADER:%.*]] ]
-; CHECK-NEXT:    br label [[INNER_SPLIT1:%.*]]
-; CHECK:       inner.preheader:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[OUTER_LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    br label [[INNER:%.*]]
 ; CHECK:       inner:
-; CHECK-NEXT:    [[J:%.*]] = phi i64 [ [[TMP0:%.*]], [[INNER_SPLIT:%.*]] ], [ 0, [[INNER_PREHEADER]] ]
-; CHECK-NEXT:    br label [[OUTER_HEADER_PREHEADER]]
-; CHECK:       inner.split1:
+; CHECK-NEXT:    [[J:%.*]] = phi i64 [ 0, [[OUTER_HEADER]] ], [ [[J_NEXT:%.*]], [[INNER]] ]
 ; CHECK-NEXT:    [[SRC_GEP:%.*]] = getelementptr inbounds [256 x float], ptr [[SRC:%.*]], <2 x i64> <i64 0, i64 1>, i64 [[J]]
 ; CHECK-NEXT:    [[SRC_0:%.*]] = extractelement <2 x ptr> [[SRC_GEP]], i32 0
 ; CHECK-NEXT:    [[LV_0:%.*]] = load float, ptr [[SRC_0]], align 4
 ; CHECK-NEXT:    [[ADD_0:%.*]] = fadd float [[LV_0]], 1.000000e+00
 ; CHECK-NEXT:    [[DST_GEP:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[J]]
 ; CHECK-NEXT:    store float [[ADD_0]], ptr [[DST_GEP]], align 4
-; CHECK-NEXT:    [[J_NEXT:%.*]] = add nuw nsw i64 [[J]], 1
+; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J]], 1
 ; CHECK-NEXT:    [[INNER_EXITCOND:%.*]] = icmp eq i64 [[J_NEXT]], 100
-; CHECK-NEXT:    br label [[OUTER_LATCH]]
-; CHECK:       inner.split:
-; CHECK-NEXT:    [[TMP0]] = add nuw nsw i64 [[J]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 100
-; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT:%.*]], label [[INNER]]
+; CHECK-NEXT:    br i1 [[INNER_EXITCOND]], label [[OUTER_LATCH]], label [[INNER]]
 ; CHECK:       outer.latch:
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[OUTER_EXITCOND:%.*]] = icmp eq i32 [[I_NEXT]], 100
-; CHECK-NEXT:    br i1 [[OUTER_EXITCOND]], label [[INNER_SPLIT]], label [[OUTER_HEADER]]
+; CHECK-NEXT:    br i1 [[OUTER_EXITCOND]], label [[EXIT:%.*]], label [[OUTER_HEADER]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;

Copy link
Contributor

@artagnon artagnon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The core change LGTM; a few minor suggestions for the tests.

Comment on lines +22 to +24
@a = global i32 0, align 4
@f = global ptr @a, align 8
@c = global [4 x [4 x i32]] zeroinitializer, align 8
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do these need to be globals? Can they not be passed as arguments to the function?

Comment on lines +2 to +3
; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
; RUN: -S -debug 2>&1 | FileCheck %s
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

-disable-output -debug-only=loop-interchange? Also, is -cache-line-size=64 required for the test?

Comment on lines +42 to +50
%indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ]
%1 = load i32, ptr %0, align 4
%xor = xor i32 %1, 4096
store i32 %xor, ptr %0, align 4
%arrayidx6 = getelementptr inbounds [4 x [4 x i32]], ptr @c, i64 0, i64 %indvars.iv, i64 %indvars.iv16
store i32 %xor, ptr %arrayidx6, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 4
br i1 %exitcond, label %for.body4, label %for.cond.cleanup3
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice if the test were cleaned up a bit. Example: s/indvars.iv/iv.inner/, s/for.body.4/inner.loop/, s/for.cond1.preheader/outer.loop/?

; REMARK-NEXT: lcssa_06

define void @lcssa_06(ptr %ptr, ptr %ptr1) {
define void @lcssa_06(ptr %ptr, ptr noalias %ptr1) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are all these other noalias changes required as part of the patch? If so, I can recommend marking both arguments noalias, and mentioning why in the commit message (eg. if these tests were faulty to begin with).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was this test faulty?

@artagnon artagnon requested a review from nikic September 13, 2024 19:46
@artagnon
Copy link
Contributor

#47259

Kindly write this as "Fixes ..." and move it to the last line of the message, so that the issue will be auto-closed.

@f = global ptr @a, align 8
@c = global [4 x [4 x i32]] zeroinitializer, align 8

define void @test_deps() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would recommend naming this function confused_deps, as the "test" part is already implied.

@ShivaChen ShivaChen closed this May 5, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants