-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[LoopInterchange] Forbid interchange when the dependency is confused #78533
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LoopInterchange] Forbid interchange when the dependency is confused #78533
Conversation
llvm#47259 A confused dependency(*f) in loop should prevent interchange in loops i and j. void test_deps() { for (int i = 0; i <= 3; i++) for (int j = 0; j <= 3; j++) { *f ^= 0x1000; c[j][i] = *f; } }
|
@llvm/pr-subscribers-llvm-transforms Author: None (ShivaChen) ChangesA confused dependency(*f) in loop should prevent interchange in loops i and j. Full diff: https://github.com/llvm/llvm-project/pull/78533.diff 6 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 277f530ee25fc1..0ea8244924c0f3 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -123,6 +123,13 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
// Track Output, Flow, and Anti dependencies.
if (auto D = DI->depends(Src, Dst, true)) {
assert(D->isOrdered() && "Expected an output, flow or anti dep.");
+
+ if (D->isConfused()) {
+ LLVM_DEBUG(dbgs() << " Confused dependency between:\n"
+ << " " << *Src << "\n"
+ << " " << *Dst << "\n");
+ return false;
+ }
// If the direction vector is negative, normalize it to
// make it non-negative.
if (D->normalize(SE))
diff --git a/llvm/test/Transforms/LoopInterchange/lcssa.ll b/llvm/test/Transforms/LoopInterchange/lcssa.ll
index b41eba4ef56173..d3a0675caa7997 100644
--- a/llvm/test/Transforms/LoopInterchange/lcssa.ll
+++ b/llvm/test/Transforms/LoopInterchange/lcssa.ll
@@ -180,7 +180,7 @@ for.end16: ; preds = %for.exit
; REMARK: Interchanged
; REMARK-NEXT: lcssa_05
-define void @lcssa_05(ptr %ptr) {
+define void @lcssa_05(ptr noalias %ptr) {
entry:
br label %outer.header
@@ -225,7 +225,7 @@ for.end16: ; preds = %for.exit
; REMARK: UnsupportedExitPHI
; REMARK-NEXT: lcssa_06
-define void @lcssa_06(ptr %ptr, ptr %ptr1) {
+define void @lcssa_06(ptr %ptr, ptr noalias %ptr1) {
entry:
br label %outer.header
diff --git a/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll b/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll
index def68ca3cd07eb..3b31c2634be71e 100644
--- a/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll
@@ -27,7 +27,7 @@
; REMARKS-NEXT: Name: Interchanged
; REMARKS-NEXT: Function: pr43326-triply-nested
-define void @pr43326-triply-nested(ptr %e, ptr %f) {
+define void @pr43326-triply-nested(ptr noalias %e, ptr %f) {
entry:
br label %for.outermost.header
diff --git a/llvm/test/Transforms/LoopInterchange/pr47259.ll b/llvm/test/Transforms/LoopInterchange/pr47259.ll
new file mode 100644
index 00000000000000..2a365a62795f81
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/pr47259.ll
@@ -0,0 +1,55 @@
+; REQUIRES: asserts
+; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
+; RUN: -S -debug 2>&1 | FileCheck %s
+
+;; Test that a confused dependency in loop should prevent interchange in
+;; loops i and j.
+;;
+;; void test_deps() {
+;; for (int i = 0; i <= 3; i++)
+;; for (int j = 0; j <= 3; j++) {
+;; *f ^= 0x1000;
+;; c[j][i] = *f;
+;; }
+;; }
+
+
+; CHECK: Confused dependency between:
+; CHECK: store i32 %xor, ptr %arrayidx6, align 4
+; CHECK: %1 = load i32, ptr %0, align 4
+; CHECK-NOT: Loops interchanged.
+
+@a = global i32 0, align 4
+@f = global ptr @a, align 8
+@c = global [4 x [4 x i32]] zeroinitializer, align 8
+
+define void @test_deps() {
+entry:
+ %0 = load ptr, ptr @f, align 8
+ br label %for.cond1.preheader
+
+; Loop:
+for.cond1.preheader: ; preds = %entry, %for.cond.cleanup3
+ %indvars.iv16 = phi i64 [ 0, %entry ], [ %indvars.iv.next17, %for.cond.cleanup3 ]
+ br label %for.body4
+
+for.cond.cleanup3: ; preds = %for.body4
+ %indvars.iv.next17 = add nuw nsw i64 %indvars.iv16, 1
+ %exitcond18 = icmp ne i64 %indvars.iv.next17, 4
+ br i1 %exitcond18, label %for.cond1.preheader, label %for.cond.cleanup
+
+for.body4: ; preds = %for.cond1.preheader, %for.body4
+ %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ]
+ %1 = load i32, ptr %0, align 4
+ %xor = xor i32 %1, 4096
+ store i32 %xor, ptr %0, align 4
+ %arrayidx6 = getelementptr inbounds [4 x [4 x i32]], ptr @c, i64 0, i64 %indvars.iv, i64 %indvars.iv16
+ store i32 %xor, ptr %arrayidx6, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, 4
+ br i1 %exitcond, label %for.body4, label %for.cond.cleanup3
+
+; Exit blocks
+for.cond.cleanup: ; preds = %for.cond.cleanup3
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
index fa2021a15a0801..40cf1e9623b8c7 100644
--- a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
+++ b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
@@ -156,7 +156,7 @@ for1.loopexit: ; preds = %for1.inc
; REMARKS-NEXT: Name: UnsupportedPHIOuter
; REMARKS-NEXT: Function: test4
-define i64 @test4(ptr %Arr, ptr %dst) {
+define i64 @test4(ptr %Arr, ptr noalias %dst) {
entry:
%gep.dst = getelementptr inbounds i64, ptr %dst, i64 42
br label %for1.header
@@ -234,7 +234,7 @@ for1.loopexit: ; preds = %for1.inc
; REMARKS-NEXT: Name: Interchanged
; REMARKS-NEXT: Function: test5
-define float @test5(ptr %Arr, ptr %Arr2) {
+define float @test5(ptr %Arr, ptr noalias %Arr2) {
entry:
br label %outer.header
@@ -274,7 +274,7 @@ for.exit: ; preds = %outer.inc
; REMARKS-NEXT: Name: UnsupportedPHIOuter
; REMARKS-NEXT: Function: test6
-define float @test6(ptr %Arr, ptr %Arr2) {
+define float @test6(ptr %Arr, ptr noalias %Arr2) {
entry:
br label %outer.header
diff --git a/llvm/test/Transforms/LoopInterchange/vector-gep-operand.ll b/llvm/test/Transforms/LoopInterchange/vector-gep-operand.ll
index 03e3b4b7408b5c..b4f9d3f2a3685d 100644
--- a/llvm/test/Transforms/LoopInterchange/vector-gep-operand.ll
+++ b/llvm/test/Transforms/LoopInterchange/vector-gep-operand.ll
@@ -6,35 +6,25 @@
define void @test(ptr noalias %src, ptr %dst) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[INNER_PREHEADER:%.*]]
-; CHECK: outer.header.preheader:
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
; CHECK: outer.header:
-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[OUTER_LATCH:%.*]] ], [ 0, [[OUTER_HEADER_PREHEADER:%.*]] ]
-; CHECK-NEXT: br label [[INNER_SPLIT1:%.*]]
-; CHECK: inner.preheader:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[OUTER_LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[INNER:%.*]]
; CHECK: inner:
-; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[TMP0:%.*]], [[INNER_SPLIT:%.*]] ], [ 0, [[INNER_PREHEADER]] ]
-; CHECK-NEXT: br label [[OUTER_HEADER_PREHEADER]]
-; CHECK: inner.split1:
+; CHECK-NEXT: [[J:%.*]] = phi i64 [ 0, [[OUTER_HEADER]] ], [ [[J_NEXT:%.*]], [[INNER]] ]
; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds [256 x float], ptr [[SRC:%.*]], <2 x i64> <i64 0, i64 1>, i64 [[J]]
; CHECK-NEXT: [[SRC_0:%.*]] = extractelement <2 x ptr> [[SRC_GEP]], i32 0
; CHECK-NEXT: [[LV_0:%.*]] = load float, ptr [[SRC_0]], align 4
; CHECK-NEXT: [[ADD_0:%.*]] = fadd float [[LV_0]], 1.000000e+00
; CHECK-NEXT: [[DST_GEP:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[J]]
; CHECK-NEXT: store float [[ADD_0]], ptr [[DST_GEP]], align 4
-; CHECK-NEXT: [[J_NEXT:%.*]] = add nuw nsw i64 [[J]], 1
+; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 1
; CHECK-NEXT: [[INNER_EXITCOND:%.*]] = icmp eq i64 [[J_NEXT]], 100
-; CHECK-NEXT: br label [[OUTER_LATCH]]
-; CHECK: inner.split:
-; CHECK-NEXT: [[TMP0]] = add nuw nsw i64 [[J]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 100
-; CHECK-NEXT: br i1 [[TMP1]], label [[EXIT:%.*]], label [[INNER]]
+; CHECK-NEXT: br i1 [[INNER_EXITCOND]], label [[OUTER_LATCH]], label [[INNER]]
; CHECK: outer.latch:
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[OUTER_EXITCOND:%.*]] = icmp eq i32 [[I_NEXT]], 100
-; CHECK-NEXT: br i1 [[OUTER_EXITCOND]], label [[INNER_SPLIT]], label [[OUTER_HEADER]]
+; CHECK-NEXT: br i1 [[OUTER_EXITCOND]], label [[EXIT:%.*]], label [[OUTER_HEADER]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
|
artagnon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The core change LGTM; a few minor suggestions for the tests.
| @a = global i32 0, align 4 | ||
| @f = global ptr @a, align 8 | ||
| @c = global [4 x [4 x i32]] zeroinitializer, align 8 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do these need to be globals? Can they not be passed as arguments to the function?
| ; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \ | ||
| ; RUN: -S -debug 2>&1 | FileCheck %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
-disable-output -debug-only=loop-interchange? Also, is -cache-line-size=64 required for the test?
| %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ] | ||
| %1 = load i32, ptr %0, align 4 | ||
| %xor = xor i32 %1, 4096 | ||
| store i32 %xor, ptr %0, align 4 | ||
| %arrayidx6 = getelementptr inbounds [4 x [4 x i32]], ptr @c, i64 0, i64 %indvars.iv, i64 %indvars.iv16 | ||
| store i32 %xor, ptr %arrayidx6, align 4 | ||
| %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||
| %exitcond = icmp ne i64 %indvars.iv.next, 4 | ||
| br i1 %exitcond, label %for.body4, label %for.cond.cleanup3 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be nice if the test were cleaned up a bit. Example: s/indvars.iv/iv.inner/, s/for.body.4/inner.loop/, s/for.cond1.preheader/outer.loop/?
| ; REMARK-NEXT: lcssa_06 | ||
|
|
||
| define void @lcssa_06(ptr %ptr, ptr %ptr1) { | ||
| define void @lcssa_06(ptr %ptr, ptr noalias %ptr1) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are all these other noalias changes required as part of the patch? If so, I can recommend marking both arguments noalias, and mentioning why in the commit message (eg. if these tests were faulty to begin with).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Was this test faulty?
|
Kindly write this as "Fixes ..." and move it to the last line of the message, so that the issue will be auto-closed. |
| @f = global ptr @a, align 8 | ||
| @c = global [4 x [4 x i32]] zeroinitializer, align 8 | ||
|
|
||
| define void @test_deps() { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would recommend naming this function confused_deps, as the "test" part is already implied.
#47259
A confused dependency(*f) in loop should prevent interchange in loops i and j.