-
Notifications
You must be signed in to change notification settings - Fork 15.2k
release/20.x: [SLP] Check for PHI nodes (potentially cycles!) when checking dependencies #127294
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…ncies When checking for dependencies for gather nodes whose users have the same last instruction, we cannot rely on the index order if there is a cycle (even a potential one!) in the graph, because the ordering may then be incorrect and cause a compiler crash. Fixes llvm#127128 (cherry picked from commit ac217ee)
|
@llvm/pr-subscribers-llvm-transforms Author: None (llvmbot) Changes: Backport ac217ee. Requested by: @tstellar. Full diff: https://github.com/llvm/llvm-project/pull/127294.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 19963e780ebd3..7b20eda550095 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13181,8 +13181,16 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
continue;
// If the user instruction is used for some reason in different
// vectorized nodes - make it depend on index.
+ // If any vector node is PHI node, this dependency might not work
+ // because of cycle dependencies, so disable it.
if (TEUseEI.UserTE != UseEI.UserTE &&
- TEUseEI.UserTE->Idx < UseEI.UserTE->Idx)
+ (TEUseEI.UserTE->Idx < UseEI.UserTE->Idx ||
+ any_of(
+ VectorizableTree,
+ [](const std::unique_ptr<TreeEntry> &TE) {
+ return TE->State == TreeEntry::Vectorize &&
+ TE->getOpcode() == Instruction::PHI;
+ })))
continue;
}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
index 5562291dbb6be..bf3f0c4df74e4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
@@ -31,7 +31,7 @@ define void @test() {
; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[I2]], 0.000000e+00
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <2 x i32> <i32 poison, i32 0>
; CHECK-NEXT: [[TMP9]] = insertelement <2 x float> [[TMP8]], float [[I2]], i32 0
-; CHECK-NEXT: [[TMP10]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP10]] = insertelement <2 x float> [[TMP2]], float [[I2]], i32 0
; CHECK-NEXT: br i1 [[TOBOOL]], label [[BB1]], label [[BB2]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
index 166c819098c8c..d649465c9ff12 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
@@ -8,7 +8,7 @@
; YAML: Function: test
; YAML: Args:
; YAML: - String: 'Stores SLP vectorized with cost '
-; YAML: - Cost: '-6'
+; YAML: - Cost: '-3'
; YAML: - String: ' and with tree size '
; YAML: - TreeSize: '14'
; YAML: ...
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll
new file mode 100644
index 0000000000000..22e7e6a8e6624
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell < %s | FileCheck %s
+
+define void @test(float %0) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: float [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = fdiv <2 x float> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = fdiv <2 x float> [[TMP4]], zeroinitializer
+; CHECK-NEXT: br label %[[BB6:.*]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x float> zeroinitializer, [[TMP7]]
+; CHECK-NEXT: br label %[[BB10:.*]]
+; CHECK: [[BB9:.*]]:
+; CHECK-NEXT: br label %[[BB10]]
+; CHECK: [[BB10]]:
+; CHECK-NEXT: [[TMP11:%.*]] = phi <2 x float> [ [[TMP8]], %[[BB6]] ], [ poison, %[[BB9]] ]
+; CHECK-NEXT: br label %[[BB12:.*]]
+; CHECK: [[BB12]]:
+; CHECK-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x float> [[TMP11]], [[TMP13]]
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[TMP14]], i32 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP14]], i32 1
+; CHECK-NEXT: [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = call float @llvm.fabs.f32(float [[TMP17]])
+; CHECK-NEXT: ret void
+;
+ %2 = fdiv float 0.000000e+00, 0.000000e+00
+ %3 = fdiv float 0.000000e+00, 0.000000e+00
+ %4 = fdiv float %0, 0.000000e+00
+ br label %5
+
+5:
+ %6 = fmul float %4, 0.000000e+00
+ %7 = fsub float 0.000000e+00, %6
+ %8 = fmul float %3, 0.000000e+00
+ %9 = fsub float 0.000000e+00, %8
+ br label %11
+
+10:
+ br label %11
+
+11:
+ %12 = phi float [ %7, %5 ], [ 0.000000e+00, %10 ]
+ %13 = phi float [ %9, %5 ], [ 0.000000e+00, %10 ]
+ br label %14
+
+14:
+ %15 = fmul float %2, 0.000000e+00
+ %16 = fsub float %12, %15
+ %17 = fmul float %4, 0.000000e+00
+ %18 = fsub float %13, %17
+ %19 = fadd float %16, %18
+ %20 = call float @llvm.fabs.f32(float %19)
+ ret void
+}
+
|
|
@llvm/pr-subscribers-vectorizers Author: None (llvmbot) Changes: Backport ac217ee. Requested by: @tstellar. Full diff: https://github.com/llvm/llvm-project/pull/127294.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 19963e780ebd3..7b20eda550095 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13181,8 +13181,16 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
continue;
// If the user instruction is used for some reason in different
// vectorized nodes - make it depend on index.
+ // If any vector node is PHI node, this dependency might not work
+ // because of cycle dependencies, so disable it.
if (TEUseEI.UserTE != UseEI.UserTE &&
- TEUseEI.UserTE->Idx < UseEI.UserTE->Idx)
+ (TEUseEI.UserTE->Idx < UseEI.UserTE->Idx ||
+ any_of(
+ VectorizableTree,
+ [](const std::unique_ptr<TreeEntry> &TE) {
+ return TE->State == TreeEntry::Vectorize &&
+ TE->getOpcode() == Instruction::PHI;
+ })))
continue;
}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
index 5562291dbb6be..bf3f0c4df74e4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
@@ -31,7 +31,7 @@ define void @test() {
; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[I2]], 0.000000e+00
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <2 x i32> <i32 poison, i32 0>
; CHECK-NEXT: [[TMP9]] = insertelement <2 x float> [[TMP8]], float [[I2]], i32 0
-; CHECK-NEXT: [[TMP10]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP10]] = insertelement <2 x float> [[TMP2]], float [[I2]], i32 0
; CHECK-NEXT: br i1 [[TOBOOL]], label [[BB1]], label [[BB2]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
index 166c819098c8c..d649465c9ff12 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
@@ -8,7 +8,7 @@
; YAML: Function: test
; YAML: Args:
; YAML: - String: 'Stores SLP vectorized with cost '
-; YAML: - Cost: '-6'
+; YAML: - Cost: '-3'
; YAML: - String: ' and with tree size '
; YAML: - TreeSize: '14'
; YAML: ...
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll
new file mode 100644
index 0000000000000..22e7e6a8e6624
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell < %s | FileCheck %s
+
+define void @test(float %0) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: float [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = fdiv <2 x float> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = fdiv <2 x float> [[TMP4]], zeroinitializer
+; CHECK-NEXT: br label %[[BB6:.*]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x float> zeroinitializer, [[TMP7]]
+; CHECK-NEXT: br label %[[BB10:.*]]
+; CHECK: [[BB9:.*]]:
+; CHECK-NEXT: br label %[[BB10]]
+; CHECK: [[BB10]]:
+; CHECK-NEXT: [[TMP11:%.*]] = phi <2 x float> [ [[TMP8]], %[[BB6]] ], [ poison, %[[BB9]] ]
+; CHECK-NEXT: br label %[[BB12:.*]]
+; CHECK: [[BB12]]:
+; CHECK-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x float> [[TMP11]], [[TMP13]]
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[TMP14]], i32 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP14]], i32 1
+; CHECK-NEXT: [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = call float @llvm.fabs.f32(float [[TMP17]])
+; CHECK-NEXT: ret void
+;
+ %2 = fdiv float 0.000000e+00, 0.000000e+00
+ %3 = fdiv float 0.000000e+00, 0.000000e+00
+ %4 = fdiv float %0, 0.000000e+00
+ br label %5
+
+5:
+ %6 = fmul float %4, 0.000000e+00
+ %7 = fsub float 0.000000e+00, %6
+ %8 = fmul float %3, 0.000000e+00
+ %9 = fsub float 0.000000e+00, %8
+ br label %11
+
+10:
+ br label %11
+
+11:
+ %12 = phi float [ %7, %5 ], [ 0.000000e+00, %10 ]
+ %13 = phi float [ %9, %5 ], [ 0.000000e+00, %10 ]
+ br label %14
+
+14:
+ %15 = fmul float %2, 0.000000e+00
+ %16 = fsub float %12, %15
+ %17 = fmul float %4, 0.000000e+00
+ %18 = fsub float %13, %17
+ %19 = fadd float %16, %18
+ %20 = call float @llvm.fabs.f32(float %19)
+ ret void
+}
+
|
|
@nikic What do you think about backporting this? |
nikic
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like there is a test failure in Transforms/SLPVectorizer/X86/perfect-matched-reused-bv.ll.
|
@alexey-bataev Am I correct in thinking that the |
|
Yes, updating should be enough |
|
Closing in favor of #128371. |
Backport ac217ee
Requested by: @tstellar