-
Notifications
You must be signed in to change notification settings - Fork 15.2k
release/20.x: [SLP] Check for PHI nodes (potentially cycles!) when checking dependencies #127267
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: None (llvmbot) ChangesBackport ac217ee Requested by: @Zentrik Full diff: https://github.com/llvm/llvm-project/pull/127267.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 19963e780ebd3..7b20eda550095 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13181,8 +13181,16 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
continue;
// If the user instruction is used for some reason in different
// vectorized nodes - make it depend on index.
+ // If any vector node is PHI node, this dependency might not work
+ // because of cycle dependencies, so disable it.
if (TEUseEI.UserTE != UseEI.UserTE &&
- TEUseEI.UserTE->Idx < UseEI.UserTE->Idx)
+ (TEUseEI.UserTE->Idx < UseEI.UserTE->Idx ||
+ any_of(
+ VectorizableTree,
+ [](const std::unique_ptr<TreeEntry> &TE) {
+ return TE->State == TreeEntry::Vectorize &&
+ TE->getOpcode() == Instruction::PHI;
+ })))
continue;
}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
index 5562291dbb6be..bf3f0c4df74e4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
@@ -31,7 +31,7 @@ define void @test() {
; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[I2]], 0.000000e+00
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <2 x i32> <i32 poison, i32 0>
; CHECK-NEXT: [[TMP9]] = insertelement <2 x float> [[TMP8]], float [[I2]], i32 0
-; CHECK-NEXT: [[TMP10]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP10]] = insertelement <2 x float> [[TMP2]], float [[I2]], i32 0
; CHECK-NEXT: br i1 [[TOBOOL]], label [[BB1]], label [[BB2]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
index 166c819098c8c..d649465c9ff12 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
@@ -8,7 +8,7 @@
; YAML: Function: test
; YAML: Args:
; YAML: - String: 'Stores SLP vectorized with cost '
-; YAML: - Cost: '-6'
+; YAML: - Cost: '-3'
; YAML: - String: ' and with tree size '
; YAML: - TreeSize: '14'
; YAML: ...
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll
new file mode 100644
index 0000000000000..22e7e6a8e6624
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell < %s | FileCheck %s
+
+define void @test(float %0) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: float [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = fdiv <2 x float> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = fdiv <2 x float> [[TMP4]], zeroinitializer
+; CHECK-NEXT: br label %[[BB6:.*]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x float> zeroinitializer, [[TMP7]]
+; CHECK-NEXT: br label %[[BB10:.*]]
+; CHECK: [[BB9:.*]]:
+; CHECK-NEXT: br label %[[BB10]]
+; CHECK: [[BB10]]:
+; CHECK-NEXT: [[TMP11:%.*]] = phi <2 x float> [ [[TMP8]], %[[BB6]] ], [ poison, %[[BB9]] ]
+; CHECK-NEXT: br label %[[BB12:.*]]
+; CHECK: [[BB12]]:
+; CHECK-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x float> [[TMP11]], [[TMP13]]
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[TMP14]], i32 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP14]], i32 1
+; CHECK-NEXT: [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = call float @llvm.fabs.f32(float [[TMP17]])
+; CHECK-NEXT: ret void
+;
+ %2 = fdiv float 0.000000e+00, 0.000000e+00
+ %3 = fdiv float 0.000000e+00, 0.000000e+00
+ %4 = fdiv float %0, 0.000000e+00
+ br label %5
+
+5:
+ %6 = fmul float %4, 0.000000e+00
+ %7 = fsub float 0.000000e+00, %6
+ %8 = fmul float %3, 0.000000e+00
+ %9 = fsub float 0.000000e+00, %8
+ br label %11
+
+10:
+ br label %11
+
+11:
+ %12 = phi float [ %7, %5 ], [ 0.000000e+00, %10 ]
+ %13 = phi float [ %9, %5 ], [ 0.000000e+00, %10 ]
+ br label %14
+
+14:
+ %15 = fmul float %2, 0.000000e+00
+ %16 = fsub float %12, %15
+ %17 = fmul float %4, 0.000000e+00
+ %18 = fsub float %13, %17
+ %19 = fadd float %16, %18
+ %20 = call float @llvm.fabs.f32(float %19)
+ ret void
+}
+
|
alexey-bataev
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG
…ncies When checking for dependecies for gather nodes with users with the same last instruction, cannot rely on the index order, if there is (even potential!) cycle in the graph, which may cause order not work correctly and cause compiler crash. Fixes llvm#127128 (cherry picked from commit ac217ee)
|
@Zentrik (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. |
|
This change seems to be causing the LLVM test
Can someone take a look? |
|
This was my mistake, I merged it without checking the results of the CI. I will revert this. |
|
/cherry-pick ac217ee |
|
/pull-request #127294 |
This pull request is for re-applying the patch, we can discuss it further on that PR. |
Backport ac217ee
Requested by: @Zentrik