Skip to content

Commit 418a987

Browse files
committed
[SLP] Do not use a node if it is a subvector of a buildvector node
If a buildvector node matches another node that is itself a subvector of another buildvector node, we need to detect this and cancel the match to avoid incorrect ordering of the nodes. Fixes llvm#128770.
1 parent 7f482aa commit 418a987

File tree

2 files changed

+80
-1
lines changed

2 files changed

+80
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13353,6 +13353,14 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1335313353
return EdgeInfo(const_cast<TreeEntry *>(TE), 0);
1335413354
return TE->UserTreeIndex;
1335513355
};
13356+
auto HasGatherUser = [&](const TreeEntry *TE) { // Reports whether any user edge met while walking up from TE has EdgeIdx == UINT_MAX.
13357+
while (TE->Idx != 0 && TE->UserTreeIndex) { // Keep climbing until the entry with Idx 0 or an entry without a valid user edge.
13358+
if (TE->UserTreeIndex.EdgeIdx == UINT_MAX) // NOTE(review): UINT_MAX presumably marks a gather/buildvector user edge - confirm against EdgeInfo.
13359+
return true;
13360+
TE = TE->UserTreeIndex.UserTE; // Move one level up to the user entry and re-check.
13361+
}
13362+
return false; // The walk ended without meeting such an edge.
13363+
};
1335613364
const EdgeInfo TEUseEI = GetUserEntry(TE);
1335713365
if (!TEUseEI)
1335813366
return std::nullopt;
@@ -13453,7 +13461,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1345313461
// If the user instruction is used for some reason in different
1345413462
// vectorized nodes - make it depend on index.
1345513463
if (TEUseEI.UserTE != UseEI.UserTE &&
13456-
TEUseEI.UserTE->Idx < UseEI.UserTE->Idx)
13464+
(TEUseEI.UserTE->Idx < UseEI.UserTE->Idx ||
13465+
HasGatherUser(TEUseEI.UserTE)))
1345713466
continue;
1345813467
}
1345913468

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck %s
3+
4+
define void @test(ptr %0, i64 %1, i64 %2) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: ptr [[TMP0:%.*]], i64 [[TMP1:%.*]], i64 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0
8+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x ptr> [[TMP4]], <4 x ptr> poison, <4 x i32> zeroinitializer
9+
; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint <4 x ptr> [[TMP5]] to <4 x i64>
10+
; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint <4 x ptr> [[TMP5]] to <4 x i64>
11+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 2, i32 2, i32 1, i32 3, i32 1>
12+
; CHECK-NEXT: br [[DOTPREHEADER_LR_PH:label %.*]]
13+
; CHECK: [[_PREHEADER_LR_PH:.*:]]
14+
; CHECK-NEXT: br [[DOTPREHEADER_US_US_PREHEADER:label %.*]]
15+
; CHECK: [[_PREHEADER_US_US_PREHEADER:.*:]]
16+
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i64> poison, i64 [[TMP1]], i32 0
17+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i64> [[TMP9]], i64 [[TMP2]], i32 1
18+
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i64> [[TMP10]], <8 x i64> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison>
19+
; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP11]], <4 x i64> [[TMP6]], i64 4)
20+
; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP8]]
21+
; CHECK-NEXT: br [[DOTPREHEADER_US_US:label %.*]]
22+
; CHECK: [[_PREHEADER_US_US:.*:]]
23+
; CHECK-NEXT: [[TMP14:%.*]] = icmp ult <8 x i64> [[TMP13]], splat (i64 32)
24+
; CHECK-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP14]])
25+
; CHECK-NEXT: br i1 [[TMP15]], label %[[SCALAR_PH:.*]], [[DOTPREHEADER_US_US]]
26+
; CHECK: [[SCALAR_PH]]:
27+
; CHECK-NEXT: ret void
28+
;
29+
%4 = ptrtoint ptr %0 to i64
30+
%5 = ptrtoint ptr %0 to i64
31+
%6 = ptrtoint ptr %0 to i64
32+
%7 = ptrtoint ptr %0 to i64
33+
%8 = ptrtoint ptr %0 to i64
34+
br label %.preheader.lr.ph
35+
36+
.preheader.lr.ph:
37+
br label %.preheader.us.us.preheader
38+
39+
.preheader.us.us.preheader:
40+
%9 = or i64 %1, %7
41+
%10 = or i64 %2, %7
42+
%11 = or i64 %1, %4
43+
%12 = or i64 %1, %8
44+
%13 = or i64 %5, %8
45+
%14 = or i64 %8, %4
46+
%15 = or i64 %5, %6
47+
%16 = or i64 %6, %4
48+
br label %.preheader.us.us
49+
50+
.preheader.us.us:
51+
%diff.check1 = icmp ult i64 %9, 32
52+
%diff.check2 = icmp ult i64 %10, 32
53+
%conflict.rdx3 = or i1 %diff.check1, %diff.check2
54+
%diff.check4 = icmp ult i64 %11, 32
55+
%conflict.rdx5 = or i1 %conflict.rdx3, %diff.check4
56+
%diff.check6 = icmp ult i64 %12, 32
57+
%conflict.rdx7 = or i1 %conflict.rdx5, %diff.check6
58+
%diff.check8 = icmp ult i64 %13, 32
59+
%conflict.rdx9 = or i1 %conflict.rdx7, %diff.check8
60+
%diff.check10 = icmp ult i64 %14, 32
61+
%conflict.rdx11 = or i1 %conflict.rdx9, %diff.check10
62+
%diff.check12 = icmp ult i64 %15, 32
63+
%conflict.rdx13 = or i1 %conflict.rdx11, %diff.check12
64+
%diff.check14 = icmp ult i64 %16, 32
65+
%conflict.rdx15 = or i1 %conflict.rdx13, %diff.check14
66+
br i1 %conflict.rdx15, label %scalar.ph, label %.preheader.us.us
67+
68+
scalar.ph:
69+
ret void
70+
}

0 commit comments

Comments
 (0)