Skip to content

Commit cf1f489

Browse files
committed
[SLP]Check only instructions with unique parent instruction user
An instruction with a non-schedulable parent needs to be re-checked only if this parent has a user phi node (i.e. it is used only outside the block) and the user instruction has a unique parent instruction. Fixes the issue reported in llvm@20675ee#commitcomment-168863594
1 parent bfd4935 commit cf1f489

File tree

2 files changed

+189
-2
lines changed

2 files changed

+189
-2
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5608,6 +5608,7 @@ class BoUpSLP {
56085608
for (ScheduleBundle *Bundle : Bundles) {
56095609
if (ScheduleCopyableDataMap.empty() && TotalOpCount == 0)
56105610
break;
5611+
SmallPtrSet<Value *, 4> ParentsUniqueUsers;
56115612
// Need to search for the lane since the tree entry can be
56125613
// reordered.
56135614
auto *It = find(Bundle->getTreeEntry()->Scalars, In);
@@ -5636,6 +5637,22 @@ class BoUpSLP {
56365637
Bundle->getTreeEntry()->isCopyableElement(In)) &&
56375638
"Missed TreeEntry operands?");
56385639

5640+
bool IsNonSchedulableWithParentPhiNode =
5641+
Bundle->getTreeEntry()->doesNotNeedToSchedule() &&
5642+
Bundle->getTreeEntry()->UserTreeIndex &&
5643+
Bundle->getTreeEntry()->UserTreeIndex.UserTE->hasState() &&
5644+
Bundle->getTreeEntry()->UserTreeIndex.UserTE->getOpcode() ==
5645+
Instruction::PHI;
5646+
// Count the number of unique phi nodes, which are the parent for
5647+
// parent entry, and exit, if all the unique phis are processed.
5648+
if (IsNonSchedulableWithParentPhiNode) {
5649+
const TreeEntry *ParentTE =
5650+
Bundle->getTreeEntry()->UserTreeIndex.UserTE;
5651+
Value *User = ParentTE->Scalars[Lane];
5652+
if (!ParentsUniqueUsers.insert(User).second)
5653+
break;
5654+
}
5655+
56395656
for (unsigned OpIdx :
56405657
seq<unsigned>(Bundle->getTreeEntry()->getNumOperands()))
56415658
if (auto *I = dyn_cast<Instruction>(
@@ -5644,8 +5661,8 @@ class BoUpSLP {
56445661
<< *I << "\n");
56455662
DecrUnschedForInst(I, Bundle->getTreeEntry(), OpIdx, Checked);
56465663
}
5647-
// If parent node is schedulable, it will be handle correctly.
5648-
if (!Bundle->getTreeEntry()->doesNotNeedToSchedule())
5664+
// If parent node is schedulable, it will be handled correctly.
5665+
if (!IsNonSchedulableWithParentPhiNode)
56495666
break;
56505667
It = std::find(std::next(It),
56515668
Bundle->getTreeEntry()->Scalars.end(), In);
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define i64 @test(ptr %arg1, i64 %alloca.promoted344, i8 %load.311.i, i1 %load1.i) {
5+
; CHECK-LABEL: define i64 @test(
6+
; CHECK-SAME: ptr [[ARG1:%.*]], i64 [[ALLOCA_PROMOTED344:%.*]], i8 [[LOAD_311_I:%.*]], i1 [[LOAD1_I:%.*]]) {
7+
; CHECK-NEXT: [[BB:.*]]:
8+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i8> <i8 0, i8 0, i8 0, i8 poison>, i8 [[LOAD_311_I]], i32 3
9+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i8> <i8 poison, i8 poison, i8 0, i8 0>, i8 [[LOAD_311_I]], i32 0
10+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[ALLOCA_PROMOTED344]], i32 0
11+
; CHECK-NEXT: br label %[[BB2:.*]]
12+
; CHECK: [[BB2]]:
13+
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[BB]] ], [ [[TMP28:%.*]], %[[BB12_8_I:.*]] ]
14+
; CHECK-NEXT: [[TMP4:%.*]] = phi <8 x i8> [ zeroinitializer, %[[BB]] ], [ [[TMP29:%.*]], %[[BB12_8_I]] ]
15+
; CHECK-NEXT: br i1 [[LOAD1_I]], label %[[SPAM_EXIT:.*]], label %[[BB4_LR_PH_I:.*]]
16+
; CHECK: [[BB4_LR_PH_I]]:
17+
; CHECK-NEXT: br i1 true, label %[[BB3_I_I_PEEL:.*]], label %[[EGGS_EXIT_I_PEEL:.*]]
18+
; CHECK: [[BB3_I_I_PEEL]]:
19+
; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP3]], splat (i64 1)
20+
; CHECK-NEXT: [[LOAD4_I_I_PEEL:%.*]] = load i64, ptr [[ARG1]], align 8
21+
; CHECK-NEXT: [[SHL_I_I_PEEL:%.*]] = shl i64 [[LOAD4_I_I_PEEL]], 1
22+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <2 x i32> <i32 poison, i32 0>
23+
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[SHL_I_I_PEEL]], i32 0
24+
; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP5]], [[TMP7]]
25+
; CHECK-NEXT: [[TMP9:%.*]] = xor <2 x i64> [[TMP5]], [[TMP7]]
26+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP9]], <2 x i32> <i32 0, i32 3>
27+
; CHECK-NEXT: br label %[[EGGS_EXIT_I_PEEL]]
28+
; CHECK: [[EGGS_EXIT_I_PEEL]]:
29+
; CHECK-NEXT: [[TMP11:%.*]] = phi <2 x i64> [ [[TMP10]], %[[BB3_I_I_PEEL]] ], [ zeroinitializer, %[[BB4_LR_PH_I]] ]
30+
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i64> [[TMP11]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 0>
31+
; CHECK-NEXT: [[TMP13:%.*]] = trunc <4 x i64> [[TMP12]] to <4 x i8>
32+
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP12]], i32 1
33+
; CHECK-NEXT: br label %[[SPAM_EXIT]]
34+
; CHECK: [[SPAM_EXIT]]:
35+
; CHECK-NEXT: [[GETELEMENTPTR_I_I_PROMOTED346:%.*]] = phi i64 [ [[TMP14]], %[[EGGS_EXIT_I_PEEL]] ], [ 0, %[[BB2]] ]
36+
; CHECK-NEXT: [[LOAD_8_I:%.*]] = phi i8 [ 0, %[[EGGS_EXIT_I_PEEL]] ], [ 1, %[[BB2]] ]
37+
; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i8> [ [[TMP13]], %[[EGGS_EXIT_I_PEEL]] ], [ zeroinitializer, %[[BB2]] ]
38+
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP15]], <4 x i8> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
39+
; CHECK-NEXT: br i1 [[LOAD1_I]], label %[[BB12_8_I]], label %[[BB12_1_THREAD_I:.*]]
40+
; CHECK: [[BB12_1_THREAD_I]]:
41+
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i8> [[TMP4]], i32 0
42+
; CHECK-NEXT: [[ICMP5_3_I:%.*]] = icmp eq i8 [[TMP17]], 0
43+
; CHECK-NEXT: br i1 [[ICMP5_3_I]], label %[[BB12_3_I:.*]], label %[[BB8_3_I:.*]]
44+
; CHECK: [[BB8_3_I]]:
45+
; CHECK-NEXT: br label %[[BB12_3_I]]
46+
; CHECK: [[BB12_3_I]]:
47+
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i8> [[TMP4]], i32 1
48+
; CHECK-NEXT: [[ICMP5_4_I:%.*]] = icmp eq i8 [[TMP18]], 0
49+
; CHECK-NEXT: br i1 [[ICMP5_4_I]], label %[[BB12_4_I:.*]], label %[[BB8_4_I:.*]]
50+
; CHECK: [[BB8_4_I]]:
51+
; CHECK-NEXT: br label %[[BB12_4_I]]
52+
; CHECK: [[BB12_4_I]]:
53+
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i8> [[TMP4]], i32 2
54+
; CHECK-NEXT: [[ICMP5_5_I:%.*]] = icmp eq i8 [[TMP19]], 0
55+
; CHECK-NEXT: br i1 [[ICMP5_5_I]], label %[[BB12_5_I:.*]], label %[[BB8_5_I:.*]]
56+
; CHECK: [[BB8_5_I]]:
57+
; CHECK-NEXT: br label %[[BB12_5_I]]
58+
; CHECK: [[BB12_5_I]]:
59+
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i8> [[TMP4]], i32 3
60+
; CHECK-NEXT: [[ICMP5_7_I:%.*]] = icmp eq i8 [[TMP20]], 0
61+
; CHECK-NEXT: br i1 [[ICMP5_7_I]], label %[[BB12_7_I:.*]], label %[[BB8_7_I:.*]]
62+
; CHECK: [[BB8_7_I]]:
63+
; CHECK-NEXT: br label %[[BB12_7_I]]
64+
; CHECK: [[BB12_7_I]]:
65+
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i8> [[TMP4]], i32 4
66+
; CHECK-NEXT: [[ICMP5_8_I:%.*]] = icmp eq i8 [[TMP21]], 0
67+
; CHECK-NEXT: br i1 [[ICMP5_8_I]], label %[[BB12_8_I]], label %[[BB8_8_I:.*]]
68+
; CHECK: [[BB8_8_I]]:
69+
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i8> [[TMP1]], i8 [[LOAD_8_I]], i32 1
70+
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i8> poison, i8 [[LOAD_8_I]], i32 0
71+
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> poison, <4 x i32> <i32 poison, i32 5, i32 6, i32 7>
72+
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i8> [[TMP23]], <4 x i8> [[TMP24]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
73+
; CHECK-NEXT: br label %[[BB12_8_I]]
74+
; CHECK: [[BB12_8_I]]:
75+
; CHECK-NEXT: [[TMP26:%.*]] = phi <4 x i8> [ [[TMP0]], %[[BB12_7_I]] ], [ [[TMP22]], %[[BB8_8_I]] ], [ [[TMP15]], %[[SPAM_EXIT]] ]
76+
; CHECK-NEXT: [[TMP27:%.*]] = phi <4 x i8> [ zeroinitializer, %[[BB12_7_I]] ], [ [[TMP25]], %[[BB8_8_I]] ], [ [[TMP16]], %[[SPAM_EXIT]] ]
77+
; CHECK-NEXT: [[TMP28]] = insertelement <2 x i64> [[TMP2]], i64 [[GETELEMENTPTR_I_I_PROMOTED346]], i32 1
78+
; CHECK-NEXT: [[TMP29]] = shufflevector <4 x i8> [[TMP26]], <4 x i8> [[TMP27]], <8 x i32> <i32 2, i32 7, i32 5, i32 0, i32 1, i32 3, i32 4, i32 6>
79+
; CHECK-NEXT: br label %[[BB2]]
80+
;
81+
bb:
82+
br label %bb2
83+
84+
bb2:
85+
%getelementptr.i.i.promoted = phi i64 [ 0, %bb ], [ %getelementptr.i.i.promoted346, %bb12.8.i ]
86+
%alloca.promoted = phi i64 [ 0, %bb ], [ %alloca.promoted344, %bb12.8.i ]
87+
%load.8.i231 = phi i8 [ 0, %bb ], [ %load.8.i239, %bb12.8.i ]
88+
%load.7.i217 = phi i8 [ 0, %bb ], [ %load.7.i225, %bb12.8.i ]
89+
%load.626.i200 = phi i8 [ 0, %bb ], [ %load.626.i208, %bb12.8.i ]
90+
%load.6.i183 = phi i8 [ 0, %bb ], [ %load.6.i191, %bb12.8.i ]
91+
%load.5.i167 = phi i8 [ 0, %bb ], [ %load.5.i175, %bb12.8.i ]
92+
%load.418.i148 = phi i8 [ 0, %bb ], [ %load.418.i156, %bb12.8.i ]
93+
%load.4.i129 = phi i8 [ 0, %bb ], [ %load.4.i137, %bb12.8.i ]
94+
%load.3.i111 = phi i8 [ 0, %bb ], [ %load.3.i119, %bb12.8.i ]
95+
br i1 %load1.i, label %spam.exit, label %bb4.lr.ph.i
96+
97+
bb4.lr.ph.i:
98+
br i1 true, label %bb3.i.i.peel, label %eggs.exit.i.peel
99+
100+
bb3.i.i.peel:
101+
%and.i.i.peel = and i64 %alloca.promoted, 1
102+
%load4.i.i.peel = load i64, ptr %arg1, align 8
103+
%shl.i.i.peel = shl i64 %load4.i.i.peel, 1
104+
%or.i.i.peel = or i64 %shl.i.i.peel, %and.i.i.peel
105+
%and6.i.i.peel = and i64 %getelementptr.i.i.promoted, 1
106+
%xor.i.i.peel = xor i64 %and6.i.i.peel, %alloca.promoted
107+
br label %eggs.exit.i.peel
108+
109+
eggs.exit.i.peel:
110+
%load5.i.i93.peel = phi i64 [ %xor.i.i.peel, %bb3.i.i.peel ], [ 0, %bb4.lr.ph.i ]
111+
%or.i.i91.peel = phi i64 [ %or.i.i.peel, %bb3.i.i.peel ], [ 0, %bb4.lr.ph.i ]
112+
%0 = trunc i64 %or.i.i91.peel to i8
113+
%1 = trunc nuw i64 %or.i.i91.peel to i8
114+
%2 = trunc i64 %load5.i.i93.peel to i8
115+
br label %spam.exit
116+
117+
spam.exit:
118+
%getelementptr.i.i.promoted346 = phi i64 [ %load5.i.i93.peel, %eggs.exit.i.peel ], [ 0, %bb2 ]
119+
%load.834.i = phi i8 [ %2, %eggs.exit.i.peel ], [ 0, %bb2 ]
120+
%load.7.i25 = phi i8 [ %1, %eggs.exit.i.peel ], [ 0, %bb2 ]
121+
%load.8.i = phi i8 [ 0, %eggs.exit.i.peel ], [ 1, %bb2 ]
122+
%load.6.i18 = phi i8 [ %0, %eggs.exit.i.peel ], [ 0, %bb2 ]
123+
br i1 %load1.i, label %bb12.8.i, label %bb12.1.thread.i
124+
125+
bb12.1.thread.i:
126+
%icmp5.3.i = icmp eq i8 %load.3.i111, 0
127+
br i1 %icmp5.3.i, label %bb12.3.i, label %bb8.3.i
128+
129+
bb8.3.i:
130+
br label %bb12.3.i
131+
132+
bb12.3.i:
133+
%icmp5.4.i = icmp eq i8 %load.4.i129, 0
134+
br i1 %icmp5.4.i, label %bb12.4.i, label %bb8.4.i
135+
136+
bb8.4.i:
137+
br label %bb12.4.i
138+
139+
bb12.4.i:
140+
%icmp5.5.i = icmp eq i8 %load.5.i167, 0
141+
br i1 %icmp5.5.i, label %bb12.5.i, label %bb8.5.i
142+
143+
bb8.5.i:
144+
br label %bb12.5.i
145+
146+
bb12.5.i:
147+
%icmp5.7.i = icmp eq i8 %load.7.i217, 0
148+
br i1 %icmp5.7.i, label %bb12.7.i, label %bb8.7.i
149+
150+
bb8.7.i:
151+
br label %bb12.7.i
152+
153+
bb12.7.i:
154+
%icmp5.8.i = icmp eq i8 %load.8.i231, 0
155+
br i1 %icmp5.8.i, label %bb12.8.i, label %bb8.8.i
156+
157+
bb8.8.i:
158+
br label %bb12.8.i
159+
160+
bb12.8.i:
161+
%load.8.i239 = phi i8 [ 0, %bb12.7.i ], [ %load.8.i, %bb8.8.i ], [ %load.834.i, %spam.exit ]
162+
%load.7.i225 = phi i8 [ 0, %bb12.7.i ], [ %load.311.i, %bb8.8.i ], [ %load.7.i25, %spam.exit ]
163+
%load.626.i208 = phi i8 [ 0, %bb12.7.i ], [ %load.8.i, %bb8.8.i ], [ %load.6.i18, %spam.exit ]
164+
%load.6.i191 = phi i8 [ %load.311.i, %bb12.7.i ], [ 0, %bb8.8.i ], [ %load.6.i18, %spam.exit ]
165+
%load.5.i175 = phi i8 [ 0, %bb12.7.i ], [ %load.6.i183, %bb8.8.i ], [ %load.6.i18, %spam.exit ]
166+
%load.418.i156 = phi i8 [ 0, %bb12.7.i ], [ %load.626.i200, %bb8.8.i ], [ %load.6.i18, %spam.exit ]
167+
%load.4.i137 = phi i8 [ 0, %bb12.7.i ], [ %load.418.i148, %bb8.8.i ], [ %load.6.i18, %spam.exit ]
168+
%load.3.i119 = phi i8 [ 0, %bb12.7.i ], [ 0, %bb8.8.i ], [ %load.6.i18, %spam.exit ]
169+
br label %bb2
170+
}

0 commit comments

Comments
 (0)