Skip to content

Commit 306b5a3

Browse files
committed
[SLP] Do not consider split nodes when checking parent PHI-based nodes
The compiler should not consider split vectorize nodes when checking for non-schedulable PHI-based parent nodes. Only pure PHI nodes must be considered: only they can be treated as explicit users, whereas split nodes cannot. Fixes #168268
1 parent 7761a89 commit 306b5a3

File tree

2 files changed

+128
-0
lines changed

2 files changed

+128
-0
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5340,6 +5340,7 @@ class BoUpSLP {
53405340
bool IsNonSchedulableWithParentPhiNode =
53415341
TE->doesNotNeedToSchedule() && TE->UserTreeIndex &&
53425342
TE->UserTreeIndex.UserTE->hasState() &&
5343+
TE->UserTreeIndex.UserTE->State != TreeEntry::SplitVectorize &&
53435344
TE->UserTreeIndex.UserTE->getOpcode() == Instruction::PHI;
53445345
// Count the number of unique phi nodes, which are the parent for
53455346
// parent entry, and exit, if all the unique phis are processed.
@@ -5391,6 +5392,7 @@ class BoUpSLP {
53915392
bool IsNonSchedulableWithParentPhiNode =
53925393
P.first->doesNotNeedToSchedule() && P.first->UserTreeIndex &&
53935394
P.first->UserTreeIndex.UserTE->hasState() &&
5395+
P.first->UserTreeIndex.UserTE->State != TreeEntry::SplitVectorize &&
53945396
P.first->UserTreeIndex.UserTE->getOpcode() == Instruction::PHI;
53955397
auto *It = find(P.first->Scalars, User);
53965398
do {
@@ -5690,6 +5692,8 @@ class BoUpSLP {
56905692
Bundle->getTreeEntry()->doesNotNeedToSchedule() &&
56915693
Bundle->getTreeEntry()->UserTreeIndex &&
56925694
Bundle->getTreeEntry()->UserTreeIndex.UserTE->hasState() &&
5695+
Bundle->getTreeEntry()->UserTreeIndex.UserTE->State !=
5696+
TreeEntry::SplitVectorize &&
56935697
Bundle->getTreeEntry()->UserTreeIndex.UserTE->getOpcode() ==
56945698
Instruction::PHI;
56955699
// Count the number of unique phi nodes, which are the parent for
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define i32 @main(ptr %c, i32 %0, i1 %tobool4.not, i16 %1) {
5+
; CHECK-LABEL: define i32 @main(
6+
; CHECK-SAME: ptr [[C:%.*]], i32 [[TMP0:%.*]], i1 [[TOBOOL4_NOT:%.*]], i16 [[TMP1:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*]]:
8+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[TMP0]], i32 1
9+
; CHECK-NEXT: br label %[[IF_END:.*]]
10+
; CHECK: [[IF_END]]:
11+
; CHECK-NEXT: [[B_0_PH:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP32:%.*]], %[[WHILE_COND_PREHEADER:.*]] ]
12+
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP33:%.*]], %[[WHILE_COND_PREHEADER]] ]
13+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[B_0_PH]], i32 0
14+
; CHECK-NEXT: br i1 [[TOBOOL4_NOT]], label %[[R:.*]], label %[[IF_END9:.*]]
15+
; CHECK: [[IF_END9]]:
16+
; CHECK-NEXT: [[CONV11:%.*]] = sext i16 [[TMP1]] to i32
17+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[CONV11]], i32 0
18+
; CHECK-NEXT: br label %[[R]]
19+
; CHECK: [[R]]:
20+
; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ <i32 1, i32 0>, %[[IF_END9]] ], [ [[TMP2]], %[[IF_END]] ]
21+
; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ [[TMP5]], %[[IF_END9]] ], [ [[TMP4]], %[[IF_END]] ]
22+
; CHECK-NEXT: [[TOBOOL12_NOT:%.*]] = icmp eq i32 [[B_0_PH]], 0
23+
; CHECK-NEXT: br i1 [[TOBOOL12_NOT]], label %[[IF_END14:.*]], label %[[IF_THEN13:.*]]
24+
; CHECK: [[IF_THEN13]]:
25+
; CHECK-NEXT: br label %[[IF_END14]]
26+
; CHECK: [[IF_END14]]:
27+
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
28+
; CHECK-NEXT: [[AND:%.*]] = and i32 [[TMP8]], 1
29+
; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[AND]], 1
30+
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
31+
; CHECK-NEXT: [[AND17:%.*]] = and i32 [[TMP9]], 1
32+
; CHECK-NEXT: [[DIV20:%.*]] = sdiv i32 [[AND17]], [[TMP0]]
33+
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[C]], align 4
34+
; CHECK-NEXT: [[AND25:%.*]] = and i32 [[TMP0]], 1
35+
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
36+
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[AND17]], i32 1
37+
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP10]], i32 2
38+
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[AND25]], i32 3
39+
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> <i32 0, i32 1, i32 poison, i32 1>, i32 [[DIV20]], i32 2
40+
; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i32> [[TMP14]], [[TMP15]]
41+
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> poison, i32 [[NOT]], i32 2
42+
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP11]], <4 x i32> <i32 poison, i32 poison, i32 2, i32 5>
43+
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
44+
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i32> [[TMP18]], <4 x i32> [[TMP19]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
45+
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[B_0_PH]], i32 0
46+
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[TMP0]], i32 2
47+
; CHECK-NEXT: br label %[[AH:.*]]
48+
; CHECK: [[AH]]:
49+
; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP21]], %[[AH]] ], [ [[TMP16]], %[[IF_END14]] ]
50+
; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ [[TMP22]], %[[AH]] ], [ [[TMP20]], %[[IF_END14]] ]
51+
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP23]], i32 2
52+
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP24]], i32 2
53+
; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> [[TMP24]], <2 x i32> <i32 2, i32 6>
54+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP25]], [[TMP26]]
55+
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP23]], i32 1
56+
; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[ADD]], [[TMP28]]
57+
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP23]], i32 3
58+
; CHECK-NEXT: [[OR27:%.*]] = or i32 [[TMP29]], [[TMP30]]
59+
; CHECK-NEXT: store i32 [[OR27]], ptr [[C]], align 4
60+
; CHECK-NEXT: br i1 [[TOBOOL4_NOT]], label %[[WHILE_COND_PREHEADER]], label %[[AH]]
61+
; CHECK: [[WHILE_COND_PREHEADER]]:
62+
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i32> [[TMP24]], i32 3
63+
; CHECK-NEXT: [[CALL69:%.*]] = tail call i32 @s(i32 [[TMP31]])
64+
; CHECK-NEXT: [[TMP32]] = extractelement <4 x i32> [[TMP23]], i32 0
65+
; CHECK-NEXT: [[TMP33]] = shufflevector <4 x i32> [[TMP24]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
66+
; CHECK-NEXT: br label %[[IF_END]]
67+
;
68+
entry:
69+
br label %if.end
70+
71+
if.end:
72+
%n.0.ph = phi i32 [ 0, %entry ], [ %.us-phi52, %while.cond.preheader ]
73+
%b.0.ph = phi i32 [ 0, %entry ], [ %b.2, %while.cond.preheader ]
74+
%a.0.ph = phi i32 [ 0, %entry ], [ %a.2, %while.cond.preheader ]
75+
br i1 %tobool4.not, label %r, label %if.end9
76+
77+
if.end9:
78+
%conv11 = sext i16 %1 to i32
79+
br label %r
80+
81+
r:
82+
%.us-phi51642 = phi i32 [ 0, %if.end9 ], [ %0, %if.end ]
83+
%.us-phi415662 = phi i32 [ 0, %if.end9 ], [ 1, %if.end ]
84+
%b.1 = phi i32 [ %conv11, %if.end9 ], [ %b.0.ph, %if.end ]
85+
%a.1 = phi i32 [ 1, %if.end9 ], [ 0, %if.end ]
86+
%tobool12.not = icmp eq i32 %b.0.ph, 0
87+
br i1 %tobool12.not, label %if.end14, label %if.then13
88+
89+
if.then13:
90+
br label %if.end14
91+
92+
if.end14:
93+
%and = and i32 %n.0.ph, 1
94+
%not = xor i32 %and, 1
95+
%and17 = and i32 %a.0.ph, 1
96+
%not18 = xor i32 %and17, 1
97+
%div20 = sdiv i32 %and17, %0
98+
%2 = load i32, ptr %c, align 4
99+
%3 = xor i32 %2, %div20
100+
%and25 = and i32 %0, 1
101+
%not26 = xor i32 %and25, 1
102+
br label %ah
103+
104+
ah:
105+
%.us-phi4154 = phi i32 [ 0, %ah ], [ %.us-phi415662, %if.end14 ]
106+
%.us-phi52 = phi i32 [ 0, %ah ], [ %.us-phi51642, %if.end14 ]
107+
%b.2 = phi i32 [ %b.0.ph, %ah ], [ %b.1, %if.end14 ]
108+
%a.2 = phi i32 [ 0, %ah ], [ %a.1, %if.end14 ]
109+
%l.1 = phi i32 [ %0, %ah ], [ %not, %if.end14 ]
110+
%p16.1 = phi i32 [ 0, %ah ], [ %not18, %if.end14 ]
111+
%q.1 = phi i32 [ 0, %ah ], [ %3, %if.end14 ]
112+
%r23.1 = phi i32 [ 0, %ah ], [ %not26, %if.end14 ]
113+
%add = add i32 %q.1, %l.1
114+
%4 = or i32 %add, %p16.1
115+
%or27 = or i32 %4, %r23.1
116+
store i32 %or27, ptr %c, align 4
117+
br i1 %tobool4.not, label %while.cond.preheader, label %ah
118+
119+
while.cond.preheader:
120+
%call69 = tail call i32 @s(i32 %.us-phi4154)
121+
br label %if.end
122+
}
123+
124+
declare i32 @s(i32)

0 commit comments

Comments
 (0)