Skip to content

Commit 2824b3c

Browse files
committed
[SLP] Try to recalculate deps only for nodes with previously valid deps
Dependencies need to be recalculated only for nodes that had valid dependencies before they were cleared because of the copyable nodes. Otherwise, there is no need to recalculate the dependencies, and skipping the recalculation prevents a crash.
1 parent d39772c commit 2824b3c

File tree

2 files changed

+187
-2
lines changed

2 files changed

+187
-2
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20790,7 +20790,8 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
2079020790
if (auto *Op = dyn_cast<Instruction>(U.get());
2079120791
Op && areAllOperandsReplacedByCopyableData(SD->getInst(), Op,
2079220792
*SLP, NumOps)) {
20793-
if (ScheduleData *OpSD = getScheduleData(Op)) {
20793+
if (ScheduleData *OpSD = getScheduleData(Op);
20794+
OpSD && OpSD->hasValidDependencies()) {
2079420795
OpSD->clearDirectDependencies();
2079520796
if (RegionHasStackSave ||
2079620797
!isGuaranteedToTransferExecutionToSuccessor(OpSD->getInst()))
@@ -20976,7 +20977,8 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
2097620977
ScheduleCopyableDataMapByUsers.erase(I);
2097720978
ScheduleCopyableDataMap.erase(KV);
2097820979
// Need to recalculate dependencies for the actual schedule data.
20979-
if (ScheduleData *OpSD = getScheduleData(I)) {
20980+
if (ScheduleData *OpSD = getScheduleData(I);
20981+
OpSD && OpSD->hasValidDependencies()) {
2098020982
OpSD->clearDirectDependencies();
2098120983
if (RegionHasStackSave ||
2098220984
!isGuaranteedToTransferExecutionToSuccessor(OpSD->getInst()))
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define i16 @test() {
5+
; CHECK-LABEL: define i16 @test() {
6+
; CHECK-NEXT: [[ENTRY:.*:]]
7+
; CHECK-NEXT: [[TMP0:%.*]] = lshr i32 0, 0
8+
; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 0
9+
; CHECK-NEXT: [[CALL99_I:%.*]] = call i32 @llvm.bswap.i32(i32 0)
10+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[CALL99_I]], 0
11+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i32 0
12+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP2]], i32 1
13+
; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP4]], zeroinitializer
14+
; CHECK-NEXT: [[TMP6:%.*]] = shl i32 0, 0
15+
; CHECK-NEXT: [[UNSCLEAR186_I:%.*]] = and i32 [[TMP6]], 0
16+
; CHECK-NEXT: [[TMP7:%.*]] = shl i32 0, 0
17+
; CHECK-NEXT: [[CALL7_I45:%.*]] = tail call i32 null(i32 0)
18+
; CHECK-NEXT: [[TMP8:%.*]] = lshr i32 [[CALL7_I45]], 0
19+
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
20+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP8]], i32 1
21+
; CHECK-NEXT: [[TMP11:%.*]] = and <2 x i32> [[TMP10]], zeroinitializer
22+
; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 0, 0
23+
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[TMP12]], i32 0
24+
; CHECK-NEXT: [[TMP14:%.*]] = shl <2 x i32> [[TMP13]], zeroinitializer
25+
; CHECK-NEXT: [[TMP15:%.*]] = and <2 x i32> [[TMP14]], zeroinitializer
26+
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <24 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
27+
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <24 x i32> <i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 0>, <24 x i32> [[TMP16]], <24 x i32> <i32 0, i32 1, i32 24, i32 25, i32 poison, i32 5, i32 poison, i32 7, i32 poison, i32 poison, i32 poison, i32 11, i32 poison, i32 poison, i32 poison, i32 15, i32 poison, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 22, i32 23>
28+
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <24 x i32> [[TMP17]], <24 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 24, i32 5, i32 26, i32 7, i32 28, i32 29, i32 poison, i32 11, i32 poison, i32 poison, i32 poison, i32 15, i32 poison, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 22, i32 23>
29+
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <24 x i32> [[TMP18]], i32 [[UNSCLEAR186_I]], i32 10
30+
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> poison, <24 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
31+
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <24 x i32> [[TMP19]], <24 x i32> [[TMP20]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
32+
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x i32> [[TMP15]], <2 x i32> poison, <24 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
33+
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <24 x i32> [[TMP21]], <24 x i32> [[TMP22]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 24, i32 15, i32 25, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 22, i32 23>
34+
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <24 x i32> [[TMP23]], <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 24, i32 25, i32 26, i32 27, i32 22, i32 23>
35+
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <24 x i32> [[TMP24]], <24 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 0, i32 0, i32 poison, i32 0, i32 0, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, <24 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 4, i32 30, i32 6, i32 32, i32 33, i32 34, i32 poison, i32 36, i32 37, i32 38, i32 poison, i32 40, i32 poison, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
36+
; CHECK-NEXT: [[TMP26:%.*]] = insertelement <24 x i32> [[TMP25]], i32 [[UNSCLEAR186_I]], i32 11
37+
; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <2 x i32> [[TMP15]], <2 x i32> poison, <24 x i32> <i32 0, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
38+
; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <24 x i32> [[TMP26]], <24 x i32> [[TMP27]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 24, i32 16, i32 26, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
39+
; CHECK-NEXT: [[TMP29:%.*]] = icmp ne <24 x i32> [[TMP24]], [[TMP28]]
40+
; CHECK-NEXT: [[RDX_OP:%.*]] = shufflevector <24 x i1> [[TMP29]], <24 x i1> <i1 false, i1 false, i1 false, i1 false, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>, <28 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
41+
; CHECK-NEXT: [[TMP30:%.*]] = bitcast <28 x i1> [[RDX_OP]] to i28
42+
; CHECK-NEXT: [[TMP31:%.*]] = call i28 @llvm.ctpop.i28(i28 [[TMP30]])
43+
; CHECK-NEXT: [[TMP32:%.*]] = trunc i28 [[TMP31]] to i16
44+
; CHECK-NEXT: [[TMP33:%.*]] = call i4 @llvm.ctpop.i4(i4 -8)
45+
; CHECK-NEXT: [[TMP34:%.*]] = zext i4 [[TMP33]] to i16
46+
; CHECK-NEXT: [[OP_RDX4:%.*]] = add i16 [[TMP34]], [[TMP32]]
47+
; CHECK-NEXT: ret i16 [[OP_RDX4]]
48+
;
49+
entry:
50+
%.not243.i = icmp ne i32 0, 0
51+
%add51.i = zext i1 %.not243.i to i16
52+
%.not244.i = icmp ne i32 0, 0
53+
%add58.i = zext i1 %.not244.i to i16
54+
%add59.i = add i16 %add51.i, %add58.i
55+
%.not247.i = icmp ne i32 0, 0
56+
%add74.i = zext i1 %.not247.i to i16
57+
%add75.i = add i16 %add59.i, %add74.i
58+
%.not248.i = icmp ne i32 0, 0
59+
%add81.i = zext i1 %.not248.i to i16
60+
%add82.i = add i16 %add75.i, %add81.i
61+
%0 = lshr i32 0, 0
62+
%1 = shl i32 %0, 0
63+
%unsclear94.i = and i32 %1, 0
64+
%.not251.i = icmp ne i32 %unsclear94.i, 0
65+
%add97.i = zext i1 %.not251.i to i16
66+
%add98.i = add i16 %add82.i, %add97.i
67+
%call99.i = call i32 @llvm.bswap.i32(i32 0)
68+
%2 = lshr i32 %call99.i, 0
69+
%3 = and i32 %2, 0
70+
%.not252.i = icmp ne i32 %3, 0
71+
%add104.i = zext i1 %.not252.i to i16
72+
%add105.i = add i16 %add98.i, %add104.i
73+
%4 = shl i32 0, 0
74+
%unsclear117.i = and i32 %4, 0
75+
%.not255.i = icmp ne i32 %unsclear117.i, 0
76+
%add120.i = zext i1 %.not255.i to i16
77+
%add121.i = add i16 %add105.i, %add120.i
78+
%.not256.i = icmp ne i32 0, %unsclear117.i
79+
%add127.i = zext i1 %.not256.i to i16
80+
%add128.i = add i16 %add121.i, %add127.i
81+
%5 = shl i32 0, 0
82+
%unsclear140.i = and i32 %5, 0
83+
%.not259.i = icmp ne i32 %unsclear140.i, 0
84+
%add143.i = zext i1 %.not259.i to i16
85+
%add144.i = add i16 %add128.i, %add143.i
86+
%.not260.i = icmp ne i32 0, %unsclear140.i
87+
%add150.i = zext i1 %.not260.i to i16
88+
%add151.i = add i16 %add144.i, %add150.i
89+
%6 = shl i32 0, 0
90+
%unsclear163.i = and i32 %6, 0
91+
%.not263.i = icmp ne i32 %unsclear163.i, 0
92+
%add166.i = zext i1 %.not263.i to i16
93+
%add167.i = add i16 %add151.i, %add166.i
94+
%unsclear169.i = shl i32 0, 0
95+
%unsclear170.i = and i32 %unsclear169.i, 0
96+
%.not264.i = icmp ne i32 %unsclear170.i, 0
97+
%add173.i = zext i1 %.not264.i to i16
98+
%add174.i = add i16 %add167.i, %add173.i
99+
%7 = shl i32 0, 0
100+
%unsclear186.i = and i32 %7, 0
101+
%.not267.i = icmp ne i32 %unsclear186.i, 0
102+
%add189.i = zext i1 %.not267.i to i16
103+
%add190.i = add i16 %add174.i, %add189.i
104+
%.not268.i = icmp ne i32 0, %unsclear186.i
105+
%add196.i = zext i1 %.not268.i to i16
106+
%add197.i = add i16 %add190.i, %add196.i
107+
%8 = shl i32 0, 0
108+
%unsclear4.i42 = and i32 %8, 0
109+
%.not.i43 = icmp ne i32 %unsclear4.i42, 0
110+
%add.i = zext i1 %.not.i43 to i16
111+
%add6.i44 = add i16 %add197.i, %add.i
112+
%call7.i45 = tail call i32 null(i32 0)
113+
%9 = lshr i32 %call7.i45, 0
114+
%10 = and i32 %9, 0
115+
%.not209.i = icmp ne i32 %10, 0
116+
%add12.i46 = zext i1 %.not209.i to i16
117+
%add13.i47 = add i16 %add6.i44, %add12.i46
118+
%11 = lshr i32 0, 0
119+
%12 = shl i32 %11, 0
120+
%unsclear25.i51 = and i32 %12, 0
121+
%.not212.i = icmp ne i32 %unsclear25.i51, 0
122+
%add28.i52 = zext i1 %.not212.i to i16
123+
%add29.i53 = add i16 %add13.i47, %add28.i52
124+
%.not213.i = icmp ne i32 0, %unsclear25.i51
125+
%add35.i55 = zext i1 %.not213.i to i16
126+
%add36.i56 = add i16 %add29.i53, %add35.i55
127+
%13 = shl i32 0, 0
128+
%unsclear48.i60 = and i32 %13, 0
129+
%.not216.i = icmp ne i32 %unsclear48.i60, 0
130+
%add51.i61 = zext i1 %.not216.i to i16
131+
%add52.i62 = add i16 %add36.i56, %add51.i61
132+
%.not217.i = icmp ne i32 0, %unsclear48.i60
133+
%add58.i64 = zext i1 %.not217.i to i16
134+
%add59.i65 = add i16 %add52.i62, %add58.i64
135+
%14 = shl i32 0, 0
136+
%unsclear71.i69 = and i32 %14, 0
137+
%.not220.i = icmp ne i32 %unsclear71.i69, 0
138+
%add74.i70 = zext i1 %.not220.i to i16
139+
%add75.i71 = add i16 %add59.i65, %add74.i70
140+
%15 = shl i32 0, 0
141+
%unsclear78.i = and i32 %15, 0
142+
%.not221.i = icmp ne i32 %unsclear78.i, 0
143+
%add81.i73 = zext i1 %.not221.i to i16
144+
%add82.i74 = add i16 %add75.i71, %add81.i73
145+
%16 = shl i32 0, 0
146+
%unsclear94.i78 = and i32 %16, 0
147+
%.not224.i = icmp ne i32 %unsclear94.i78, 0
148+
%add97.i79 = zext i1 %.not224.i to i16
149+
%add98.i80 = add i16 %add82.i74, %add97.i79
150+
%17 = shl i32 0, 0
151+
%unsclear101.i = and i32 %17, 0
152+
%.not225.i = icmp ne i32 %unsclear101.i, 0
153+
%add104.i82 = zext i1 %.not225.i to i16
154+
%add105.i83 = add i16 %add98.i80, %add104.i82
155+
%.not229.i = icmp ne i32 0, 0
156+
%add127.i91 = zext i1 %.not229.i to i16
157+
%add128.i92 = add i16 %add105.i83, %add127.i91
158+
%.not232.i = icmp ne i32 0, 0
159+
%add143.i97 = zext i1 %.not232.i to i16
160+
%add144.i98 = add i16 %add128.i92, %add143.i97
161+
%.not233.i = icmp ne i32 0, 0
162+
%add150.i100 = zext i1 %.not233.i to i16
163+
%add151.i101 = add i16 %add144.i98, %add150.i100
164+
%.not236.i106 = icmp ne i32 0, 0
165+
%add166.i107 = zext i1 %.not236.i106 to i16
166+
%add167.i108 = add i16 %add151.i101, %add166.i107
167+
%.not237.i = icmp ne i32 0, 0
168+
%add173.i110 = zext i1 %.not237.i to i16
169+
%add174.i111 = add i16 %add167.i108, %add173.i110
170+
%.not.i118 = icmp ne i32 0, 0
171+
%add.i119 = zext i1 %.not.i118 to i16
172+
%add6.i120 = add i16 %add174.i111, %add.i119
173+
%.not209.i122 = icmp ne i32 0, 0
174+
%add12.i123 = zext i1 %.not209.i122 to i16
175+
%add13.i124 = add i16 %add6.i120, %add12.i123
176+
%.not212.i131 = icmp ne i32 1, 0
177+
%add28.i132 = zext i1 %.not212.i131 to i16
178+
%add29.i133 = add i16 %add13.i124, %add28.i132
179+
ret i16 %add29.i133
180+
}
181+
182+
declare i32 @llvm.bswap.i32(i32)
183+

0 commit comments

Comments
 (0)