Skip to content

Commit da33d40

Browse files
anton-afanasyev authored and tstellar committed
[SLP] Don't try to vectorize pair with insertelement
In particular, this breaks vectorization of insertelements where some of the intermediate (i.e. not last) insertelements are used externally. Fixes PR52275. Fixes #51617. Reviewed by: ABataev. Differential Revision: https://reviews.llvm.org/D119679 (cherry picked from commit b7574b0)
1 parent 3001b0d commit da33d40

File tree

2 files changed

+140
-17
lines changed

2 files changed

+140
-17
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3849,13 +3849,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
38493849
ValueSet SourceVectors;
38503850
for (Value *V : VL) {
38513851
SourceVectors.insert(cast<Instruction>(V)->getOperand(0));
3852-
if (getInsertIndex(V) == None) {
3853-
LLVM_DEBUG(dbgs() << "SLP: Gather of insertelement vectors with "
3854-
"non-constant or undef index.\n");
3855-
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
3856-
BS.cancelScheduling(VL, VL0);
3857-
return;
3858-
}
3852+
assert(getInsertIndex(V) != None && "Non-constant or undef index?");
38593853
}
38603854

38613855
if (count_if(VL, [&SourceVectors](Value *V) {
@@ -8343,6 +8337,8 @@ void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
83438337
bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
83448338
if (!A || !B)
83458339
return false;
8340+
if (isa<InsertElementInst>(A) || isa<InsertElementInst>(B))
8341+
return false;
83468342
Value *VL[] = {A, B};
83478343
return tryToVectorizeList(VL, R);
83488344
}
Lines changed: 137 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22
; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-- | FileCheck %s
33

4-
define <4 x i8> @pr52275(<4 x i8> %v, i8* %x) {
5-
; CHECK-LABEL: @pr52275(
6-
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i64 1
7-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[X]] to <2 x i8>*
8-
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 4
9-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
10-
; CHECK-NEXT: [[V11:%.*]] = shufflevector <4 x i8> [[V:%.*]], <4 x i8> [[TMP3]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
11-
; CHECK-NEXT: [[V2:%.*]] = add <4 x i8> [[V11]], [[V11]]
12-
; CHECK-NEXT: ret <4 x i8> [[V2]]
13-
;
4+
define <4 x i8> @test(<4 x i8> %v, i8* %x) {
145
%x0 = load i8, i8* %x, align 4
156
%g1 = getelementptr inbounds i8, i8* %x, i64 1
167
%x1 = load i8, i8* %g1, align 4
@@ -19,3 +10,139 @@ define <4 x i8> @pr52275(<4 x i8> %v, i8* %x) {
1910
%v2 = add <4 x i8> %v0, %v1
2011
ret <4 x i8> %v2
2112
}
13+
14+
define <2 x i8> @test2(<2 x i8> %t6, i32* %t1) {
15+
; CHECK-LABEL: @test2(
16+
; CHECK-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
17+
; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
18+
; CHECK-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4
19+
; CHECK-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8
20+
; CHECK-NEXT: [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 0
21+
; CHECK-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8
22+
; CHECK-NEXT: [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 1
23+
; CHECK-NEXT: [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
24+
; CHECK-NEXT: ret <2 x i8> [[T11]]
25+
;
26+
; FORCE_SLP-LABEL: @test2(
27+
; FORCE_SLP-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
28+
; FORCE_SLP-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
29+
; FORCE_SLP-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4
30+
; FORCE_SLP-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8
31+
; FORCE_SLP-NEXT: [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 0
32+
; FORCE_SLP-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8
33+
; FORCE_SLP-NEXT: [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 1
34+
; FORCE_SLP-NEXT: [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
35+
; FORCE_SLP-NEXT: ret <2 x i8> [[T11]]
36+
;
37+
%t3 = load i32, i32* %t1, align 4
38+
%t4 = getelementptr inbounds i32, i32* %t1, i64 1
39+
%t5 = load i32, i32* %t4, align 4
40+
%t7 = trunc i32 %t3 to i8
41+
%t8 = insertelement <2 x i8> %t6, i8 %t7, i64 0
42+
%t9 = trunc i32 %t5 to i8
43+
%t10 = insertelement <2 x i8> %t8, i8 %t9, i64 1
44+
%t11 = add <2 x i8> %t10, %t8
45+
ret <2 x i8> %t11
46+
}
47+
48+
define <2 x i8> @test_reorder(<2 x i8> %t6, i32* %t1) {
49+
; CHECK-LABEL: @test_reorder(
50+
; CHECK-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
51+
; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
52+
; CHECK-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4
53+
; CHECK-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8
54+
; CHECK-NEXT: [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 1
55+
; CHECK-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8
56+
; CHECK-NEXT: [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 0
57+
; CHECK-NEXT: [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
58+
; CHECK-NEXT: ret <2 x i8> [[T11]]
59+
;
60+
; FORCE_SLP-LABEL: @test_reorder(
61+
; FORCE_SLP-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
62+
; FORCE_SLP-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
63+
; FORCE_SLP-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4
64+
; FORCE_SLP-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8
65+
; FORCE_SLP-NEXT: [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 1
66+
; FORCE_SLP-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8
67+
; FORCE_SLP-NEXT: [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 0
68+
; FORCE_SLP-NEXT: [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
69+
; FORCE_SLP-NEXT: ret <2 x i8> [[T11]]
70+
;
71+
%t3 = load i32, i32* %t1, align 4
72+
%t4 = getelementptr inbounds i32, i32* %t1, i64 1
73+
%t5 = load i32, i32* %t4, align 4
74+
%t7 = trunc i32 %t3 to i8
75+
%t8 = insertelement <2 x i8> %t6, i8 %t7, i64 1
76+
%t9 = trunc i32 %t5 to i8
77+
%t10 = insertelement <2 x i8> %t8, i8 %t9, i64 0
78+
%t11 = add <2 x i8> %t10, %t8
79+
ret <2 x i8> %t11
80+
}
81+
82+
define <4 x i8> @test_subvector(<4 x i8> %t6, i32* %t1) {
83+
; CHECK-LABEL: @test_subvector(
84+
; CHECK-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
85+
; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
86+
; CHECK-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4
87+
; CHECK-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8
88+
; CHECK-NEXT: [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 0
89+
; CHECK-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8
90+
; CHECK-NEXT: [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 1
91+
; CHECK-NEXT: [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
92+
; CHECK-NEXT: ret <4 x i8> [[T11]]
93+
;
94+
; FORCE_SLP-LABEL: @test_subvector(
95+
; FORCE_SLP-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
96+
; FORCE_SLP-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
97+
; FORCE_SLP-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4
98+
; FORCE_SLP-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8
99+
; FORCE_SLP-NEXT: [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 0
100+
; FORCE_SLP-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8
101+
; FORCE_SLP-NEXT: [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 1
102+
; FORCE_SLP-NEXT: [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
103+
; FORCE_SLP-NEXT: ret <4 x i8> [[T11]]
104+
;
105+
%t3 = load i32, i32* %t1, align 4
106+
%t4 = getelementptr inbounds i32, i32* %t1, i64 1
107+
%t5 = load i32, i32* %t4, align 4
108+
%t7 = trunc i32 %t3 to i8
109+
%t8 = insertelement <4 x i8> %t6, i8 %t7, i64 0
110+
%t9 = trunc i32 %t5 to i8
111+
%t10 = insertelement <4 x i8> %t8, i8 %t9, i64 1
112+
%t11 = add <4 x i8> %t10, %t8
113+
ret <4 x i8> %t11
114+
}
115+
116+
define <4 x i8> @test_subvector_reorder(<4 x i8> %t6, i32* %t1) {
117+
; CHECK-LABEL: @test_subvector_reorder(
118+
; CHECK-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
119+
; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
120+
; CHECK-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4
121+
; CHECK-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8
122+
; CHECK-NEXT: [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 3
123+
; CHECK-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8
124+
; CHECK-NEXT: [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 2
125+
; CHECK-NEXT: [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
126+
; CHECK-NEXT: ret <4 x i8> [[T11]]
127+
;
128+
; FORCE_SLP-LABEL: @test_subvector_reorder(
129+
; FORCE_SLP-NEXT: [[T3:%.*]] = load i32, i32* [[T1:%.*]], align 4
130+
; FORCE_SLP-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 1
131+
; FORCE_SLP-NEXT: [[T5:%.*]] = load i32, i32* [[T4]], align 4
132+
; FORCE_SLP-NEXT: [[T7:%.*]] = trunc i32 [[T3]] to i8
133+
; FORCE_SLP-NEXT: [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 3
134+
; FORCE_SLP-NEXT: [[T9:%.*]] = trunc i32 [[T5]] to i8
135+
; FORCE_SLP-NEXT: [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 2
136+
; FORCE_SLP-NEXT: [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
137+
; FORCE_SLP-NEXT: ret <4 x i8> [[T11]]
138+
;
139+
%t3 = load i32, i32* %t1, align 4
140+
%t4 = getelementptr inbounds i32, i32* %t1, i64 1
141+
%t5 = load i32, i32* %t4, align 4
142+
%t7 = trunc i32 %t3 to i8
143+
%t8 = insertelement <4 x i8> %t6, i8 %t7, i64 3
144+
%t9 = trunc i32 %t5 to i8
145+
%t10 = insertelement <4 x i8> %t8, i8 %t9, i64 2
146+
%t11 = add <4 x i8> %t10, %t8
147+
ret <4 x i8> %t11
148+
}

0 commit comments

Comments
 (0)