1- ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2- ; RUN: opt -p loop-vectorize -force-vector-width=4 -S %s | FileCheck %s
1+ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
2+ ; RUN: opt -p loop-vectorize -force-vector-width=4 -S %s | FileCheck --check-prefix=VF4IC1 %s
3+ ; RUN: opt -p loop-vectorize -force-vector-width=2 -force-vector-interleave=2 -S %s | FileCheck --check-prefix=VF2IC2 %s
34
45define void @narrow_select_to_single_scalar (i1 %invar.cond , ptr noalias %A , ptr noalias %B , ptr noalias %C ) {
5- ; CHECK-LABEL: define void @narrow_select_to_single_scalar(
6- ; CHECK-SAME: i1 [[INVAR_COND:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
7- ; CHECK-NEXT: [[ENTRY:.*:]]
8- ; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
9- ; CHECK: [[VECTOR_PH]]:
10- ; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[INVAR_COND]], i16 0, i16 1
11- ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[C]], i16 [[TMP0]]
12- ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
13- ; CHECK: [[VECTOR_BODY]]:
14- ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
15- ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
16- ; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 0
17- ; CHECK-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 1
18- ; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 2
19- ; CHECK-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 3
20- ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i16 [[TMP5]]
21- ; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 1
22- ; CHECK-NEXT: store i16 [[TMP7]], ptr [[B]], align 1
23- ; CHECK-NEXT: store i16 0, ptr [[TMP1]], align 1
24- ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
25- ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
26- ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
27- ; CHECK: [[MIDDLE_BLOCK]]:
28- ; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
29- ; CHECK: [[SCALAR_PH]]:
30- ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
31- ; CHECK: [[LOOP_HEADER]]:
32- ; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 1024, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_HEADER]] ]
33- ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i16 [[IV]]
34- ; CHECK-NEXT: [[L_0:%.*]] = load i16, ptr [[GEP_A]], align 1
35- ; CHECK-NEXT: store i16 [[L_0]], ptr [[B]], align 1
36- ; CHECK-NEXT: [[INVAR_SEL:%.*]] = select i1 [[INVAR_COND]], i16 0, i16 1
37- ; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr i16, ptr [[C]], i16 [[INVAR_SEL]]
38- ; CHECK-NEXT: store i16 0, ptr [[GEP_C]], align 1
39- ; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
40- ; CHECK-NEXT: [[EC:%.*]] = icmp ne i16 [[IV]], 1024
41- ; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT:.*]], !llvm.loop [[LOOP3:![0-9]+]]
42- ; CHECK: [[EXIT]]:
43- ; CHECK-NEXT: ret void
6+ ; VF4IC1-LABEL: define void @narrow_select_to_single_scalar(
7+ ; VF4IC1-SAME: i1 [[INVAR_COND:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
8+ ; VF4IC1-NEXT: [[ENTRY:.*:]]
9+ ; VF4IC1-NEXT: br label %[[VECTOR_PH:.*]]
10+ ; VF4IC1: [[VECTOR_PH]]:
11+ ; VF4IC1-NEXT: [[TMP0:%.*]] = select i1 [[INVAR_COND]], i16 0, i16 1
12+ ; VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[C]], i16 [[TMP0]]
13+ ; VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
14+ ; VF4IC1: [[VECTOR_BODY]]:
15+ ; VF4IC1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
16+ ; VF4IC1-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
17+ ; VF4IC1-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 0
18+ ; VF4IC1-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 1
19+ ; VF4IC1-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 2
20+ ; VF4IC1-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 3
21+ ; VF4IC1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i16 [[TMP5]]
22+ ; VF4IC1-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 1
23+ ; VF4IC1-NEXT: store i16 [[TMP7]], ptr [[B]], align 1
24+ ; VF4IC1-NEXT: store i16 0, ptr [[TMP1]], align 1
25+ ; VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
26+ ; VF4IC1-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
27+ ; VF4IC1-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
28+ ; VF4IC1: [[MIDDLE_BLOCK]]:
29+ ; VF4IC1-NEXT: br label %[[EXIT:.*]]
30+ ; VF4IC1: [[EXIT]]:
31+ ; VF4IC1-NEXT: ret void
32+ ;
33+ ; VF2IC2-LABEL: define void @narrow_select_to_single_scalar(
34+ ; VF2IC2-SAME: i1 [[INVAR_COND:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
35+ ; VF2IC2-NEXT: [[ENTRY:.*:]]
36+ ; VF2IC2-NEXT: br label %[[VECTOR_PH:.*]]
37+ ; VF2IC2: [[VECTOR_PH]]:
38+ ; VF2IC2-NEXT: [[TMP0:%.*]] = select i1 [[INVAR_COND]], i16 0, i16 1
39+ ; VF2IC2-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[C]], i16 [[TMP0]]
40+ ; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]]
41+ ; VF2IC2: [[VECTOR_BODY]]:
42+ ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
43+ ; VF2IC2-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
44+ ; VF2IC2-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 2
45+ ; VF2IC2-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 3
46+ ; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i16 [[TMP3]]
47+ ; VF2IC2-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 1
48+ ; VF2IC2-NEXT: store i16 [[TMP5]], ptr [[B]], align 1
49+ ; VF2IC2-NEXT: store i16 0, ptr [[TMP1]], align 1
50+ ; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
51+ ; VF2IC2-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
52+ ; VF2IC2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
53+ ; VF2IC2: [[MIDDLE_BLOCK]]:
54+ ; VF2IC2-NEXT: br label %[[EXIT:.*]]
55+ ; VF2IC2: [[EXIT]]:
56+ ; VF2IC2-NEXT: ret void
4457;
4558entry:
4659 br label %loop.header
@@ -54,15 +67,88 @@ loop.header:
5467 %gep.C = getelementptr i16 , ptr %C , i16 %invar.sel
5568 store i16 0 , ptr %gep.C , align 1
5669 %iv.next = add i16 %iv , 1
57- %ec = icmp ne i16 %iv , 1024
70+ %ec = icmp ne i16 %iv.next , 1024
5871 br i1 %ec , label %loop.header , label %exit
5972
6073exit:
6174 ret void
6275}
63- ;.
64- ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
65- ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
66- ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
67- ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
68- ;.
76+
77+ ; FIXME: Currently this is mis-compiled when interleaving; all stores store the
78+ ; last lane of the last part, instead of the last lane per part.
79+ ; Test case for https://github.com/llvm/llvm-project/issues/162498.
80+ define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts (ptr %dst ) {
81+ ; VF4IC1-LABEL: define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts(
82+ ; VF4IC1-SAME: ptr [[DST:%.*]]) {
83+ ; VF4IC1-NEXT: [[ENTRY:.*:]]
84+ ; VF4IC1-NEXT: br label %[[VECTOR_PH:.*]]
85+ ; VF4IC1: [[VECTOR_PH]]:
86+ ; VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
87+ ; VF4IC1: [[VECTOR_BODY]]:
88+ ; VF4IC1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
89+ ; VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
90+ ; VF4IC1-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
91+ ; VF4IC1-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
92+ ; VF4IC1-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
93+ ; VF4IC1-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
94+ ; VF4IC1-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[VEC_IND]], splat (i32 1)
95+ ; VF4IC1-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
96+ ; VF4IC1-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP5]]
97+ ; VF4IC1-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
98+ ; VF4IC1-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP7]]
99+ ; VF4IC1-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
100+ ; VF4IC1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP9]]
101+ ; VF4IC1-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
102+ ; VF4IC1-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP11]]
103+ ; VF4IC1-NEXT: store i32 [[TMP0]], ptr [[TMP6]], align 4
104+ ; VF4IC1-NEXT: store i32 [[TMP1]], ptr [[TMP8]], align 4
105+ ; VF4IC1-NEXT: store i32 [[TMP2]], ptr [[TMP10]], align 4
106+ ; VF4IC1-NEXT: store i32 [[TMP3]], ptr [[TMP12]], align 4
107+ ; VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
108+ ; VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
109+ ; VF4IC1-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
110+ ; VF4IC1-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
111+ ; VF4IC1: [[MIDDLE_BLOCK]]:
112+ ; VF4IC1-NEXT: br label %[[EXIT:.*]]
113+ ; VF4IC1: [[EXIT]]:
114+ ; VF4IC1-NEXT: ret void
115+ ;
116+ ; VF2IC2-LABEL: define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts(
117+ ; VF2IC2-SAME: ptr [[DST:%.*]]) {
118+ ; VF2IC2-NEXT: [[ENTRY:.*:]]
119+ ; VF2IC2-NEXT: br label %[[VECTOR_PH:.*]]
120+ ; VF2IC2: [[VECTOR_PH]]:
121+ ; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]]
122+ ; VF2IC2: [[VECTOR_BODY]]:
123+ ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
124+ ; VF2IC2-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 2
125+ ; VF2IC2-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 3
126+ ; VF2IC2-NEXT: [[TMP2:%.*]] = lshr i32 [[INDEX]], 1
127+ ; VF2IC2-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP0]], 1
128+ ; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP2]]
129+ ; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP3]]
130+ ; VF2IC2-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 4
131+ ; VF2IC2-NEXT: store i32 [[TMP1]], ptr [[TMP5]], align 4
132+ ; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
133+ ; VF2IC2-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
134+ ; VF2IC2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
135+ ; VF2IC2: [[MIDDLE_BLOCK]]:
136+ ; VF2IC2-NEXT: br label %[[EXIT:.*]]
137+ ; VF2IC2: [[EXIT]]:
138+ ; VF2IC2-NEXT: ret void
139+ ;
140+ entry:
141+ br label %loop
142+
143+ loop: ; scalar loop: stores iv into dst[iv >> 1] for iv = 0..99
144+ %iv = phi i32 [ 0 , %entry ], [ %iv.next , %loop ] ; induction variable, starts at 0 and steps by 1
145+ %iv.shift = lshr i32 %iv , 1 ; index = iv >> 1, so iv = 2k and iv = 2k+1 map to the same index k
146+ %gep.dst = getelementptr i32 , ptr %dst , i32 %iv.shift ; address of the i32 element of %dst at that index
147+ store i32 %iv , ptr %gep.dst , align 4 ; each element is written twice in a row; the later (odd) iv is the value left behind
148+ %iv.next = add i32 %iv , 1
149+ %ec = icmp eq i32 %iv.next , 100 ; exit once iv.next reaches 100, i.e. after iv = 99
150+ br i1 %ec , label %exit , label %loop
151+
152+ exit:
153+ ret void
154+ }
0 commit comments