1- ; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-epilogue-vectorization -epilogue-vectorization-force-VF=4 | FileCheck %s
1+ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "br " --filter "^.*:" --filter "icmp" --version 5
2+ ; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-epilogue-vectorization \
3+ ; RUN: -epilogue-vectorization-force-VF=4 | FileCheck %s --check-prefix=MAINVF4IC1_EPI4
4+ ; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-epilogue-vectorization \
5+ ; RUN: -epilogue-vectorization-force-VF=4 | FileCheck %s --check-prefix=MAINVF4IC2_EPI4
26
3- ; CHECK-LABEL: @f0(
4- ;
5- ; CHECK: entry:
6- ; CHECK: br i1 %cmp.entry, label %iter.check, label %exit, !prof [[PROF_F0_ENTRY:![0-9]+]]
7- ;
8- ; CHECK: iter.check:
9- ; CHECK: br i1 %min.iters.check, label %vec.epilog.scalar.ph, label %vector.scevcheck, !prof [[PROF_F0_UNLIKELY:![0-9]+]]
10- ;
11- ; CHECK: vector.scevcheck:
12- ; CHECK: br i1 %4, label %vec.epilog.scalar.ph, label %vector.main.loop.iter.check, !prof [[PROF_F0_UNLIKELY]]
13- ;
14- ; CHECK: vector.main.loop.iter.check:
15- ; CHECK: br i1 %min.iters.check1, label %vec.epilog.ph, label %vector.ph, !prof [[PROF_F0_UNLIKELY]]
16- ;
17- ; CHECK: vector.ph:
18- ; CHECK: br label %vector.body
19- ;
20- ; CHECK: vector.body:
21- ; CHECK: br i1 {{.+}}, label %middle.block, label %vector.body, !prof [[PROF_F0_VECTOR_BODY:![0-9]+]]
22- ;
23- ; CHECK: middle.block:
24- ; CHECK: br i1 %cmp.n, label %exit.loopexit, label %vec.epilog.iter.check, !prof [[PROF_F0_MIDDLE_BLOCKS:![0-9]+]]
25- ;
26- ; CHECK: vec.epilog.iter.check:
27- ; CHECK: br i1 %min.epilog.iters.check, label %vec.epilog.scalar.ph, label %vec.epilog.ph, !prof [[PROF_F0_VEC_EPILOGUE_SKIP:![0-9]+]]
28- ;
29- ; CHECK: vec.epilog.ph:
30- ; CHECK: br label %vec.epilog.vector.body
31- ;
32- ; CHECK: vec.epilog.vector.body:
33- ; CHECK: br i1 {{.+}}, label %vec.epilog.middle.block, label %vec.epilog.vector.body, !prof [[PROF_F0_VEC_EPILOG_VECTOR_BODY:![0-9]+]]
34- ;
35- ; CHECK: vec.epilog.middle.block:
36- ; CHECK: br i1 %cmp.n{{.+}}, label %exit.loopexit, label %vec.epilog.scalar.ph, !prof [[PROF_F0_MIDDLE_BLOCKS:![0-9]+]]
37- ;
38- ; CHECK: vec.epilog.scalar.ph:
39- ; CHECK: br label %loop
40- ;
41- ; CHECK: loop:
42- ; CHECK: br i1 %cmp.loop, label %loop, label %exit.loopexit, !prof [[PROF_F0_LOOP:![0-9]+]]
7+ ; FIXME: For MAINVF4IC2_EPI4 the branch weights on the terminator of the
8+ ; VEC_EPILOG_ITER_CHECK block should be [4,4] rather than the [4,0] that is
9+ ; currently emitted. The main loop processes VF * IC = 4 * 2 = 8 scalar
10+ ; iterations per vector iteration, so the remaining count is in the range
11+ ; [0,7]: counts 0-3 skip the vector epilogue and counts 4-7 run it, a 4:4
12+ ; split. The suspected cause is EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck,
13+ ; where the main loop VF appears to be set to the same value as the epilogue VF.
14+ define void @f0 (i8 %n , i32 %len , ptr %p ) !prof !0 {
15+ ; MAINVF4IC1_EPI4-LABEL: define void @f0(
16+ ; MAINVF4IC1_EPI4-SAME: i8 [[N:%.*]], i32 [[LEN:%.*]], ptr [[P:%.*]]) !prof [[PROF0:![0-9]+]] {
17+ ; MAINVF4IC1_EPI4: [[ENTRY:.*:]]
18+ ; MAINVF4IC1_EPI4: [[CMP_ENTRY:%.*]] = icmp sgt i32 [[LEN]], 0
19+ ; MAINVF4IC1_EPI4: br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
20+ ; MAINVF4IC1_EPI4: [[ITER_CHECK]]:
21+ ; MAINVF4IC1_EPI4: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4
22+ ; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
23+ ; MAINVF4IC1_EPI4: [[VECTOR_SCEVCHECK]]:
24+ ; MAINVF4IC1_EPI4: [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0
25+ ; MAINVF4IC1_EPI4: [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255
26+ ; MAINVF4IC1_EPI4: br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]]
27+ ; MAINVF4IC1_EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
28+ ; MAINVF4IC1_EPI4: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 4
29+ ; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
30+ ; MAINVF4IC1_EPI4: [[VECTOR_PH]]:
31+ ; MAINVF4IC1_EPI4: br label %[[VECTOR_BODY:.*]]
32+ ; MAINVF4IC1_EPI4: [[VECTOR_BODY]]:
33+ ; MAINVF4IC1_EPI4: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]]
34+ ; MAINVF4IC1_EPI4: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
35+ ; MAINVF4IC1_EPI4: [[MIDDLE_BLOCK]]:
36+ ; MAINVF4IC1_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
37+ ; MAINVF4IC1_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]]
38+ ; MAINVF4IC1_EPI4: [[VEC_EPILOG_ITER_CHECK]]:
39+ ; MAINVF4IC1_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4
40+ ; MAINVF4IC1_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]]
41+ ; MAINVF4IC1_EPI4: [[VEC_EPILOG_PH]]:
42+ ; MAINVF4IC1_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
43+ ; MAINVF4IC1_EPI4: [[VEC_EPILOG_VECTOR_BODY]]:
44+ ; MAINVF4IC1_EPI4: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]]
45+ ; MAINVF4IC1_EPI4: br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]]
46+ ; MAINVF4IC1_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]:
47+ ; MAINVF4IC1_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]]
48+ ; MAINVF4IC1_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF7]]
49+ ; MAINVF4IC1_EPI4: [[VEC_EPILOG_SCALAR_PH]]:
50+ ; MAINVF4IC1_EPI4: br label %[[LOOP:.*]]
51+ ; MAINVF4IC1_EPI4: [[LOOP]]:
52+ ; MAINVF4IC1_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]]
53+ ; MAINVF4IC1_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF11:![0-9]+]], !llvm.loop [[LOOP12:![0-9]+]]
54+ ; MAINVF4IC1_EPI4: [[EXIT_LOOPEXIT]]:
55+ ; MAINVF4IC1_EPI4: br label %[[EXIT]]
56+ ; MAINVF4IC1_EPI4: [[EXIT]]:
4357;
44- ; CHECK: exit.loopexit:
45- ; CHECK: br label %exit
58+ ; MAINVF4IC2_EPI4-LABEL: define void @f0(
59+ ; MAINVF4IC2_EPI4-SAME: i8 [[N:%.*]], i32 [[LEN:%.*]], ptr [[P:%.*]]) !prof [[PROF0:![0-9]+]] {
60+ ; MAINVF4IC2_EPI4: [[ENTRY:.*:]]
61+ ; MAINVF4IC2_EPI4: [[CMP_ENTRY:%.*]] = icmp sgt i32 [[LEN]], 0
62+ ; MAINVF4IC2_EPI4: br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
63+ ; MAINVF4IC2_EPI4: [[ITER_CHECK]]:
64+ ; MAINVF4IC2_EPI4: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4
65+ ; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
66+ ; MAINVF4IC2_EPI4: [[VECTOR_SCEVCHECK]]:
67+ ; MAINVF4IC2_EPI4: [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0
68+ ; MAINVF4IC2_EPI4: [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255
69+ ; MAINVF4IC2_EPI4: br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]]
70+ ; MAINVF4IC2_EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
71+ ; MAINVF4IC2_EPI4: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 8
72+ ; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
73+ ; MAINVF4IC2_EPI4: [[VECTOR_PH]]:
74+ ; MAINVF4IC2_EPI4: br label %[[VECTOR_BODY:.*]]
75+ ; MAINVF4IC2_EPI4: [[VECTOR_BODY]]:
76+ ; MAINVF4IC2_EPI4: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]]
77+ ; MAINVF4IC2_EPI4: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
78+ ; MAINVF4IC2_EPI4: [[MIDDLE_BLOCK]]:
79+ ; MAINVF4IC2_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
80+ ; MAINVF4IC2_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]]
81+ ; MAINVF4IC2_EPI4: [[VEC_EPILOG_ITER_CHECK]]:
82+ ; MAINVF4IC2_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4
83+ ; MAINVF4IC2_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]]
84+ ; MAINVF4IC2_EPI4: [[VEC_EPILOG_PH]]:
85+ ; MAINVF4IC2_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
86+ ; MAINVF4IC2_EPI4: [[VEC_EPILOG_VECTOR_BODY]]:
87+ ; MAINVF4IC2_EPI4: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]]
88+ ; MAINVF4IC2_EPI4: br i1 [[TMP13]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]]
89+ ; MAINVF4IC2_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]:
90+ ; MAINVF4IC2_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]]
91+ ; MAINVF4IC2_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF11:![0-9]+]]
92+ ; MAINVF4IC2_EPI4: [[VEC_EPILOG_SCALAR_PH]]:
93+ ; MAINVF4IC2_EPI4: br label %[[LOOP:.*]]
94+ ; MAINVF4IC2_EPI4: [[LOOP]]:
95+ ; MAINVF4IC2_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]]
96+ ; MAINVF4IC2_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF12:![0-9]+]], !llvm.loop [[LOOP13:![0-9]+]]
97+ ; MAINVF4IC2_EPI4: [[EXIT_LOOPEXIT]]:
98+ ; MAINVF4IC2_EPI4: br label %[[EXIT]]
99+ ; MAINVF4IC2_EPI4: [[EXIT]]:
46100;
47- ; CHECK: exit:
48- ; CHECK: ret void
49-
50- define void @f0 (i8 %n , i32 %len , ptr %p ) !prof !0 {
51101entry:
52102 %cmp.entry = icmp sgt i32 %len , 0
53103 br i1 %cmp.entry , label %loop , label %exit , !prof !1
@@ -72,11 +122,33 @@ exit:
72122!0 = !{!"function_entry_count" , i64 13 }
73123!1 = !{!"branch_weights" , i32 12 , i32 1 }
74124!2 = !{!"branch_weights" , i32 1234 , i32 1 }
75-
76- ; CHECK: [[PROF_F0_ENTRY]] = !{!"branch_weights", i32 12, i32 1}
77- ; CHECK: [[PROF_F0_UNLIKELY]] = !{!"branch_weights", i32 1, i32 127}
78- ; CHECK: [[PROF_F0_VECTOR_BODY]] = !{!"branch_weights", i32 1, i32 307}
79- ; CHECK: [[PROF_F0_MIDDLE_BLOCKS]] = !{!"branch_weights", i32 1, i32 3}
80- ; CHECK: [[PROF_F0_VEC_EPILOGUE_SKIP]] = !{!"branch_weights", i32 4, i32 0}
81- ; CHECK: [[PROF_F0_VEC_EPILOG_VECTOR_BODY]] = !{!"branch_weights", i32 0, i32 0}
82- ; CHECK: [[PROF_F0_LOOP]] = !{!"branch_weights", i32 2, i32 1}
125+ ;.
126+ ; MAINVF4IC1_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13}
127+ ; MAINVF4IC1_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1}
128+ ; MAINVF4IC1_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127}
129+ ; MAINVF4IC1_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 307}
130+ ; MAINVF4IC1_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]}
131+ ; MAINVF4IC1_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1}
132+ ; MAINVF4IC1_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"}
133+ ; MAINVF4IC1_EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 3}
134+ ; MAINVF4IC1_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 0}
135+ ; MAINVF4IC1_EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0}
136+ ; MAINVF4IC1_EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]}
137+ ; MAINVF4IC1_EPI4: [[PROF11]] = !{!"branch_weights", i32 2, i32 1}
138+ ; MAINVF4IC1_EPI4: [[LOOP12]] = distinct !{[[LOOP12]], [[META5]]}
139+ ;.
140+ ; MAINVF4IC2_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13}
141+ ; MAINVF4IC2_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1}
142+ ; MAINVF4IC2_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127}
143+ ; MAINVF4IC2_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 153}
144+ ; MAINVF4IC2_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]}
145+ ; MAINVF4IC2_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1}
146+ ; MAINVF4IC2_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"}
147+ ; MAINVF4IC2_EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 7}
148+ ; MAINVF4IC2_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 0}
149+ ; MAINVF4IC2_EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0}
150+ ; MAINVF4IC2_EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]}
151+ ; MAINVF4IC2_EPI4: [[PROF11]] = !{!"branch_weights", i32 1, i32 3}
152+ ; MAINVF4IC2_EPI4: [[PROF12]] = !{!"branch_weights", i32 2, i32 1}
153+ ; MAINVF4IC2_EPI4: [[LOOP13]] = distinct !{[[LOOP13]], [[META5]]}
154+ ;.
0 commit comments