Skip to content

Commit 8140779

Browse files
authored
[LV] Improve accuracy of branch weights in epilogue iteration check block (#152980)
When one of the vector loops (main or epilogue) is scalable and the other isn't, we can use the estimated value of vscale to improve the accuracy of the branch weights, rather than relying only on each loop's known minimum element count.
1 parent 0bfa171 commit 8140779

File tree

2 files changed

+193
-8
lines changed

2 files changed

+193
-8
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7644,9 +7644,11 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
76447644
BranchInst &BI =
76457645
*BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters);
76467646
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
7647-
unsigned MainLoopStep = EPI.MainLoopUF * EPI.MainLoopVF.getKnownMinValue();
7647+
auto VScale = Cost->getVScaleForTuning();
7648+
unsigned MainLoopStep =
7649+
estimateElementCount(EPI.MainLoopVF * EPI.MainLoopUF, VScale);
76487650
unsigned EpilogueLoopStep =
7649-
EPI.EpilogueUF * EPI.EpilogueVF.getKnownMinValue();
7651+
estimateElementCount(EPI.EpilogueVF * EPI.EpilogueUF, VScale);
76507652
// We assume the remaining `Count` is equally distributed in
76517653
// [0, MainLoopStep)
76527654
// So the probability for `Count < EpilogueLoopStep` should be

llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll

Lines changed: 189 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "br" --filter "^.*:" --version 5
22
; RUN: opt -passes="print<block-freq>,loop-vectorize" -mcpu=neoverse-v1 -force-vector-interleave=1 -S < %s | FileCheck %s -check-prefix=CHECK-V1-IC1
3+
; RUN: opt -passes="print<block-freq>,loop-vectorize" -mcpu=neoverse-v1 -force-vector-interleave=1 \
4+
; RUN: -epilogue-vectorization-force-VF=4 -S < %s | FileCheck %s -check-prefix=CHECK-V1-IC1-FORCE-EPI4
35
; RUN: opt -passes="print<block-freq>,loop-vectorize" -mcpu=neoverse-v2 -force-vector-interleave=1 -S < %s | FileCheck %s -check-prefix=CHECK-V2-IC1
46
; RUN: opt -passes="print<block-freq>,loop-vectorize" -mcpu=neoverse-v2 -force-vector-interleave=4 -S < %s | FileCheck %s -check-prefix=CHECK-V2-IC4
57

@@ -10,15 +12,15 @@ target triple = "aarch64-unknown-linux-gnu"
1012

1113
; We expect the branch weight computations after vectorisation to use
1214
; vscale=2 for neoverse-v1 and vscale=1 for neoverse-v2.
13-
define void @_Z3foov(i64 %n) {
14-
; CHECK-V1-IC1-LABEL: define void @_Z3foov(
15+
define void @foo_i32(i64 %n) {
16+
; CHECK-V1-IC1-LABEL: define void @foo_i32(
1517
; CHECK-V1-IC1-SAME: i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
1618
; CHECK-V1-IC1: [[ENTRY:.*:]]
1719
; CHECK-V1-IC1: br i1 [[MIN_ITERS_CHECK:%.*]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF0:![0-9]+]]
1820
; CHECK-V1-IC1: [[VECTOR_PH]]:
1921
; CHECK-V1-IC1: br label %[[VECTOR_BODY:.*]]
2022
; CHECK-V1-IC1: [[VECTOR_BODY]]:
21-
; CHECK-V1-IC1: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF0]], !llvm.loop [[LOOP1:![0-9]+]]
23+
; CHECK-V1-IC1: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF0]], !llvm.loop [[LOOP1:![0-9]+]]
2224
; CHECK-V1-IC1: [[MIDDLE_BLOCK]]:
2325
; CHECK-V1-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]], !prof [[PROF4:![0-9]+]]
2426
; CHECK-V1-IC1: [[SCALAR_PH]]:
@@ -27,7 +29,33 @@ define void @_Z3foov(i64 %n) {
2729
; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF5:![0-9]+]], !llvm.loop [[LOOP6:![0-9]+]]
2830
; CHECK-V1-IC1: [[FOR_COND_CLEANUP]]:
2931
;
30-
; CHECK-V2-IC1-LABEL: define void @_Z3foov(
32+
; CHECK-V1-IC1-FORCE-EPI4-LABEL: define void @foo_i32(
33+
; CHECK-V1-IC1-FORCE-EPI4-SAME: i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
34+
; CHECK-V1-IC1-FORCE-EPI4: [[ITER_CHECK:.*:]]
35+
; CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF0:![0-9]+]]
36+
; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
37+
; CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_ITERS_CHECK1:%.*]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF0]]
38+
; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_PH]]:
39+
; CHECK-V1-IC1-FORCE-EPI4: br label %[[VECTOR_BODY:.*]]
40+
; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_BODY]]:
41+
; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF0]], !llvm.loop [[LOOP1:![0-9]+]]
42+
; CHECK-V1-IC1-FORCE-EPI4: [[MIDDLE_BLOCK]]:
43+
; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF4:![0-9]+]]
44+
; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_ITER_CHECK]]:
45+
; CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF5:![0-9]+]]
46+
; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_PH]]:
47+
; CHECK-V1-IC1-FORCE-EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
48+
; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_VECTOR_BODY]]:
49+
; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
50+
; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]:
51+
; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF7:![0-9]+]]
52+
; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_SCALAR_PH]]:
53+
; CHECK-V1-IC1-FORCE-EPI4: br label %[[FOR_BODY:.*]]
54+
; CHECK-V1-IC1-FORCE-EPI4: [[FOR_BODY]]:
55+
; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF8:![0-9]+]], !llvm.loop [[LOOP9:![0-9]+]]
56+
; CHECK-V1-IC1-FORCE-EPI4: [[FOR_COND_CLEANUP]]:
57+
;
58+
; CHECK-V2-IC1-LABEL: define void @foo_i32(
3159
; CHECK-V2-IC1-SAME: i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
3260
; CHECK-V2-IC1: [[ENTRY:.*:]]
3361
; CHECK-V2-IC1: br i1 [[MIN_ITERS_CHECK:%.*]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF0:![0-9]+]]
@@ -43,9 +71,9 @@ define void @_Z3foov(i64 %n) {
4371
; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]]
4472
; CHECK-V2-IC1: [[FOR_COND_CLEANUP]]:
4573
;
46-
; CHECK-V2-IC4-LABEL: define void @_Z3foov(
74+
; CHECK-V2-IC4-LABEL: define void @foo_i32(
4775
; CHECK-V2-IC4-SAME: i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
48-
; CHECK-V2-IC4: [[VEC_EPILOG_VECTOR_BODY1:.*:]]
76+
; CHECK-V2-IC4: [[ITER_CHECK:.*:]]
4977
; CHECK-V2-IC4: br i1 [[MIN_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF0:![0-9]+]]
5078
; CHECK-V2-IC4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
5179
; CHECK-V2-IC4: br i1 [[MIN_ITERS_CHECK1:%.*]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF0]]
@@ -86,6 +114,128 @@ for.cond.cleanup: ; preds = %for.body
86114
ret void
87115
}
88116

117+
define void @foo_i8(i64 %n) {
118+
; CHECK-V1-IC1-LABEL: define void @foo_i8(
119+
; CHECK-V1-IC1-SAME: i64 [[N:%.*]]) #[[ATTR0]] {
120+
; CHECK-V1-IC1: [[ITER_CHECK:.*:]]
121+
; CHECK-V1-IC1: br i1 [[MIN_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF0]]
122+
; CHECK-V1-IC1: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
123+
; CHECK-V1-IC1: br i1 [[MIN_ITERS_CHECK1:%.*]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF0]]
124+
; CHECK-V1-IC1: [[VECTOR_PH]]:
125+
; CHECK-V1-IC1: br label %[[VECTOR_BODY:.*]]
126+
; CHECK-V1-IC1: [[VECTOR_BODY]]:
127+
; CHECK-V1-IC1: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF7:![0-9]+]], !llvm.loop [[LOOP8:![0-9]+]]
128+
; CHECK-V1-IC1: [[MIDDLE_BLOCK]]:
129+
; CHECK-V1-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7]]
130+
; CHECK-V1-IC1: [[VEC_EPILOG_ITER_CHECK]]:
131+
; CHECK-V1-IC1: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF9:![0-9]+]]
132+
; CHECK-V1-IC1: [[VEC_EPILOG_PH]]:
133+
; CHECK-V1-IC1: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
134+
; CHECK-V1-IC1: [[VEC_EPILOG_VECTOR_BODY]]:
135+
; CHECK-V1-IC1: br i1 [[TMP15:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
136+
; CHECK-V1-IC1: [[VEC_EPILOG_MIDDLE_BLOCK]]:
137+
; CHECK-V1-IC1: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF11:![0-9]+]]
138+
; CHECK-V1-IC1: [[VEC_EPILOG_SCALAR_PH]]:
139+
; CHECK-V1-IC1: br label %[[FOR_BODY:.*]]
140+
; CHECK-V1-IC1: [[FOR_BODY]]:
141+
; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF5]], !llvm.loop [[LOOP12:![0-9]+]]
142+
; CHECK-V1-IC1: [[FOR_COND_CLEANUP]]:
143+
;
144+
; CHECK-V1-IC1-FORCE-EPI4-LABEL: define void @foo_i8(
145+
; CHECK-V1-IC1-FORCE-EPI4-SAME: i64 [[N:%.*]]) #[[ATTR0]] {
146+
; CHECK-V1-IC1-FORCE-EPI4: [[ITER_CHECK:.*:]]
147+
; CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF0]]
148+
; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
149+
; CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_ITERS_CHECK1:%.*]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF0]]
150+
; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_PH]]:
151+
; CHECK-V1-IC1-FORCE-EPI4: br label %[[VECTOR_BODY:.*]]
152+
; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_BODY]]:
153+
; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]]
154+
; CHECK-V1-IC1-FORCE-EPI4: [[MIDDLE_BLOCK]]:
155+
; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF10]]
156+
; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_ITER_CHECK]]:
157+
; CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF12:![0-9]+]]
158+
; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_PH]]:
159+
; CHECK-V1-IC1-FORCE-EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
160+
; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_VECTOR_BODY]]:
161+
; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
162+
; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]:
163+
; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF7]]
164+
; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_SCALAR_PH]]:
165+
; CHECK-V1-IC1-FORCE-EPI4: br label %[[FOR_BODY:.*]]
166+
; CHECK-V1-IC1-FORCE-EPI4: [[FOR_BODY]]:
167+
; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF8]], !llvm.loop [[LOOP14:![0-9]+]]
168+
; CHECK-V1-IC1-FORCE-EPI4: [[FOR_COND_CLEANUP]]:
169+
;
170+
; CHECK-V2-IC1-LABEL: define void @foo_i8(
171+
; CHECK-V2-IC1-SAME: i64 [[N:%.*]]) #[[ATTR0]] {
172+
; CHECK-V2-IC1: [[ITER_CHECK:.*:]]
173+
; CHECK-V2-IC1: br i1 [[MIN_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF0]]
174+
; CHECK-V2-IC1: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
175+
; CHECK-V2-IC1: br i1 [[MIN_ITERS_CHECK1:%.*]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF0]]
176+
; CHECK-V2-IC1: [[VECTOR_PH]]:
177+
; CHECK-V2-IC1: br label %[[VECTOR_BODY:.*]]
178+
; CHECK-V2-IC1: [[VECTOR_BODY]]:
179+
; CHECK-V2-IC1: br i1 [[TMP4:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF8:![0-9]+]], !llvm.loop [[LOOP9:![0-9]+]]
180+
; CHECK-V2-IC1: [[MIDDLE_BLOCK]]:
181+
; CHECK-V2-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF10:![0-9]+]]
182+
; CHECK-V2-IC1: [[VEC_EPILOG_ITER_CHECK]]:
183+
; CHECK-V2-IC1: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF11:![0-9]+]]
184+
; CHECK-V2-IC1: [[VEC_EPILOG_PH]]:
185+
; CHECK-V2-IC1: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
186+
; CHECK-V2-IC1: [[VEC_EPILOG_VECTOR_BODY]]:
187+
; CHECK-V2-IC1: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
188+
; CHECK-V2-IC1: [[VEC_EPILOG_MIDDLE_BLOCK]]:
189+
; CHECK-V2-IC1: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF5]]
190+
; CHECK-V2-IC1: [[VEC_EPILOG_SCALAR_PH]]:
191+
; CHECK-V2-IC1: br label %[[FOR_BODY:.*]]
192+
; CHECK-V2-IC1: [[FOR_BODY]]:
193+
; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF6]], !llvm.loop [[LOOP13:![0-9]+]]
194+
; CHECK-V2-IC1: [[FOR_COND_CLEANUP]]:
195+
;
196+
; CHECK-V2-IC4-LABEL: define void @foo_i8(
197+
; CHECK-V2-IC4-SAME: i64 [[N:%.*]]) #[[ATTR0]] {
198+
; CHECK-V2-IC4: [[ITER_CHECK:.*:]]
199+
; CHECK-V2-IC4: br i1 [[MIN_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF0]]
200+
; CHECK-V2-IC4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
201+
; CHECK-V2-IC4: br i1 [[MIN_ITERS_CHECK1:%.*]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF0]]
202+
; CHECK-V2-IC4: [[VECTOR_PH]]:
203+
; CHECK-V2-IC4: br label %[[VECTOR_BODY:.*]]
204+
; CHECK-V2-IC4: [[VECTOR_BODY]]:
205+
; CHECK-V2-IC4: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF5]], !llvm.loop [[LOOP11:![0-9]+]]
206+
; CHECK-V2-IC4: [[MIDDLE_BLOCK]]:
207+
; CHECK-V2-IC4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF1]]
208+
; CHECK-V2-IC4: [[VEC_EPILOG_ITER_CHECK]]:
209+
; CHECK-V2-IC4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF12:![0-9]+]]
210+
; CHECK-V2-IC4: [[VEC_EPILOG_PH]]:
211+
; CHECK-V2-IC4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
212+
; CHECK-V2-IC4: [[VEC_EPILOG_VECTOR_BODY]]:
213+
; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
214+
; CHECK-V2-IC4: [[VEC_EPILOG_MIDDLE_BLOCK]]:
215+
; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF14:![0-9]+]]
216+
; CHECK-V2-IC4: [[VEC_EPILOG_SCALAR_PH]]:
217+
; CHECK-V2-IC4: br label %[[FOR_BODY:.*]]
218+
; CHECK-V2-IC4: [[FOR_BODY]]:
219+
; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF9]], !llvm.loop [[LOOP15:![0-9]+]]
220+
; CHECK-V2-IC4: [[FOR_COND_CLEANUP]]:
221+
;
222+
entry:
223+
br label %for.body
224+
225+
for.body:
226+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
227+
%arrayidx = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %iv
228+
%load = load i8, ptr %arrayidx, align 1
229+
%arrayidx2 = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %iv
230+
store i8 %load, ptr %arrayidx2, align 1
231+
%iv.next = add nuw nsw i64 %iv, 1
232+
%exitcond = icmp eq i64 %iv.next, %n
233+
br i1 %exitcond, label %for.cond.cleanup, label %for.body, !prof !0
234+
235+
for.cond.cleanup:
236+
ret void
237+
}
238+
89239
!0 = !{!"branch_weights", i32 1, i32 1023}
90240
;.
91241
; CHECK-V1-IC1: [[PROF0]] = !{!"branch_weights", i32 1, i32 127}
@@ -95,6 +245,28 @@ for.cond.cleanup: ; preds = %for.body
95245
; CHECK-V1-IC1: [[PROF4]] = !{!"branch_weights", i32 1, i32 7}
96246
; CHECK-V1-IC1: [[PROF5]] = !{!"branch_weights", i32 0, i32 0}
97247
; CHECK-V1-IC1: [[LOOP6]] = distinct !{[[LOOP6]], [[META3]], [[META2]]}
248+
; CHECK-V1-IC1: [[PROF7]] = !{!"branch_weights", i32 1, i32 31}
249+
; CHECK-V1-IC1: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META3]]}
250+
; CHECK-V1-IC1: [[PROF9]] = !{!"branch_weights", i32 16, i32 16}
251+
; CHECK-V1-IC1: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META3]]}
252+
; CHECK-V1-IC1: [[PROF11]] = !{!"branch_weights", i32 1, i32 15}
253+
; CHECK-V1-IC1: [[LOOP12]] = distinct !{[[LOOP12]], [[META3]], [[META2]]}
254+
;.
255+
; CHECK-V1-IC1-FORCE-EPI4: [[PROF0]] = !{!"branch_weights", i32 1, i32 127}
256+
; CHECK-V1-IC1-FORCE-EPI4: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
257+
; CHECK-V1-IC1-FORCE-EPI4: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
258+
; CHECK-V1-IC1-FORCE-EPI4: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"}
259+
; CHECK-V1-IC1-FORCE-EPI4: [[PROF4]] = !{!"branch_weights", i32 1, i32 7}
260+
; CHECK-V1-IC1-FORCE-EPI4: [[PROF5]] = !{!"branch_weights", i32 4, i32 4}
261+
; CHECK-V1-IC1-FORCE-EPI4: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META3]]}
262+
; CHECK-V1-IC1-FORCE-EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 3}
263+
; CHECK-V1-IC1-FORCE-EPI4: [[PROF8]] = !{!"branch_weights", i32 0, i32 0}
264+
; CHECK-V1-IC1-FORCE-EPI4: [[LOOP9]] = distinct !{[[LOOP9]], [[META3]], [[META2]]}
265+
; CHECK-V1-IC1-FORCE-EPI4: [[PROF10]] = !{!"branch_weights", i32 1, i32 31}
266+
; CHECK-V1-IC1-FORCE-EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META3]]}
267+
; CHECK-V1-IC1-FORCE-EPI4: [[PROF12]] = !{!"branch_weights", i32 4, i32 28}
268+
; CHECK-V1-IC1-FORCE-EPI4: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META3]]}
269+
; CHECK-V1-IC1-FORCE-EPI4: [[LOOP14]] = distinct !{[[LOOP14]], [[META3]], [[META2]]}
98270
;.
99271
; CHECK-V2-IC1: [[PROF0]] = !{!"branch_weights", i32 1, i32 127}
100272
; CHECK-V2-IC1: [[PROF1]] = !{!"branch_weights", i32 1, i32 255}
@@ -104,6 +276,12 @@ for.cond.cleanup: ; preds = %for.body
104276
; CHECK-V2-IC1: [[PROF5]] = !{!"branch_weights", i32 1, i32 3}
105277
; CHECK-V2-IC1: [[PROF6]] = !{!"branch_weights", i32 0, i32 0}
106278
; CHECK-V2-IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]], [[META3]]}
279+
; CHECK-V2-IC1: [[PROF8]] = !{!"branch_weights", i32 1, i32 63}
280+
; CHECK-V2-IC1: [[LOOP9]] = distinct !{[[LOOP9]], [[META3]], [[META4]]}
281+
; CHECK-V2-IC1: [[PROF10]] = !{!"branch_weights", i32 1, i32 15}
282+
; CHECK-V2-IC1: [[PROF11]] = !{!"branch_weights", i32 4, i32 12}
283+
; CHECK-V2-IC1: [[LOOP12]] = distinct !{[[LOOP12]], [[META3]], [[META4]]}
284+
; CHECK-V2-IC1: [[LOOP13]] = distinct !{[[LOOP13]], [[META4]], [[META3]]}
107285
;.
108286
; CHECK-V2-IC4: [[PROF0]] = !{!"branch_weights", i32 1, i32 127}
109287
; CHECK-V2-IC4: [[PROF1]] = !{!"branch_weights", i32 1, i32 63}
@@ -116,4 +294,9 @@ for.cond.cleanup: ; preds = %for.body
116294
; CHECK-V2-IC4: [[PROF8]] = !{!"branch_weights", i32 1, i32 3}
117295
; CHECK-V2-IC4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0}
118296
; CHECK-V2-IC4: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]], [[META3]]}
297+
; CHECK-V2-IC4: [[LOOP11]] = distinct !{[[LOOP11]], [[META3]], [[META4]]}
298+
; CHECK-V2-IC4: [[PROF12]] = !{!"branch_weights", i32 8, i32 56}
299+
; CHECK-V2-IC4: [[LOOP13]] = distinct !{[[LOOP13]], [[META3]], [[META4]]}
300+
; CHECK-V2-IC4: [[PROF14]] = !{!"branch_weights", i32 1, i32 7}
301+
; CHECK-V2-IC4: [[LOOP15]] = distinct !{[[LOOP15]], [[META4]], [[META3]]}
119302
;.

0 commit comments

Comments
 (0)