Skip to content

Commit 9181a7e

Browse files
authored
[LV] Fix branch weights in epilogue min iteration check block (#152534)
I've changed how we construct the EpilogueVectorizerEpilogueLoop and EpilogueVectorizerMainLoop classes so that each one passes the VF, minimum profitable trip count and UF of its own loop (main or epilogue) to the parent class as explicit constructor arguments. The InnerLoopAndEpilogueVectorizer class forwards these arguments to InnerLoopVectorizer instead of always taking the main-loop values from the EpilogueLoopVectorizationInfo struct, so processLoop no longer needs to overwrite EPI.MainLoopVF and EPI.MainLoopUF with the epilogue values before the second pass. This then allows EpilogueVectorizerEpilogueLoop to access the correct values of VF and UF for the main loop, which are required when setting branch weights in the minimum iteration check block.
1 parent 6fa13d7 commit 9181a7e

File tree

3 files changed

+26
-31
lines changed

3 files changed

+26
-31
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -687,10 +687,12 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
687687
const TargetTransformInfo *TTI, AssumptionCache *AC,
688688
OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
689689
LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
690-
ProfileSummaryInfo *PSI, GeneratedRTChecks &Checks, VPlan &Plan)
691-
: InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
692-
EPI.MainLoopVF, EPI.MainLoopVF, EPI.MainLoopUF, CM,
693-
BFI, PSI, Checks, Plan),
690+
ProfileSummaryInfo *PSI, GeneratedRTChecks &Checks, VPlan &Plan,
691+
ElementCount VecWidth, ElementCount MinProfitableTripCount,
692+
unsigned UnrollFactor)
693+
: InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, VecWidth,
694+
MinProfitableTripCount, UnrollFactor, CM, BFI, PSI,
695+
Checks, Plan),
694696
EPI(EPI) {}
695697

696698
// Override this function to handle the more complex control flow around the
@@ -725,8 +727,9 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
725727
OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
726728
LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
727729
ProfileSummaryInfo *PSI, GeneratedRTChecks &Check, VPlan &Plan)
728-
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
729-
EPI, CM, BFI, PSI, Check, Plan) {}
730+
: InnerLoopAndEpilogueVectorizer(
731+
OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, EPI, CM, BFI, PSI, Check,
732+
Plan, EPI.MainLoopVF, EPI.MainLoopVF, EPI.MainLoopUF) {}
730733
/// Implements the interface for creating a vectorized skeleton using the
731734
/// *main loop* strategy (ie the first pass of vplan execution).
732735
BasicBlock *createEpilogueVectorizedLoopSkeleton() final;
@@ -752,8 +755,9 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
752755
OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
753756
LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
754757
ProfileSummaryInfo *PSI, GeneratedRTChecks &Checks, VPlan &Plan)
755-
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
756-
EPI, CM, BFI, PSI, Checks, Plan) {
758+
: InnerLoopAndEpilogueVectorizer(
759+
OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, EPI, CM, BFI, PSI, Checks,
760+
Plan, EPI.EpilogueVF, EPI.EpilogueVF, EPI.EpilogueUF) {
757761
TripCount = EPI.TripCount;
758762
}
759763
/// Implements the interface for creating a vectorized skeleton using the
@@ -7508,8 +7512,9 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
75087512
assert(Bypass && "Expected valid bypass basic block.");
75097513
Value *Count = getTripCount();
75107514
MinProfitableTripCount = ElementCount::getFixed(0);
7511-
Value *CheckMinIters = createIterationCountCheck(
7512-
ForEpilogue ? EPI.EpilogueVF : VF, ForEpilogue ? EPI.EpilogueUF : UF);
7515+
Value *CheckMinIters =
7516+
createIterationCountCheck(ForEpilogue ? EPI.EpilogueVF : EPI.MainLoopVF,
7517+
ForEpilogue ? EPI.EpilogueUF : EPI.MainLoopUF);
75137518

75147519
BasicBlock *const TCCheckBlock = LoopVectorPreHeader;
75157520
if (!ForEpilogue)
@@ -7641,9 +7646,7 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
76417646
BranchInst &BI =
76427647
*BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters);
76437648
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
7644-
// FIXME: See test Transforms/LoopVectorize/branch-weights.ll. I don't
7645-
// think the MainLoopStep is correct.
7646-
unsigned MainLoopStep = UF * VF.getKnownMinValue();
7649+
unsigned MainLoopStep = EPI.MainLoopUF * EPI.MainLoopVF.getKnownMinValue();
76477650
unsigned EpilogueLoopStep =
76487651
EPI.EpilogueUF * EPI.EpilogueVF.getKnownMinValue();
76497652
// We assume the remaining `Count` is equally distributed in
@@ -10288,8 +10291,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1028810291

1028910292
// Second pass vectorizes the epilogue and adjusts the control flow
1029010293
// edges from the first pass.
10291-
EPI.MainLoopVF = EPI.EpilogueVF;
10292-
EPI.MainLoopUF = EPI.EpilogueUF;
1029310294
EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC,
1029410295
ORE, EPI, &CM, BFI, PSI,
1029510296
Checks, BestEpiPlan);

llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ define void @_Z3foov(i64 %n) {
1818
; CHECK-V1-IC1: [[VECTOR_PH]]:
1919
; CHECK-V1-IC1: br label %[[VECTOR_BODY:.*]]
2020
; CHECK-V1-IC1: [[VECTOR_BODY]]:
21-
; CHECK-V1-IC1: br i1 [[TMP10:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF0]], !llvm.loop [[LOOP1:![0-9]+]]
21+
; CHECK-V1-IC1: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF0]], !llvm.loop [[LOOP1:![0-9]+]]
2222
; CHECK-V1-IC1: [[MIDDLE_BLOCK]]:
2323
; CHECK-V1-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]], !prof [[PROF4:![0-9]+]]
2424
; CHECK-V1-IC1: [[SCALAR_PH]]:
@@ -34,13 +34,13 @@ define void @_Z3foov(i64 %n) {
3434
; CHECK-V2-IC1: [[VECTOR_PH]]:
3535
; CHECK-V2-IC1: br label %[[VECTOR_BODY:.*]]
3636
; CHECK-V2-IC1: [[VECTOR_BODY]]:
37-
; CHECK-V2-IC1: br i1 [[TMP4:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]]
37+
; CHECK-V2-IC1: br i1 [[TMP2:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]]
3838
; CHECK-V2-IC1: [[MIDDLE_BLOCK]]:
3939
; CHECK-V2-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]], !prof [[PROF5:![0-9]+]]
4040
; CHECK-V2-IC1: [[SCALAR_PH]]:
4141
; CHECK-V2-IC1: br label %[[FOR_BODY:.*]]
4242
; CHECK-V2-IC1: [[FOR_BODY]]:
43-
; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF5:![0-9]+]], !llvm.loop [[LOOP6:![0-9]+]]
43+
; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]]
4444
; CHECK-V2-IC1: [[FOR_COND_CLEANUP]]:
4545
;
4646
; CHECK-V2-IC4-LABEL: define void @_Z3foov(
@@ -52,15 +52,15 @@ define void @_Z3foov(i64 %n) {
5252
; CHECK-V2-IC4: [[VECTOR_PH]]:
5353
; CHECK-V2-IC4: br label %[[VECTOR_BODY:.*]]
5454
; CHECK-V2-IC4: [[VECTOR_BODY]]:
55-
; CHECK-V2-IC4: br i1 [[TMP10:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]]
55+
; CHECK-V2-IC4: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]]
5656
; CHECK-V2-IC4: [[MIDDLE_BLOCK]]:
5757
; CHECK-V2-IC4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF5:![0-9]+]]
5858
; CHECK-V2-IC4: [[VEC_EPILOG_ITER_CHECK]]:
5959
; CHECK-V2-IC4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF6:![0-9]+]]
6060
; CHECK-V2-IC4: [[VEC_EPILOG_PH]]:
6161
; CHECK-V2-IC4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
6262
; CHECK-V2-IC4: [[VEC_EPILOG_VECTOR_BODY]]:
63-
; CHECK-V2-IC4: br i1 [[TMP15:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
63+
; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
6464
; CHECK-V2-IC4: [[VEC_EPILOG_MIDDLE_BLOCK]]:
6565
; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF8:![0-9]+]]
6666
; CHECK-V2-IC4: [[VEC_EPILOG_SCALAR_PH]]:
@@ -101,16 +101,17 @@ for.cond.cleanup: ; preds = %for.body
101101
; CHECK-V2-IC1: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]]}
102102
; CHECK-V2-IC1: [[META3]] = !{!"llvm.loop.isvectorized", i32 1}
103103
; CHECK-V2-IC1: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"}
104-
; CHECK-V2-IC1: [[PROF5]] = !{!"branch_weights", i32 0, i32 0}
105-
; CHECK-V2-IC1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]], [[META3]]}
104+
; CHECK-V2-IC1: [[PROF5]] = !{!"branch_weights", i32 1, i32 3}
105+
; CHECK-V2-IC1: [[PROF6]] = !{!"branch_weights", i32 0, i32 0}
106+
; CHECK-V2-IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]], [[META3]]}
106107
;.
107108
; CHECK-V2-IC4: [[PROF0]] = !{!"branch_weights", i32 1, i32 127}
108109
; CHECK-V2-IC4: [[PROF1]] = !{!"branch_weights", i32 1, i32 63}
109110
; CHECK-V2-IC4: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]]}
110111
; CHECK-V2-IC4: [[META3]] = !{!"llvm.loop.isvectorized", i32 1}
111112
; CHECK-V2-IC4: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"}
112113
; CHECK-V2-IC4: [[PROF5]] = !{!"branch_weights", i32 1, i32 15}
113-
; CHECK-V2-IC4: [[PROF6]] = !{!"branch_weights", i32 4, i32 0}
114+
; CHECK-V2-IC4: [[PROF6]] = !{!"branch_weights", i32 4, i32 12}
114115
; CHECK-V2-IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META3]], [[META4]]}
115116
; CHECK-V2-IC4: [[PROF8]] = !{!"branch_weights", i32 1, i32 3}
116117
; CHECK-V2-IC4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0}

llvm/test/Transforms/LoopVectorize/branch-weights.ll

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,6 @@
44
; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-epilogue-vectorization \
55
; RUN: -epilogue-vectorization-force-VF=4 | FileCheck %s --check-prefix=MAINVF4IC2_EPI4
66

7-
; FIXME: For MAINVF4IC2_EPI4 the branch weights in the terminator of
8-
; the VEC_EPILOG_ITER_CHECK block should be [4,4] since we process 8
9-
; scalar iterations in the main loop, leaving the remaining count to
10-
; be in the range [0,7]. That gives a 4:4 chance of skipping the
11-
; vector epilogue. I believe the problem lies in
12-
; EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck
13-
; where the main loop VF is set to the same value as the epilogue VF.
147
define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 {
158
; MAINVF4IC1_EPI4-LABEL: define void @f0(
169
; MAINVF4IC1_EPI4-SAME: i8 [[N:%.*]], i32 [[LEN:%.*]], ptr [[P:%.*]]) !prof [[PROF0:![0-9]+]] {
@@ -145,7 +138,7 @@ exit:
145138
; MAINVF4IC2_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1}
146139
; MAINVF4IC2_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"}
147140
; MAINVF4IC2_EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 7}
148-
; MAINVF4IC2_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 0}
141+
; MAINVF4IC2_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 4}
149142
; MAINVF4IC2_EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0}
150143
; MAINVF4IC2_EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]}
151144
; MAINVF4IC2_EPI4: [[PROF11]] = !{!"branch_weights", i32 1, i32 3}

0 commit comments

Comments
 (0)