Skip to content

Commit 70c1c8f

Browse files
committed
[VPlan] Create SCEV expansion for epilogue check first.
VPExpandSCEVRecipes must be at the beginning of the entry block. addMinimumVectorEpilogueIterationCheck currently creates VPInstructions to compute the remaining iterations before potentially creating VPExpandSCEVRecipes. Fix this by first creating any SCEV expansions if needed. Fixes #162128.
1 parent 0df5fc7 commit 70c1c8f

File tree

2 files changed

+75
-3
lines changed

2 files changed

+75
-3
lines changed

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -763,16 +763,15 @@ void VPlanTransforms::addMinimumVectorEpilogueIterationCheck(
763763
// Add the minimum iteration check for the epilogue vector loop.
764764
VPValue *TC = Plan.getOrAddLiveIn(TripCount);
765765
VPBuilder Builder(cast<VPBasicBlock>(Plan.getEntry()));
766+
VPValue *VFxUF = Builder.createExpandSCEV(SE.getElementCount(
767+
TripCount->getType(), (EpilogueVF * EpilogueUF), SCEV::FlagNUW));
766768
VPValue *Count = Builder.createNaryOp(
767769
Instruction::Sub, {TC, Plan.getOrAddLiveIn(VectorTripCount)},
768770
DebugLoc::getUnknown(), "n.vec.remaining");
769771

770772
// Generate code to check if the loop's trip count is less than VF * UF of
771773
// the vector epilogue loop.
772774
auto P = RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
773-
VPValue *VFxUF = Builder.createExpandSCEV(SE.getElementCount(
774-
TripCount->getType(), (EpilogueVF * EpilogueUF), SCEV::FlagNUW));
775-
776775
auto *CheckMinIters = Builder.createICmp(
777776
P, Count, VFxUF, DebugLoc::getUnknown(), "min.epilog.iters.check");
778777
VPInstruction *Branch =
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
2+
; RUN: opt -p loop-vectorize -force-vector-width=4 -enable-epilogue-vectorization -epilogue-vectorization-force-VF=4 -S %s | FileCheck %s
3+
4+
@end = external global [128 x i8]
5+
6+
; Test case for https://github.com/llvm/llvm-project/issues/162128.
7+
define void @test_epilogue_step_scev_expansion(ptr %dst) {
8+
; CHECK-LABEL: define void @test_epilogue_step_scev_expansion(
9+
; CHECK-SAME: ptr [[DST:%.*]]) {
10+
; CHECK-NEXT: [[ITER_CHECK:.*]]:
11+
; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
12+
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
13+
; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
14+
; CHECK: [[VECTOR_PH]]:
15+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 sub (i64 0, i64 ptrtoint (ptr @end to i64)), 4
16+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 sub (i64 0, i64 ptrtoint (ptr @end to i64)), [[N_MOD_VF]]
17+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
18+
; CHECK: [[VECTOR_BODY]]:
19+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
20+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
21+
; CHECK-NEXT: store <4 x i8> zeroinitializer, ptr [[TMP0]], align 1
22+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
23+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
24+
; CHECK-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
25+
; CHECK: [[MIDDLE_BLOCK]]:
26+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 sub (i64 0, i64 ptrtoint (ptr @end to i64)), [[N_VEC]]
27+
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
28+
; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
29+
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 sub (i64 0, i64 ptrtoint (ptr @end to i64)), [[N_VEC]]
30+
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
31+
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
32+
; CHECK: [[VEC_EPILOG_PH]]:
33+
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
34+
; CHECK-NEXT: [[N_MOD_VF1:%.*]] = urem i64 sub (i64 0, i64 ptrtoint (ptr @end to i64)), 4
35+
; CHECK-NEXT: [[N_VEC2:%.*]] = sub i64 sub (i64 0, i64 ptrtoint (ptr @end to i64)), [[N_MOD_VF1]]
36+
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
37+
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
38+
; CHECK-NEXT: [[INDEX3:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
39+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX3]]
40+
; CHECK-NEXT: store <4 x i8> zeroinitializer, ptr [[TMP2]], align 1
41+
; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[INDEX3]], 4
42+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT4]], [[N_VEC2]]
43+
; CHECK-NEXT: br i1 [[TMP3]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
44+
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
45+
; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 sub (i64 0, i64 ptrtoint (ptr @end to i64)), [[N_VEC2]]
46+
; CHECK-NEXT: br i1 [[CMP_N5]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
47+
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
48+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC2]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
49+
; CHECK-NEXT: br label %[[LOOP:.*]]
50+
; CHECK: [[LOOP]]:
51+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
52+
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]]
53+
; CHECK-NEXT: store i8 0, ptr [[GEP_DST]], align 1
54+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
55+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], sub (i64 0, i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @end, i64 1) to i64))
56+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
57+
; CHECK: [[EXIT]]:
58+
; CHECK-NEXT: ret void
59+
;
60+
entry:
61+
br label %loop
62+
63+
loop:
64+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
65+
%gep.dst = getelementptr i8, ptr %dst, i64 %iv
66+
store i8 0, ptr %gep.dst, align 1
67+
%iv.next = add i64 %iv, 1
68+
%ec = icmp eq i64 %iv, sub (i64 0, i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @end, i64 1) to i64))
69+
br i1 %ec, label %exit, label %loop
70+
71+
exit:
72+
ret void
73+
}

0 commit comments

Comments
 (0)