Skip to content

Commit 8d001b2

Browse files
committed
!fixup address comments, thanks
1 parent 6cd2715 commit 8d001b2

20 files changed

+181
-517
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10262,6 +10262,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1026210262
L, PSE, LI, DT, TLI, TTI, AC, ORE, ElementCount::getFixed(1),
1026310263
ElementCount::getFixed(1), IC, &CM, BFI, PSI, Checks, BestPlan);
1026410264

10265+
// TODO: Move to general VPlan pipeline once epilogue loops are also supported.
10266+
VPlanTransforms::runPass(VPlanTransforms::materializeVectorTripCount, BestPlan, VF.Width, IC, PSE);
10267+
1026510268
LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
1026610269

1026710270
ORE->emit([&]() {
@@ -10329,24 +10332,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1032910332
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
1033010333
VF.MinProfitableTripCount, IC, &CM, BFI, PSI,
1033110334
Checks, BestPlan);
10332-
10333-
// Materialize vector trip counts for constants early if it can simply
10334-
// be computed as (Original TC / VF * UF) * VF * UF.
10335-
if (BestPlan.hasScalarTail() &&
10336-
!CM.requiresScalarEpilogue(VF.Width.isVector())) {
10337-
VPValue *TC = BestPlan.getTripCount();
10338-
if (TC->isLiveIn()) {
10339-
ScalarEvolution &SE = *PSE.getSE();
10340-
auto *TCScev = SE.getSCEV(TC->getLiveInIRValue());
10341-
const SCEV *VFxUF =
10342-
SE.getElementCount(TCScev->getType(), VF.Width * IC);
10343-
auto VecTCScev =
10344-
SE.getMulExpr(SE.getUDivExpr(TCScev, VFxUF), VFxUF);
10345-
if (auto *NewC = dyn_cast<SCEVConstant>(VecTCScev))
10346-
BestPlan.getVectorTripCount().setUnderlyingValue(
10347-
NewC->getValue());
10348-
}
10349-
}
10335+
// TODO: Move to general VPlan pipeline once epilogue loops are also supported.
10336+
VPlanTransforms::runPass(VPlanTransforms::materializeVectorTripCount, BestPlan, VF.Width, IC, PSE);
1035010337

1035110338
LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
1035210339
++LoopsVectorized;

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3115,6 +3115,29 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
31153115
}
31163116
}
31173117

3118+
// Try to record a constant as the underlying value of \p Plan's vector trip
// count. \p BestVF and \p BestUF must be available in \p Plan (asserted below);
// \p PSE supplies the ScalarEvolution used to fold the expression. Nothing is
// changed when the early-exit check fires or the expression is not a constant.
void VPlanTransforms::materializeVectorTripCount(VPlan &Plan, ElementCount BestVF,
3119+
unsigned BestUF,
3120+
PredicatedScalarEvolution &PSE) {
3121+
assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
3122+
assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
3123+
3124+
VPValue *TC = Plan.getTripCount();
3125+
// Skip cases for which the trip count may be non-trivial to materialize: no scalar tail, a middle block whose single successor is the scalar preheader (presumably the scalar-epilogue-required case), or a trip count that is not a live-in.
3126+
if (!Plan.hasScalarTail() || Plan.getMiddleBlock()->getSingleSuccessor() == Plan.getScalarPreheader() || !TC->isLiveIn())
3127+
return;
3128+
// Materialize the vector trip count early when SCEV folds it to a constant;
3129+
// it is computed as (Original TC / (VF * UF)) * (VF * UF).
3130+
ScalarEvolution &SE = *PSE.getSE();
3131+
auto *TCScev = SE.getSCEV(TC->getLiveInIRValue());
3132+
const SCEV *VFxUF =
3133+
SE.getElementCount(TCScev->getType(), BestVF * BestUF);
3134+
auto VecTCScev =
3135+
SE.getMulExpr(SE.getUDivExpr(TCScev, VFxUF), VFxUF);
3136+
if (auto *NewC = dyn_cast<SCEVConstant>(VecTCScev)) // Only a constant result is recorded.
3137+
Plan.getVectorTripCount().setUnderlyingValue(
3138+
NewC->getValue());
3139+
}
3140+
31183141
/// Returns true if \p V is VPWidenLoadRecipe or VPInterleaveRecipe that can be
31193142
/// converted to a narrower recipe. \p V is used by a wide recipe \p WideMember
31203143
/// that feeds a store interleave group at index \p Idx, \p WideMember0 is the

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,11 @@ struct VPlanTransforms {
222222
/// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
223223
static void materializeBroadcasts(VPlan &Plan);
224224

225+
/// Materialize the vector trip count early if it folds to a constant, i.e. if it can simply be computed as (Original TC / (VF * UF)) * (VF * UF).
226+
static void materializeVectorTripCount(VPlan &Plan, ElementCount BestVF,
227+
unsigned BestUF,
228+
PredicatedScalarEvolution &PSE);
229+
225230
/// Try to convert a plan with interleave groups with VF elements to a plan
226231
/// with the interleave groups replaced by wide loads and stores processing VF
227232
/// elements, if all transformed interleave groups access the full vector

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-derived-ivs.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ define void @narrow_with_uniform_add_and_gep(ptr noalias %p) {
247247
; VF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
248248
; VF2-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
249249
; VF2: [[MIDDLE_BLOCK]]:
250-
; VF2-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]]
250+
; VF2-NEXT: br [[EXIT:label %.*]]
251251
; VF2: [[SCALAR_PH]]:
252252
;
253253
; VF4-LABEL: define void @narrow_with_uniform_add_and_gep(
@@ -273,7 +273,7 @@ define void @narrow_with_uniform_add_and_gep(ptr noalias %p) {
273273
; VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
274274
; VF4-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
275275
; VF4: [[MIDDLE_BLOCK]]:
276-
; VF4-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]]
276+
; VF4-NEXT: br [[EXIT:label %.*]]
277277
; VF4: [[SCALAR_PH]]:
278278
;
279279
entry:

llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,6 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
187187
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
188188
; CHECK-NEXT: LV: Loop does not require scalar epilogue
189189
; CHECK-NEXT: LV: Loop does not require scalar epilogue
190-
; CHECK-NEXT: LV: Loop does not require scalar epilogue
191190
; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
192191
; CHECK-NEXT: VPlan 'Final VPlan for VF={vscale x 4},UF={1}' {
193192
; CHECK-NEXT: Live-in ir<%18> = VF
@@ -597,7 +596,6 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
597596
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
598597
; CHECK-NEXT: LV: Loop does not require scalar epilogue
599598
; CHECK-NEXT: LV: Loop does not require scalar epilogue
600-
; CHECK-NEXT: LV: Loop does not require scalar epilogue
601599
; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
602600
; CHECK-NEXT: VPlan 'Final VPlan for VF={vscale x 4},UF={1}' {
603601
; CHECK-NEXT: Live-in ir<%18> = VF

llvm/test/Transforms/LoopVectorize/first-order-recurrence-interleave-only.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ define float @for_load_interleave_only(ptr %src) {
77
; CHECK-NEXT: [[ENTRY:.*]]:
88
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
99
; CHECK: [[VECTOR_PH]]:
10-
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC]], i64 16000
1110
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1211
; CHECK: [[VECTOR_BODY]]:
1312
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -16,18 +15,17 @@ define float @for_load_interleave_only(ptr %src) {
1615
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
1716
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP1]]
1817
; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[NEXT_GEP]], align 4
19-
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[NEXT_GEP2]], align 4
2018
; CHECK-NEXT: store float 0.000000e+00, ptr [[NEXT_GEP]], align 4
2119
; CHECK-NEXT: store float 0.000000e+00, ptr [[NEXT_GEP2]], align 4
2220
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
2321
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
2422
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
2523
; CHECK: [[MIDDLE_BLOCK]]:
26-
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
24+
; CHECK-NEXT: br label %[[EXIT:.*]]
2725
; CHECK: [[SCALAR_PH]]:
28-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1001, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
29-
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[SRC]], %[[ENTRY]] ]
30-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
26+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1, %[[ENTRY]] ]
27+
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[SRC]], %[[ENTRY]] ]
28+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ]
3129
; CHECK-NEXT: br label %[[LOOP:.*]]
3230
; CHECK: [[LOOP]]:
3331
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]

0 commit comments

Comments
 (0)