Skip to content

Commit 4afbfd0

Browse files
committed
Address review comments
1 parent b22497f commit 4afbfd0

File tree

2 files changed

+19
-17
lines changed

2 files changed

+19
-17
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 17 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -10177,20 +10177,19 @@ static void checkMixedPrecision(Loop *L, OptimizationRemarkEmitter *ORE) {
1017710177
/// TODO: This is currently overly pessimistic because the loop may not take
1017810178
/// the early exit, but better to keep this conservative for now. In future,
1017910179
/// it might be possible to relax this by using branch probabilities.
10180-
static InstructionCost calculateEarlyExitCost(LoopVectorizationCostModel &CM,
10180+
static InstructionCost calculateEarlyExitCost(VPCostContext &CostCtx,
1018110181
VPlan &Plan, ElementCount VF) {
1018210182
InstructionCost Cost = 0;
10183-
VPCostContext CostCtx(CM.TTI, *CM.TLI, CM.Legal->getWidestInductionType(), CM,
10184-
CM.CostKind);
10185-
LLVM_DEBUG(
10186-
dbgs() << "Calculating cost of work in vector early exit block:\n");
1018710183
for (auto *ExitVPBB : Plan.getExitBlocks()) {
1018810184
for (auto *PredVPBB : ExitVPBB->getPredecessors()) {
1018910185
// If the predecessor is not the middle.block, then it must be the
1019010186
// vector.early.exit block, which may contain work to calculate the exit
1019110187
// values of variables used outside the loop.
10192-
if (PredVPBB != Plan.getMiddleBlock())
10188+
if (PredVPBB != Plan.getMiddleBlock()) {
10189+
LLVM_DEBUG(dbgs() << "Calculating cost of work in exit block "
10190+
<< PredVPBB->getName() << ":\n");
1019310191
Cost += PredVPBB->cost(VF, CostCtx);
10192+
}
1019410193
}
1019510194
}
1019610195
return Cost;
@@ -10204,18 +10203,18 @@ static InstructionCost calculateEarlyExitCost(LoopVectorizationCostModel &CM,
1020410203
/// extra work when exiting the loop early, such as calculating the final
1020510204
/// exit values of variables used outside the loop.
1020610205
static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
10207-
VectorizationFactor &VF,
10208-
LoopVectorizationCostModel &CM,
10206+
VectorizationFactor &VF, Loop *L,
1020910207
PredicatedScalarEvolution &PSE,
10210-
VPlan &Plan,
10211-
ScalarEpilogueLowering SEL) {
10208+
VPCostContext &CostCtx, VPlan &Plan,
10209+
ScalarEpilogueLowering SEL,
10210+
std::optional<unsigned> VScale) {
1021210211
InstructionCost TotalCost = Checks.getCost();
1021310212
if (!TotalCost.isValid())
1021410213
return false;
1021510214

1021610215
// Add on the cost of any work required in the vector early exit block, if
1021710216
// one exists.
10218-
TotalCost += calculateEarlyExitCost(CM, Plan, VF.Width);
10217+
TotalCost += calculateEarlyExitCost(CostCtx, Plan, VF.Width);
1021910218

1022010219
// When interleaving only scalar and vector cost will be equal, which in turn
1022110220
// would lead to a divide by 0. Fall back to hard threshold.
@@ -10266,7 +10265,7 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
1026610265
// For now we assume the epilogue cost EpiC = 0 for simplicity. Note that
1026710266
// the computations are performed on doubles, not integers and the result
1026810267
// is rounded up, hence we get an upper estimate of the TC.
10269-
unsigned IntVF = getEstimatedRuntimeVF(VF.Width, CM.getVScaleForTuning());
10268+
unsigned IntVF = getEstimatedRuntimeVF(VF.Width, VScale);
1027010269
uint64_t RtC = *TotalCost.getValue();
1027110270
uint64_t Div = ScalarC * IntVF - *VF.Cost.getValue();
1027210271
uint64_t MinTC1 = Div == 0 ? 0 : divideCeil(RtC * IntVF, Div);
@@ -10294,7 +10293,7 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
1029410293

1029510294
// Skip vectorization if the expected trip count is less than the minimum
1029610295
// required trip count.
10297-
if (auto ExpectedTC = getSmallBestKnownTC(PSE, CM.TheLoop)) {
10296+
if (auto ExpectedTC = getSmallBestKnownTC(PSE, L)) {
1029810297
if (ElementCount::isKnownLT(ElementCount::getFixed(*ExpectedTC),
1029910298
VF.MinProfitableTripCount)) {
1030010299
LLVM_DEBUG(dbgs() << "LV: Vectorization is not beneficial: expected "
@@ -10694,9 +10693,12 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1069410693
// Check if it is profitable to vectorize with runtime checks.
1069510694
bool ForceVectorization =
1069610695
Hints.getForce() == LoopVectorizeHints::FK_Enabled;
10696+
VPCostContext CostCtx(CM.TTI, *CM.TLI, CM.Legal->getWidestInductionType(),
10697+
CM, CM.CostKind);
1069710698
if (!ForceVectorization &&
10698-
!isOutsideLoopWorkProfitable(Checks, VF, CM, PSE,
10699-
LVP.getPlanFor(VF.Width), SEL)) {
10699+
!isOutsideLoopWorkProfitable(Checks, VF, L, PSE, CostCtx,
10700+
LVP.getPlanFor(VF.Width), SEL,
10701+
CM.getVScaleForTuning())) {
1070010702
ORE->emit([&]() {
1070110703
return OptimizationRemarkAnalysisAliasing(
1070210704
DEBUG_TYPE, "CantReorderMemOps", L->getStartLoc(),

llvm/test/Transforms/LoopVectorize/AArch64/early_exit_costs.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ declare void @init_mem(ptr, i64);
1010
define i64 @same_exit_block_pre_inc_use1_sve() #1 {
1111
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_sve'
1212
; CHECK: LV: Selecting VF: vscale x 16
13-
; CHECK: Calculating cost of work in vector early exit block:
13+
; CHECK: Calculating cost of work in exit block vector.early.exit
1414
; CHECK-NEXT: Cost of 6 for VF vscale x 16: EMIT vp<{{.*}}> = extract-first-active
1515
; CHECK-NEXT: Cost of 6 for VF vscale x 16: EMIT vp<{{.*}}> = extract-first-active
1616
; CHECK: LV: Minimum required TC for runtime checks to be profitable:32
@@ -47,7 +47,7 @@ loop.end:
4747
define i64 @same_exit_block_pre_inc_use1_nosve() {
4848
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_nosve'
4949
; CHECK: LV: Selecting VF: 16
50-
; CHECK: Calculating cost of work in vector early exit block:
50+
; CHECK: Calculating cost of work in exit block vector.early.exit
5151
; CHECK-NEXT: Cost of 50 for VF 16: EMIT vp<{{.*}}> = extract-first-active
5252
; CHECK-NEXT: Cost of 50 for VF 16: EMIT vp<{{.*}}> = extract-first-active
5353
; CHECK: LV: Minimum required TC for runtime checks to be profitable:176

0 commit comments

Comments
 (0)