Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -535,13 +535,13 @@ class LoopVectorizationPlanner {
/// Returns true if the per-lane cost of VectorizationFactor A is lower than
/// that of B.
bool isMoreProfitable(const VectorizationFactor &A,
const VectorizationFactor &B) const;
const VectorizationFactor &B, bool HasTail) const;

/// Returns true if the per-lane cost of VectorizationFactor A is lower than
/// that of B in the context of vectorizing a loop with known \p MaxTripCount.
bool isMoreProfitable(const VectorizationFactor &A,
const VectorizationFactor &B,
const unsigned MaxTripCount) const;
const unsigned MaxTripCount, bool HasTail) const;

/// Determines if we have the infrastructure to vectorize the loop and its
/// epilogue, assuming the main loop is vectorized by \p VF.
Expand Down
39 changes: 22 additions & 17 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4253,9 +4253,10 @@ static unsigned getEstimatedRuntimeVF(ElementCount VF,
return EstimatedVF;
}

bool LoopVectorizationPlanner::isMoreProfitable(
const VectorizationFactor &A, const VectorizationFactor &B,
const unsigned MaxTripCount) const {
bool LoopVectorizationPlanner::isMoreProfitable(const VectorizationFactor &A,
const VectorizationFactor &B,
const unsigned MaxTripCount,
bool HasTail) const {
InstructionCost CostA = A.Cost;
InstructionCost CostB = B.Cost;

Expand Down Expand Up @@ -4293,9 +4294,9 @@ bool LoopVectorizationPlanner::isMoreProfitable(
if (!MaxTripCount)
return CmpFn(CostA * EstimatedWidthB, CostB * EstimatedWidthA);

auto GetCostForTC = [MaxTripCount, this](unsigned VF,
InstructionCost VectorCost,
InstructionCost ScalarCost) {
auto GetCostForTC = [MaxTripCount, HasTail](unsigned VF,
InstructionCost VectorCost,
InstructionCost ScalarCost) {
// If the trip count is a known (possibly small) constant, the trip count
// will be rounded up to an integer number of iterations under
// FoldTailByMasking. The total cost in that case will be
Expand All @@ -4304,20 +4305,23 @@ bool LoopVectorizationPlanner::isMoreProfitable(
// some extra overheads, but for the purpose of comparing the costs of
// different VFs we can use this to compare the total loop-body cost
// expected after vectorization.
if (CM.foldTailByMasking())
return VectorCost * divideCeil(MaxTripCount, VF);
return VectorCost * (MaxTripCount / VF) + ScalarCost * (MaxTripCount % VF);
if (HasTail)
return VectorCost * (MaxTripCount / VF) +
ScalarCost * (MaxTripCount % VF);
return VectorCost * divideCeil(MaxTripCount, VF);
};

auto RTCostA = GetCostForTC(EstimatedWidthA, CostA, A.ScalarCost);
auto RTCostB = GetCostForTC(EstimatedWidthB, CostB, B.ScalarCost);
return CmpFn(RTCostA, RTCostB);
}

bool LoopVectorizationPlanner::isMoreProfitable(
const VectorizationFactor &A, const VectorizationFactor &B) const {
bool LoopVectorizationPlanner::isMoreProfitable(const VectorizationFactor &A,
const VectorizationFactor &B,
bool HasTail) const {
const unsigned MaxTripCount = PSE.getSmallConstantMaxTripCount();
return LoopVectorizationPlanner::isMoreProfitable(A, B, MaxTripCount);
return LoopVectorizationPlanner::isMoreProfitable(A, B, MaxTripCount,
HasTail);
}

void LoopVectorizationPlanner::emitInvalidCostRemarks(
Expand Down Expand Up @@ -4607,7 +4611,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
continue;
}

if (isMoreProfitable(Candidate, ChosenFactor))
if (isMoreProfitable(Candidate, ChosenFactor, P->hasScalarTail()))
ChosenFactor = Candidate;
}
}
Expand All @@ -4621,7 +4625,8 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
}

LLVM_DEBUG(if (ForceVectorization && !ChosenFactor.Width.isScalar() &&
!isMoreProfitable(ChosenFactor, ScalarCost)) dbgs()
!isMoreProfitable(ChosenFactor, ScalarCost,
!CM.foldTailByMasking())) dbgs()
<< "LV: Vectorization seems to be not beneficial, "
<< "but was forced by a user.\n");
return ChosenFactor;
Expand Down Expand Up @@ -4787,7 +4792,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
}

if (Result.Width.isScalar() ||
isMoreProfitable(NextVF, Result, MaxTripCount))
isMoreProfitable(NextVF, Result, MaxTripCount, !CM.foldTailByMasking()))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why doesn't this use the new hasScalarTail function yet?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

selectEpilogueVectorizationFactor doesn't really work on VPlans directly, so hasScalarTail isn't readily available there. I think it would be good to update the function to use VPlans directly, as a separate change.

Result = NextVF;
}

Expand Down Expand Up @@ -7540,11 +7545,11 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {

InstructionCost Cost = cost(*P, VF);
VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
if (isMoreProfitable(CurrentFactor, BestFactor))
if (isMoreProfitable(CurrentFactor, BestFactor, P->hasScalarTail()))
BestFactor = CurrentFactor;

// If profitable add it to ProfitableVF list.
if (isMoreProfitable(CurrentFactor, ScalarFactor))
if (isMoreProfitable(CurrentFactor, ScalarFactor, P->hasScalarTail()))
ProfitableVFs.push_back(CurrentFactor);
}
}
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -3768,6 +3768,13 @@ class VPlan {
/// successors of the block in VPlan. The returned block is owned by the VPlan
/// and deleted once the VPlan is destroyed.
VPIRBasicBlock *createVPIRBasicBlock(BasicBlock *IRBB);

/// Reports whether the scalar tail may still run once the vector loop has
/// finished, i.e. whether the scalar preheader has at least one predecessor.
/// The answer is only meaningful after unneeded branches to the scalar tail
/// loop have been removed.
bool hasScalarTail() const {
const auto NumScalarPHPreds = getScalarPreheader()->getNumPredecessors();
return NumScalarPHPreds != 0;
}
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Expand Down