Skip to content

Commit a457b67

Browse files
committed
!fixup address latest comments, thanks
1 parent a872d4c commit a457b67

File tree

3 files changed

+57
-58
lines changed

3 files changed

+57
-58
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,7 @@ class VPBuilder {
334334
FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags(), DL));
335335
}
336336

337-
VPExpandSCEVRecipe *expandSCEV(const SCEV *Expr, ScalarEvolution &SE) {
337+
VPExpandSCEVRecipe *createExpandSCEV(const SCEV *Expr, ScalarEvolution &SE) {
338338
return tryInsertInstruction(new VPExpandSCEVRecipe(Expr, SE));
339339
}
340340

@@ -563,7 +563,8 @@ class LoopVectorizationPlanner {
563563
/// Emit remarks for recipes with invalid costs in the available VPlans.
564564
void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE);
565565

566-
/// Create a check to \p Plan to see if the vector loop should be executed.
566+
/// Add a check to \p Plan to see if the vector loop should be executed
567+
/// based on its trip count.
567568
void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF,
568569
ElementCount MinProfitableTripCount) const;
569570

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 15 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -544,7 +544,8 @@ class InnerLoopVectorizer {
544544
protected:
545545
friend class LoopVectorizationPlanner;
546546

547-
/// Create a new IR basic block for the scalar preheader.
547+
/// Create a new IR basic block for the scalar preheader whose name is
548+
/// prefixed with \p Prefix.
548549
void createScalarPreheader(StringRef Prefix);
549550

550551
/// Allow subclasses to override and print debug traces before/after vplan
@@ -669,17 +670,6 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
669670
UnrollFactor, CM, BFI, PSI, Checks, Plan),
670671
EPI(EPI), MinProfitableTripCount(MinProfitableTripCount) {}
671672

672-
// Override this function to handle the more complex control flow around the
673-
// three loops.
674-
BasicBlock *createVectorizedLoopSkeleton() final {
675-
return createEpilogueVectorizedLoopSkeleton();
676-
}
677-
678-
/// The interface for creating a vectorized skeleton using one of two
679-
/// different strategies, each corresponding to one execution of the vplan
680-
/// as described above.
681-
virtual BasicBlock *createEpilogueVectorizedLoopSkeleton() = 0;
682-
683673
/// Holds and updates state information required to vectorize the main loop
684674
/// and its epilogue in two separate passes. This setup helps us avoid
685675
/// regenerating and recomputing runtime safety checks. It also helps us to
@@ -710,10 +700,10 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
710700
EPI.MainLoopVF, EPI.MainLoopUF) {}
711701
/// Implements the interface for creating a vectorized skeleton using the
712702
/// *main loop* strategy (i.e. the first pass of VPlan execution).
713-
BasicBlock *createEpilogueVectorizedLoopSkeleton() final;
703+
BasicBlock *createVectorizedLoopSkeleton() final;
714704

715705
protected:
716-
// Create a check to see if the vector loop should be executed
706+
// Create a check to see if the main vector loop should be executed
717707
Value *createIterationCountCheck(ElementCount VF, unsigned UF) const;
718708

719709
/// Emits an iteration count bypass check once for the main loop (when \p
@@ -742,7 +732,7 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
742732
}
743733
/// Implements the interface for creating a vectorized skeleton using the
744734
/// *epilogue loop* strategy (i.e. the second pass of VPlan execution).
745-
BasicBlock *createEpilogueVectorizedLoopSkeleton() final;
735+
BasicBlock *createVectorizedLoopSkeleton() final;
746736

747737
protected:
748738
/// Emits an iteration count bypass check after the main vector loop has
@@ -7268,7 +7258,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
72687258
// at the wrong place.
72697259
for (VPRecipeBase &R : make_early_inc_range(*Entry)) {
72707260
if (!isa<VPIRInstruction, VPIRPhi>(&R))
7271-
continue;
7261+
break;
72727262
R.eraseFromParent();
72737263
}
72747264
for (Instruction &I : drop_begin(reverse(*Entry->getIRBasicBlock())))
@@ -7415,7 +7405,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
74157405

74167406
/// This function is partially responsible for generating the control flow
74177407
/// depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
7418-
BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
7408+
BasicBlock *EpilogueVectorizerMainLoop::createVectorizedLoopSkeleton() {
74197409
createScalarPreheader("");
74207410

74217411
// Generate the code to check the minimum iteration count of the vector
@@ -7501,8 +7491,7 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
75017491

75027492
/// This function is partially responsible for generating the control flow
75037493
/// depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
7504-
BasicBlock *
7505-
EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
7494+
BasicBlock *EpilogueVectorizerEpilogueLoop::createVectorizedLoopSkeleton() {
75067495
createScalarPreheader("vec.epilog.");
75077496

75087497
// Now, compare the remaining count and if there aren't enough iterations to
@@ -9315,18 +9304,19 @@ void LoopVectorizationPlanner::addMinimumIterationCheck(
93159304
// vscale is not necessarily a power-of-2, which means we cannot guarantee
93169305
// an overflow to zero when updating induction variables and so an
93179306
// additional overflow check is required before entering the vector loop.
9318-
bool CheckNeededWithTailFolding =
9307+
bool IsIndvarOverflowCheckNeededForVF =
93199308
VF.isScalable() && !TTI.isVScaleKnownToBeAPowerOfTwo() &&
9320-
!isIndvarOverflowCheckKnownFalse(&CM, VF, 1) &&
9309+
!isIndvarOverflowCheckKnownFalse(&CM, VF, UF) &&
93219310
CM.getTailFoldingStyle() !=
93229311
TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck;
9312+
const uint32_t *BranchWeigths =
9313+
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())
9314+
? &MinItersBypassWeights[0]
9315+
: nullptr;
93239316
VPlanTransforms::addMinimumIterationCheck(
93249317
Plan, VF, UF, MinProfitableTripCount,
93259318
CM.requiresScalarEpilogue(VF.isVector()), CM.foldTailByMasking(),
9326-
CheckNeededWithTailFolding, OrigLoop,
9327-
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())
9328-
? &MinItersBypassWeights[0]
9329-
: nullptr,
9319+
IsIndvarOverflowCheckNeededForVF, OrigLoop, BranchWeigths,
93309320
OrigLoop->getLoopPredecessor()->getTerminator()->getDebugLoc(),
93319321
*PSE.getSE());
93329322
}

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 39 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -680,62 +680,70 @@ void VPlanTransforms::addMinimumIterationCheck(
680680
// vector trip count is zero. This check also covers the case where adding one
681681
// to the backedge-taken count overflowed leading to an incorrect trip count
682682
// of zero. In this case we will also jump to the scalar loop.
683-
auto P = RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
683+
CmpInst::Predicate CmpPred =
684+
RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
684685
// If tail is to be folded, vector loop takes care of all iterations.
685-
const SCEV *Count = vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE);
686-
Type *CountTy = Count->getType();
687-
auto CreateStep = [&]() -> const SCEV * {
688-
const SCEV *VFxUF = SE.getElementCount(CountTy, (VF * UF), SCEV::FlagNUW);
689-
// Create step with max(MinProTripCount, UF * VF).
690-
if (UF * VF.getKnownMinValue() >= MinProfitableTripCount.getKnownMinValue())
686+
VPValue *TripCountVPV = Plan.getTripCount();
687+
const SCEV *TripCount = vputils::getSCEVExprForVPValue(TripCountVPV, SE);
688+
Type *TripCountTy = TripCount->getType();
689+
auto CreateMinTripCount = [&]() -> const SCEV * {
690+
// Create or get max(MinProfitableTripCount, UF * VF) and return it.
691+
const SCEV *VFxUF =
692+
SE.getElementCount(TripCountTy, (VF * UF), SCEV::FlagNUW);
693+
const SCEV *MinProfitableTripCountSCEV =
694+
SE.getElementCount(TripCountTy, MinProfitableTripCount, SCEV::FlagNUW);
695+
const SCEV *Max = SE.getUMaxExpr(MinProfitableTripCountSCEV, VFxUF);
696+
if (!VF.isScalable())
697+
return Max;
698+
699+
if (UF * VF.getKnownMinValue() >=
700+
MinProfitableTripCount.getKnownMinValue()) {
701+
// TODO: SCEV should be able to simplify this test.
691702
return VFxUF;
703+
}
692704

693-
const SCEV *MinProfTC =
694-
SE.getElementCount(CountTy, MinProfitableTripCount, SCEV::FlagNUW);
695-
if (!VF.isScalable())
696-
return MinProfTC;
697-
return SE.getUMaxExpr(MinProfTC, VFxUF);
705+
return Max;
698706
};
699707

700708
VPBasicBlock *EntryVPBB = Plan.getEntry();
701709
VPBuilder Builder(EntryVPBB);
702-
VPValue *CheckMinIters = Plan.getFalse();
703-
const SCEV *Step = CreateStep();
710+
VPValue *TripCountCheck = Plan.getFalse();
711+
const SCEV *Step = CreateMinTripCount();
704712
if (!TailFolded) {
705713
// TODO: Emit unconditional branch to vector preheader instead of
706714
// conditional branch with known condition.
707-
const SCEV *TripCountSCEV = SE.applyLoopGuards(Count, OrigLoop);
715+
TripCount = SE.applyLoopGuards(TripCount, OrigLoop);
708716
// Check if the trip count is < the step.
709-
if (SE.isKnownPredicate(P, TripCountSCEV, Step)) {
717+
if (SE.isKnownPredicate(CmpPred, TripCount, Step)) {
710718
// TODO: Ensure step is at most the trip count when determining max VF and
711719
// UF, w/o tail folding.
712-
CheckMinIters = Plan.getTrue();
713-
} else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(P),
714-
TripCountSCEV, Step)) {
720+
TripCountCheck = Plan.getTrue();
721+
} else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(CmpPred),
722+
TripCount, Step)) {
715723
// Generate the minimum iteration check only if we cannot prove the
716724
// check is known to be true, or known to be false.
717-
CheckMinIters = Builder.createICmp(P, Plan.getTripCount(),
718-
Builder.expandSCEV(Step, SE), DL,
719-
"min.iters.check");
720-
} // else step known to be < trip count, use CheckMinIters preset to false.
725+
VPValue *MinTripCountVPV = Builder.createExpandSCEV(Step, SE);
726+
TripCountCheck = Builder.createICmp(
727+
CmpPred, TripCountVPV, MinTripCountVPV, DL, "min.iters.check");
728+
} // else step known to be < trip count, use TripCountCheck preset to false.
721729
} else if (CheckNeededWithTailFolding) {
722730
// vscale is not necessarily a power-of-2, which means we cannot guarantee
723731
// an overflow to zero when updating induction variables and so an
724732
// additional overflow check is required before entering the vector loop.
725733

726734
// Get the maximum unsigned value for the type.
727-
VPValue *MaxUIntTripCount = Plan.getOrAddLiveIn(
728-
ConstantInt::get(CountTy, cast<IntegerType>(CountTy)->getMask()));
729-
VPValue *LHS = Builder.createNaryOp(Instruction::Sub,
730-
{MaxUIntTripCount, Plan.getTripCount()},
731-
DebugLoc::getUnknown());
735+
VPValue *MaxUIntTripCount = Plan.getOrAddLiveIn(ConstantInt::get(
736+
TripCountTy, cast<IntegerType>(TripCountTy)->getMask()));
737+
VPValue *DistanceToMax =
738+
Builder.createNaryOp(Instruction::Sub, {MaxUIntTripCount, TripCountVPV},
739+
DebugLoc::getUnknown());
732740

733741
// Don't execute the vector loop if (UMax - n) < (VF * UF).
734-
CheckMinIters = Builder.createICmp(ICmpInst::ICMP_ULT, LHS,
735-
Builder.expandSCEV(Step, SE), DL);
742+
TripCountCheck = Builder.createICmp(ICmpInst::ICMP_ULT, DistanceToMax,
743+
Builder.createExpandSCEV(Step, SE), DL);
736744
}
737745
VPInstruction *Term =
738-
Builder.createNaryOp(VPInstruction::BranchOnCond, {CheckMinIters}, DL);
746+
Builder.createNaryOp(VPInstruction::BranchOnCond, {TripCountCheck}, DL);
739747
if (MinItersBypassWeights) {
740748
MDBuilder MDB(Plan.getContext());
741749
MDNode *BranchWeights = MDB.createBranchWeights(

0 commit comments

Comments
 (0)