@@ -687,30 +687,47 @@ void VPlanTransforms::addMinimumIterationCheck(
687687 VPValue *TripCountVPV = Plan.getTripCount ();
688688 const SCEV *TripCount = vputils::getSCEVExprForVPValue (TripCountVPV, SE);
689689 Type *TripCountTy = TripCount->getType ();
690- auto CreateMinTripCount = [&]() -> const SCEV * {
691- // Create or get max(MinProfitableTripCount, UF * VF) and return it.
690+ auto GetMinTripCount = [&]() -> const SCEV * {
691+ // Compute max(MinProfitableTripCount, UF * VF) and return it.
692692 const SCEV *VFxUF =
693693 SE.getElementCount (TripCountTy, (VF * UF), SCEV::FlagNUW);
694- const SCEV *MinProfitableTripCountSCEV =
695- SE.getElementCount (TripCountTy, MinProfitableTripCount, SCEV::FlagNUW);
696- const SCEV *Max = SE.getUMaxExpr (MinProfitableTripCountSCEV, VFxUF);
697- if (!VF.isScalable ())
698- return Max;
699-
700694 if (UF * VF.getKnownMinValue () >=
701695 MinProfitableTripCount.getKnownMinValue ()) {
702696 // TODO: SCEV should be able to simplify test.
703697 return VFxUF;
704698 }
705-
706- return Max;
699+ const SCEV *MinProfitableTripCountSCEV =
700+ SE.getElementCount (TripCountTy, MinProfitableTripCount, SCEV::FlagNUW);
701+ return SE.getUMaxExpr (MinProfitableTripCountSCEV, VFxUF);
707702 };
708703
709704 VPBasicBlock *EntryVPBB = Plan.getEntry ();
710705 VPBuilder Builder (EntryVPBB);
711706 VPValue *TripCountCheck = Plan.getFalse ();
712- const SCEV *Step = CreateMinTripCount ();
713- if (!TailFolded) {
707+ const SCEV *Step = GetMinTripCount ();
708+ if (TailFolded) {
709+ if (CheckNeededWithTailFolding) {
710+ // vscale is not necessarily a power-of-2, which means we cannot guarantee
711+ // an overflow to zero when updating induction variables and so an
712+ // additional overflow check is required before entering the vector loop.
713+
714+ // Get the maximum unsigned value for the type.
715+ VPValue *MaxUIntTripCount = Plan.getOrAddLiveIn (ConstantInt::get (
716+ TripCountTy, cast<IntegerType>(TripCountTy)->getMask ()));
717+ VPValue *DistanceToMax = Builder.createNaryOp (
718+ Instruction::Sub, {MaxUIntTripCount, TripCountVPV},
719+ DebugLoc::getUnknown ());
720+
721+ // Don't execute the vector loop if (UMax - n) < (VF * UF).
722+ // FIXME: Should only check VF * UF, but currently checks Step=max(VF*UF,
723+ // minProfitableTripCount).
724+ TripCountCheck = Builder.createICmp (ICmpInst::ICMP_ULT, DistanceToMax,
725+ Builder.createExpandSCEV (Step), DL);
726+ } else {
727+ // TripCountCheck = false, folding tail implies positive vector trip
728+ // count.
729+ }
730+ } else {
714731 // TODO: Emit unconditional branch to vector preheader instead of
715732 // conditional branch with known condition.
716733 TripCount = SE.applyLoopGuards (TripCount, OrigLoop);
@@ -727,23 +744,6 @@ void VPlanTransforms::addMinimumIterationCheck(
727744 TripCountCheck = Builder.createICmp (
728745 CmpPred, TripCountVPV, MinTripCountVPV, DL, " min.iters.check" );
729746 } // else step known to be < trip count, use TripCountCheck preset to false.
730- } else if (CheckNeededWithTailFolding) {
731- // vscale is not necessarily a power-of-2, which means we cannot guarantee
732- // an overflow to zero when updating induction variables and so an
733- // additional overflow check is required before entering the vector loop.
734-
735- // Get the maximum unsigned value for the type.
736- VPValue *MaxUIntTripCount = Plan.getOrAddLiveIn (ConstantInt::get (
737- TripCountTy, cast<IntegerType>(TripCountTy)->getMask ()));
738- VPValue *DistanceToMax =
739- Builder.createNaryOp (Instruction::Sub, {MaxUIntTripCount, TripCountVPV},
740- DebugLoc::getUnknown ());
741-
742- // Don't execute the vector loop if (UMax - n) < (VF * UF).
743- // FIXME: Should only check VF * UF, but currently checks Step=max(VF*UF,
744- // minProfitableTripCount).
745- TripCountCheck = Builder.createICmp (ICmpInst::ICMP_ULT, DistanceToMax,
746- Builder.createExpandSCEV (Step), DL);
747747 }
748748 VPInstruction *Term =
749749 Builder.createNaryOp (VPInstruction::BranchOnCond, {TripCountCheck}, DL);
0 commit comments