@@ -4016,14 +4016,17 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
40164016 }
40174017
40184018 auto NoScalarEpilogueNeeded = [this , &UserIC](unsigned MaxVF) {
4019+ if (TheLoop->getExitingBlock () != TheLoop->getLoopLatch () &&
4020+ !Legal->hasUncountableEarlyExit ())
4021+ return false ;
40194022 unsigned MaxVFtimesIC = UserIC ? MaxVF * UserIC : MaxVF;
40204023 ScalarEvolution *SE = PSE.getSE ();
4021- // Currently only loops with countable exits are vectorized, but calling
4022- // getSymbolicMaxBackedgeTakenCount allows enablement work for loops with
4023- // uncountable exits whilst also ensuring the symbolic maximum and known
4024- // back-edge taken count remain identical for loops with countable exits.
4024+ // Calling getSymbolicMaxBackedgeTakenCount enables support for loops
4025+ // with uncountable exits. For countable loops, the symbolic maximum must
4026+ // remain identical to the known back-edge taken count.
40254027 const SCEV *BackedgeTakenCount = PSE.getSymbolicMaxBackedgeTakenCount ();
4026- assert (BackedgeTakenCount == PSE.getBackedgeTakenCount () &&
4028+ assert ((Legal->hasUncountableEarlyExit () ||
4029+ BackedgeTakenCount == PSE.getBackedgeTakenCount ()) &&
40274030 " Invalid loop count" );
40284031 const SCEV *ExitCount = SE->getAddExpr (
40294032 BackedgeTakenCount, SE->getOne (BackedgeTakenCount->getType ()));
@@ -4033,9 +4036,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
40334036 return Rem->isZero ();
40344037 };
40354038
4036- bool HasSingleLatchExit =
4037- TheLoop->getExitingBlock () == TheLoop->getLoopLatch ();
4038- if (HasSingleLatchExit && MaxPowerOf2RuntimeVF > 0u ) {
4039+ if (MaxPowerOf2RuntimeVF > 0u ) {
40394040 assert ((UserVF.isNonZero () || isPowerOf2_32 (*MaxPowerOf2RuntimeVF)) &&
40404041 " MaxFixedVF must be a power of 2" );
40414042 if (NoScalarEpilogueNeeded (*MaxPowerOf2RuntimeVF)) {
@@ -4046,8 +4047,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
40464047 }
40474048
40484049 auto ExpectedTC = getSmallBestKnownTC (PSE, TheLoop);
4049- if (HasSingleLatchExit && ExpectedTC &&
4050- ExpectedTC <= TTI.getMinTripCountTailFoldingThreshold ()) {
4050+ if (ExpectedTC && ExpectedTC <= TTI.getMinTripCountTailFoldingThreshold ()) {
40514051 if (MaxPowerOf2RuntimeVF > 0u ) {
40524052 // If we have a low-trip-count, and the fixed-width VF is known to divide
40534053 // the trip count but the scalable factor does not, use the fixed-width
0 commit comments