@@ -4380,11 +4380,13 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
43804380 assert (!isa<SCEVCouldNotCompute>(TC) && " Trip count SCEV must be computable" );
43814381 const SCEV *KnownMinTC;
43824382 bool ScalableTC = match (TC, m_scev_c_Mul (m_SCEV (KnownMinTC), m_SCEVVScale ()));
4383+ bool ScalableRemIter = false ;
43834384 // Use versions of TC and VF in which both are either scalable or fixed.
4384- if (ScalableTC == MainLoopVF.isScalable ())
4385+ if (ScalableTC == MainLoopVF.isScalable ()) {
4386+ ScalableRemIter = ScalableTC;
43854387 RemainingIterations =
43864388 SE.getURemExpr (TC, SE.getElementCount (TCType, MainLoopVF * IC));
4387- else if (ScalableTC) {
4389+ } else if (ScalableTC) {
43884390 const SCEV *EstimatedTC = SE.getMulExpr (
43894391 KnownMinTC,
43904392 SE.getConstant (TCType, CM.getVScaleForTuning ().value_or (1 )));
@@ -4407,15 +4409,6 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
44074409 LLVM_DEBUG (dbgs () << " LEV: Maximum Trip Count for Epilogue: "
44084410 << MaxTripCount << " \n " );
44094411 }
4410- // Check if the RemainingIterations is scalable.
4411- const SCEV *KnownMinRemIter = nullptr , *EstimatedRemIter = nullptr ;
4412- bool ScalableRemIter =
4413- match (RemainingIterations,
4414- m_scev_c_Mul (m_SCEV (KnownMinRemIter), m_SCEVVScale ()));
4415- if (ScalableRemIter)
4416- EstimatedRemIter = SE.getMulExpr (
4417- KnownMinRemIter,
4418- SE.getConstant (TCType, CM.getVScaleForTuning ().value_or (1 )));
44194412
44204413 auto SkipVF = [&](const SCEV *VF, const SCEV *RemIter) -> bool {
44214414 return SE.isKnownPredicate (CmpInst::ICMP_UGT, VF, RemIter);
@@ -4437,21 +4430,21 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
44374430
44384431 // If NextVF is greater than the number of remaining iterations, the
44394432 // epilogue loop would be dead. Skip such factors.
4440- if (ScalableRemIter == NextVF.Width .isScalable ()) {
4441- if (SkipVF (SE.getElementCount (TCType, NextVF.Width ), RemainingIterations))
4433+ // TODO: We should also consider comparing against scalable RemIter when
4434+ // SCEV is able to evaluate non-canonical vscale-based expressions.
4435+ if (!ScalableRemIter) {
4436+ // Handle the case where NextVF and RemainingIterations are in different
4437+ // numerical spaces.
4438+ if (NextVF.Width .isScalable ()) {
4439+ ElementCount EstimatedRuntimeNextVF = ElementCount::getFixed (
4440+ estimateElementCount (NextVF.Width , CM.getVScaleForTuning ()));
4441+ if (SkipVF (SE.getElementCount (TCType, EstimatedRuntimeNextVF),
4442+ RemainingIterations))
4443+ continue ;
4444+ } else if (SkipVF (SE.getElementCount (TCType, NextVF.Width ),
4445+ RemainingIterations))
44424446 continue ;
44434447 }
4444- // Handle the case where NextVF and RemainingIterations are in different
4445- // numerical spaces.
4446- else if (NextVF.Width .isScalable ()) {
4447- ElementCount EstimatedRuntimeNextVF = ElementCount::getFixed (
4448- estimateElementCount (NextVF.Width , CM.getVScaleForTuning ()));
4449- if (SkipVF (SE.getElementCount (TCType, EstimatedRuntimeNextVF),
4450- RemainingIterations))
4451- continue ;
4452- } else if (SkipVF (SE.getElementCount (TCType, NextVF.Width ),
4453- EstimatedRemIter))
4454- continue ;
44554448
44564449 if (Result.Width .isScalar () ||
44574450 isMoreProfitable (NextVF, Result, MaxTripCount, !CM.foldTailByMasking (),
0 commit comments