@@ -2432,14 +2432,20 @@ Value *InnerLoopVectorizer::createIterationCountCheck(ElementCount VF,
24322432 // check is known to be true, or known to be false.
24332433 CheckMinIters = Builder.CreateICmp (P, Count, Step, " min.iters.check" );
24342434 } // else step known to be < trip count, use CheckMinIters preset to false.
2435- } else {
2436- // If we're tail folding, then as long as our VF is a factor of two
2437- // we'll wrap to zero and don't need an explicit iterations check.
2438- // Per the LangRef, vscale is not necessarily a power-of-2, but all
2439- // in tree targets are
2440- assert (VF.isKnownMultipleOf (2 ) ||
2441- (!VF.isScalable () && 1 == VF.getKnownMinValue ()) ||
2442- (VF.isScalable () && TTI->isVScaleKnownToBeAPowerOfTwo ()));
2435+ } else if (VF.isScalable () && !TTI->isVScaleKnownToBeAPowerOfTwo () &&
2436+ !isIndvarOverflowCheckKnownFalse (Cost, VF, UF) &&
2437+ Style != TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) {
2438+ // vscale is not necessarily a power-of-2, which means we cannot guarantee
2439+ // an overflow to zero when updating induction variables and so an
2440+ // additional overflow check is required before entering the vector loop.
2441+
2442+ // Get the maximum unsigned value for the type.
2443+ Value *MaxUIntTripCount =
2444+ ConstantInt::get (CountTy, cast<IntegerType>(CountTy)->getMask ());
2445+ Value *LHS = Builder.CreateSub (MaxUIntTripCount, Count);
2446+
2447+ // Don't execute the vector loop if (UMax - n) < (VF * UF).
2448+ CheckMinIters = Builder.CreateICmp (ICmpInst::ICMP_ULT, LHS, CreateStep ());
24432449 }
24442450 return CheckMinIters;
24452451}
0 commit comments