@@ -1352,27 +1352,46 @@ class LoopVectorizationCostModel {
13521352 return InterleaveInfo.getInterleaveGroup (Instr);
13531353 }
13541354
1355+ // / Calculate in advance whether a scalar epilogue is required when
1356+ // / vectorizing and not vectorizing. If \p Invalidate is true then
1357+ // / invalidate a previous decision.
1358+ void collectScalarEpilogueRequirements (bool Invalidate) {
1359+ auto NeedsScalarEpilogue = [&](bool IsVectorizing) -> bool {
1360+ if (!isScalarEpilogueAllowed ()) {
1361+ LLVM_DEBUG (dbgs () << " LV: Loop does not require scalar epilogue" );
1362+ return false ;
1363+ }
1364+ // If we might exit from anywhere but the latch, must run the exiting
1365+ // iteration in scalar form.
1366+ if (TheLoop->getExitingBlock () != TheLoop->getLoopLatch ()) {
1367+ LLVM_DEBUG (
1368+ dbgs () << " LV: Loop requires scalar epilogue: multiple exits" );
1369+ return true ;
1370+ }
1371+ if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue ()) {
1372+ LLVM_DEBUG (dbgs () << " LV: Loop requires scalar epilogue: "
1373+ " interleaved group requires scalar epilogue" );
1374+ return true ;
1375+ }
1376+ LLVM_DEBUG (dbgs () << " LV: Loop does not require scalar epilogue" );
1377+ return false ;
1378+ };
1379+
1380+ assert ((Invalidate || !RequiresScalarEpilogue) &&
1381+ " Already determined scalar epilogue requirements!" );
1382+ std::pair<bool , bool > Result;
1383+ Result.first = NeedsScalarEpilogue (true );
1384+ LLVM_DEBUG (dbgs () << " , when vectorizing\n " );
1385+ Result.second = NeedsScalarEpilogue (false );
1386+ LLVM_DEBUG (dbgs () << " , when not vectorizing\n " );
1387+ RequiresScalarEpilogue = Result;
1388+ }
1389+
13551390 // / Returns true if we're required to use a scalar epilogue for at least
13561391 // / the final iteration of the original loop.
13571392 bool requiresScalarEpilogue (bool IsVectorizing) const {
1358- if (!isScalarEpilogueAllowed ()) {
1359- LLVM_DEBUG (dbgs () << " LV: Loop does not require scalar epilogue\n " );
1360- return false ;
1361- }
1362- // If we might exit from anywhere but the latch, must run the exiting
1363- // iteration in scalar form.
1364- if (TheLoop->getExitingBlock () != TheLoop->getLoopLatch ()) {
1365- LLVM_DEBUG (
1366- dbgs () << " LV: Loop requires scalar epilogue: multiple exits\n " );
1367- return true ;
1368- }
1369- if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue ()) {
1370- LLVM_DEBUG (dbgs () << " LV: Loop requires scalar epilogue: "
1371- " interleaved group requires scalar epilogue\n " );
1372- return true ;
1373- }
1374- LLVM_DEBUG (dbgs () << " LV: Loop does not require scalar epilogue\n " );
1375- return false ;
1393+ auto &CachedResult = *RequiresScalarEpilogue;
1394+ return IsVectorizing ? CachedResult.first : CachedResult.second ;
13761395 }
13771396
13781397 // / Returns true if we're required to use a scalar epilogue for at least
@@ -1396,6 +1415,15 @@ class LoopVectorizationCostModel {
13961415 return ScalarEpilogueStatus == CM_ScalarEpilogueAllowed;
13971416 }
13981417
1418+ // / Update the ScalarEpilogueStatus to a new value, potentially triggering a
1419+ // / recalculation of the scalar epilogue requirements.
1420+ void setScalarEpilogueStatus (ScalarEpilogueLowering Status) {
1421+ bool Changed = ScalarEpilogueStatus != Status;
1422+ ScalarEpilogueStatus = Status;
1423+ if (Changed)
1424+ collectScalarEpilogueRequirements (/* Invalidate=*/ true );
1425+ }
1426+
13991427 // / Returns the TailFoldingStyle that is best for the current loop.
14001428 TailFoldingStyle getTailFoldingStyle (bool IVUpdateMayOverflow = true ) const {
14011429 if (!ChosenTailFoldingStyle)
@@ -1748,6 +1776,9 @@ class LoopVectorizationCostModel {
17481776
17491777 // / All element types found in the loop.
17501778 SmallPtrSet<Type *, 16 > ElementTypesInLoop;
1779+
1780+ /// Cached answer to "is a scalar epilogue required?": `first` holds the
      /// result when vectorizing, `second` the result when not vectorizing.
      /// Written by collectScalarEpilogueRequirements() and read by
      /// requiresScalarEpilogue().
1781+ std::optional<std::pair<bool , bool >> RequiresScalarEpilogue;
17511782};
17521783} // end namespace llvm
17531784
@@ -4011,7 +4042,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
40114042 if (ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicate) {
40124043 LLVM_DEBUG (dbgs () << " LV: Cannot fold tail by masking: vectorize with a "
40134044 " scalar epilogue instead.\n " );
4014- ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
4045+ setScalarEpilogueStatus ( CM_ScalarEpilogueAllowed) ;
40154046 return computeFeasibleMaxVF (MaxTC, UserVF, false );
40164047 }
40174048 return FixedScalableVFPair::getNone ();
@@ -4027,6 +4058,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
40274058 // Note: There is no need to invalidate any cost modeling decisions here, as
40284059 // none were taken so far.
40294060 InterleaveInfo.invalidateGroupsRequiringScalarEpilogue ();
4061+ collectScalarEpilogueRequirements (/* Invalidate=*/ true );
40304062 }
40314063
40324064 FixedScalableVFPair MaxFactors = computeFeasibleMaxVF (MaxTC, UserVF, true );
@@ -4098,7 +4130,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
40984130 if (ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicate) {
40994131 LLVM_DEBUG (dbgs () << " LV: Cannot fold tail by masking: vectorize with a "
41004132 " scalar epilogue instead.\n " );
4101- ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
4133+ setScalarEpilogueStatus ( CM_ScalarEpilogueAllowed) ;
41024134 return MaxFactors;
41034135 }
41044136
@@ -7006,6 +7038,7 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
70067038 if (!OrigLoop->isInnermost ()) {
70077039 // If the user doesn't provide a vectorization factor, determine a
70087040 // reasonable one.
7041+ CM.collectScalarEpilogueRequirements (/* Invalidate=*/ false );
70097042 if (UserVF.isZero ()) {
70107043 VF = determineVPlanVF (TTI, CM);
70117044 LLVM_DEBUG (dbgs () << " LV: VPlan computed VF " << VF << " .\n " );
@@ -7050,6 +7083,7 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
70507083
70517084void LoopVectorizationPlanner::plan (ElementCount UserVF, unsigned UserIC) {
70527085 assert (OrigLoop->isInnermost () && " Inner loop expected." );
7086+ CM.collectScalarEpilogueRequirements (/* Invalidate=*/ false );
70537087 CM.collectValuesToIgnore ();
70547088 CM.collectElementTypesForWidening ();
70557089
@@ -7064,11 +7098,13 @@ void LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
70647098 dbgs ()
70657099 << " LV: Invalidate all interleaved groups due to fold-tail by masking "
70667100 " which requires masked-interleaved support.\n " );
7067- if (CM.InterleaveInfo .invalidateGroups ())
7101+ if (CM.InterleaveInfo .invalidateGroups ()) {
70687102 // Invalidating interleave groups also requires invalidating all decisions
70697103 // based on them, which includes widening decisions and uniform and scalar
70707104 // values.
70717105 CM.invalidateCostModelingDecisions ();
7106+ CM.collectScalarEpilogueRequirements (/* Invalidate=*/ true );
7107+ }
70727108 }
70737109
70747110 if (CM.foldTailByMasking ())
0 commit comments