@@ -393,6 +393,12 @@ static cl::opt<bool> EnableEarlyExitVectorization(
393393 cl::desc(
394394 " Enable vectorization of early exit loops with uncountable exits." ));
395395
// Hidden boolean command-line option, initialised to false.
// LoopVectorizePass::processLoop consults it: when the option is off, any loop
// with a potentially faulting load is rejected; when it is on, only loops with
// more than one potentially faulting load are rejected.
396+ static cl::opt<bool >
397+ EnableEarlyExitWithFFLoads (" enable-early-exit-with-ffload" , cl::init(false ),
398+ cl::Hidden,
399+ cl::desc(" Enable vectorization of early-exit "
400+ " loops with fault-only-first loads." ));
401+
// Hidden boolean command-line option, initialised to false. Per its
// description it asks for VFs whose register pressure is too high to be
// discarded. NOTE(review): the code that reads this option is outside this
// excerpt -- confirm the exact effect there.
396402static cl::opt<bool > ConsiderRegPressure (
397403 " vectorizer-consider-reg-pressure" , cl::init(false ), cl::Hidden,
398404 cl::desc(" Discard VFs if their register pressure is too high." ));
@@ -3485,6 +3491,15 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
34853491 return FixedScalableVFPair::getNone ();
34863492 }
34873493
3494+ if (!Legal->getPotentiallyFaultingLoads ().empty () && UserIC > 1 ) {
3495+ reportVectorizationFailure (" Auto-vectorization of loops with potentially "
3496+ " faulting loads is not supported when the "
3497+ " interleave count is more than 1" ,
3498+ " CantInterleaveLoopWithPotentiallyFaultingLoads" ,
3499+ ORE, TheLoop);
3500+ return FixedScalableVFPair::getNone ();
3501+ }
3502+
34883503 ScalarEvolution *SE = PSE.getSE ();
34893504 ElementCount TC = getSmallConstantTripCount (SE, TheLoop);
34903505 unsigned MaxTC = PSE.getSmallConstantMaxTripCount ();
@@ -4054,6 +4069,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
40544069 case VPDef::VPReductionPHISC:
40554070 case VPDef::VPInterleaveEVLSC:
40564071 case VPDef::VPInterleaveSC:
4072+ case VPDef::VPWidenFFLoadSC:
40574073 case VPDef::VPWidenLoadEVLSC:
40584074 case VPDef::VPWidenLoadSC:
40594075 case VPDef::VPWidenStoreEVLSC:
@@ -4528,6 +4544,10 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
45284544 if (!Legal->isSafeForAnyVectorWidth ())
45294545 return 1 ;
45304546
4547+ // No interleaving for potentially faulting loads.
4548+ if (!Legal->getPotentiallyFaultingLoads ().empty ())
4549+ return 1 ;
4550+
45314551 // We don't attempt to perform interleaving for loops with uncountable early
45324552 // exits because the VPInstruction::AnyOf code cannot currently handle
45334553 // multiple parts.
@@ -7213,6 +7233,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
72137233 // Regions are dissolved after optimizing for VF and UF, which completely
72147234 // removes unneeded loop regions first.
72157235 VPlanTransforms::dissolveLoopRegions (BestVPlan);
7236+
7237+ VPlanTransforms::convertFFLoadEarlyExitToVLStepping (BestVPlan);
7238+
72167239 // Canonicalize EVL loops after regions are dissolved.
72177240 VPlanTransforms::canonicalizeEVLLoops (BestVPlan);
72187241 VPlanTransforms::materializeBackedgeTakenCount (BestVPlan, VectorPH);
@@ -7500,6 +7523,10 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
75007523 Builder.insert (VectorPtr);
75017524 Ptr = VectorPtr;
75027525 }
7526+ if (Legal->getPotentiallyFaultingLoads ().contains (I))
7527+ return new VPWidenFFLoadRecipe (*cast<LoadInst>(I), Ptr, &Plan.getVF (), Mask,
7528+ VPIRMetadata (*I, LVer), I->getDebugLoc ());
7529+
75037530 if (LoadInst *Load = dyn_cast<LoadInst>(I))
75047531 return new VPWidenLoadRecipe (*Load, Ptr, Mask, Consecutive, Reverse,
75057532 VPIRMetadata (*Load, LVer), I->getDebugLoc ());
@@ -8538,6 +8565,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
85388565 if (Recipe->getNumDefinedValues () == 1 ) {
85398566 SingleDef->replaceAllUsesWith (Recipe->getVPSingleValue ());
85408567 Old2New[SingleDef] = Recipe->getVPSingleValue ();
8568+ } else if (isa<VPWidenFFLoadRecipe>(Recipe)) {
8569+ VPValue *Data = Recipe->getVPValue (0 );
8570+ SingleDef->replaceAllUsesWith (Data);
8571+ Old2New[SingleDef] = Data;
85418572 } else {
85428573 assert (Recipe->getNumDefinedValues () == 0 &&
85438574 " Unexpected multidef recipe" );
@@ -8585,6 +8616,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
85858616 // Adjust the recipes for any inloop reductions.
85868617 adjustRecipesForReductions (Plan, RecipeBuilder, Range.Start );
85878618
8619+ VPlanTransforms::adjustFFLoadEarlyExitForPoisonSafety (*Plan);
8620+
85888621 // Apply mandatory transformation to handle FP maxnum/minnum reduction with
85898622 // NaNs if possible, bail out otherwise.
85908623 if (!VPlanTransforms::runPass (VPlanTransforms::handleMaxMinNumReductions,
@@ -9893,7 +9926,14 @@ bool LoopVectorizePass::processLoop(Loop *L) {
98939926 return false ;
98949927 }
98959928
9896- if (!LVL.getPotentiallyFaultingLoads ().empty ()) {
9929+ if (EnableEarlyExitWithFFLoads) {
9930+ if (LVL.getPotentiallyFaultingLoads ().size () > 1 ) {
9931+ reportVectorizationFailure (" Auto-vectorization of loops with more than 1 "
9932+ " potentially faulting load is not enabled" ,
9933+ " MoreThanOnePotentiallyFaultingLoad" , ORE, L);
9934+ return false ;
9935+ }
9936+ } else if (!LVL.getPotentiallyFaultingLoads ().empty ()) {
98979937 reportVectorizationFailure (" Auto-vectorization of loops with potentially "
98989938 " faulting load is not supported" ,
98999939 " PotentiallyFaultingLoadsNotSupported" , ORE, L);
0 commit comments