@@ -7512,7 +7512,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
75127512
75137513 VPValue *Mask = nullptr ;
75147514 if (Legal->isMaskRequired (I))
7515- Mask = getBlockInMask ( Builder.getInsertBlock ());
7515+ Mask = Builder.getInsertBlock ()-> getEntryMask ( );
75167516
75177517 // Determine if the pointer operand of the access is either consecutive or
75187518 // reverse consecutive.
@@ -7709,7 +7709,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
77097709 // all-true mask.
77107710 VPValue *Mask = nullptr ;
77117711 if (Legal->isMaskRequired (CI))
7712- Mask = getBlockInMask ( Builder.getInsertBlock ());
7712+ Mask = Builder.getInsertBlock ()-> getEntryMask ( );
77137713 else
77147714 Mask = Plan.getOrAddLiveIn (
77157715 ConstantInt::getTrue (IntegerType::getInt1Ty (CI->getContext ())));
@@ -7751,7 +7751,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
77517751 // div/rem operation itself. Otherwise fall through to general handling below.
77527752 if (CM.isPredicatedInst (I)) {
77537753 SmallVector<VPValue *> Ops (Operands);
7754- VPValue *Mask = getBlockInMask ( Builder.getInsertBlock ());
7754+ VPValue *Mask = Builder.getInsertBlock ()-> getEntryMask ( );
77557755 VPValue *One = Plan.getConstantInt (I->getType (), 1u );
77567756 auto *SafeRHS = Builder.createSelect (Mask, Ops[1 ], One, I->getDebugLoc ());
77577757 Ops[1 ] = SafeRHS;
@@ -7831,7 +7831,7 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
78317831 // In case of predicated execution (due to tail-folding, or conditional
78327832 // execution, or both), pass the relevant mask.
78337833 if (Legal->isMaskRequired (HI->Store ))
7834- HGramOps.push_back (getBlockInMask ( Builder.getInsertBlock ()));
7834+ HGramOps.push_back (Builder.getInsertBlock ()-> getEntryMask ( ));
78357835
78367836 return new VPHistogramRecipe (Opcode, HGramOps, HI->Store ->getDebugLoc ());
78377837}
@@ -7885,7 +7885,7 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
78857885 // added initially. Masked replicate recipes will later be placed under an
78867886 // if-then construct to prevent side-effects. Generate recipes to compute
78877887 // the block mask for this region.
7888- BlockInMask = getBlockInMask ( Builder.getInsertBlock ());
7888+ BlockInMask = Builder.getInsertBlock ()-> getEntryMask ( );
78897889 }
78907890
78917891 // Note that there is some custom logic to mark some intrinsics as uniform
@@ -8176,7 +8176,7 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
81768176 ReductionOpcode == Instruction::Sub) &&
81778177 " Expected an ADD or SUB operation for predicated partial "
81788178 " reductions (because the neutral element in the mask is zero)!" );
8179- Cond = getBlockInMask ( Builder.getInsertBlock ());
8179+ Cond = Builder.getInsertBlock ()-> getEntryMask ( );
81808180 VPValue *Zero = Plan.getConstantInt (Reduction->getType (), 0 );
81818181 BinOp = Builder.createSelect (Cond, BinOp, Zero, Reduction->getDebugLoc ());
81828182 }
@@ -8303,15 +8303,14 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83038303 // ---------------------------------------------------------------------------
83048304 // Predicate and linearize the top-level loop region.
83058305 // ---------------------------------------------------------------------------
8306- auto BlockMaskCache = VPlanTransforms::introduceMasksAndLinearize (
8307- *Plan, CM.foldTailByMasking ());
8306+ VPlanTransforms::introduceMasksAndLinearize (*Plan, CM.foldTailByMasking ());
83088307
83098308 // ---------------------------------------------------------------------------
83108309 // Construct wide recipes and apply predication for original scalar
83118310 // VPInstructions in the loop.
83128311 // ---------------------------------------------------------------------------
83138312 VPRecipeBuilder RecipeBuilder (*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
8314- Builder, BlockMaskCache, LVer);
8313+ Builder, LVer);
83158314 RecipeBuilder.collectScaledReductions (Range);
83168315
83178316 // Scan the body of the loop in a topological order to visit each basic block
@@ -8322,9 +8321,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83228321
83238322 auto *MiddleVPBB = Plan->getMiddleBlock ();
83248323 VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi ();
8325- // Mapping from VPValues in the initial plan to their widened VPValues. Needed
8326- // temporarily to update created block masks.
8327- DenseMap<VPValue *, VPValue *> Old2New;
83288324 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
83298325 // Convert input VPInstructions to widened recipes.
83308326 for (VPRecipeBase &R : make_early_inc_range (*VPBB)) {
@@ -8378,7 +8374,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83788374 }
83798375 if (Recipe->getNumDefinedValues () == 1 ) {
83808376 SingleDef->replaceAllUsesWith (Recipe->getVPSingleValue ());
8381- Old2New[ SingleDef] = Recipe-> getVPSingleValue ();
8377+ SingleDef-> eraseFromParent ();
83828378 } else {
83838379 assert (Recipe->getNumDefinedValues () == 0 &&
83848380 " Unexpected multidef recipe" );
@@ -8387,14 +8383,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83878383 }
83888384 }
83898385
8390- // replaceAllUsesWith above may invalidate the block masks. Update them here.
8391- // TODO: Include the masks as operands in the predicated VPlan directly
8392- // to remove the need to keep a map of masks beyond the predication
8393- // transform.
8394- RecipeBuilder.updateBlockMaskCache (Old2New);
8395- for (VPValue *Old : Old2New.keys ())
8396- Old->getDefiningRecipe ()->eraseFromParent ();
8397-
83988386 assert (isa<VPRegionBlock>(LoopRegion) &&
83998387 !LoopRegion->getEntryBasicBlock ()->empty () &&
84008388 " entry block must be set to a VPRegionBlock having a non-empty entry "
@@ -8428,6 +8416,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84288416 // Adjust the recipes for any inloop reductions.
84298417 adjustRecipesForReductions (Plan, RecipeBuilder, Range.Start );
84308418
8419+ // Erase the block entry masks, since they're not used any longer, so that
8420+ // future transforms only deal with recipe VPUsers.
8421+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))
8422+ VPBB->eraseEntryMask ();
8423+
84318424 // Apply mandatory transformation to handle FP maxnum/minnum reduction with
84328425 // NaNs if possible, bail out otherwise.
84338426 if (!VPlanTransforms::runPass (VPlanTransforms::handleMaxMinNumReductions,
@@ -8518,9 +8511,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
85188511
85198512 // Collect mapping of IR header phis to header phi recipes, to be used in
85208513 // addScalarResumePhis.
8521- DenseMap<VPBasicBlock *, VPValue *> BlockMaskCache;
85228514 VPRecipeBuilder RecipeBuilder (*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
8523- Builder, BlockMaskCache, nullptr /* LVer*/ );
8515+ Builder, nullptr /* LVer*/ );
85248516 for (auto &R : Plan->getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
85258517 if (isa<VPCanonicalIVPHIRecipe>(&R))
85268518 continue ;
@@ -8678,7 +8670,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
86788670
86798671 VPValue *CondOp = nullptr ;
86808672 if (CM.blockNeedsPredicationForAnyReason (CurrentLinkI->getParent ()))
8681- CondOp = RecipeBuilder. getBlockInMask ( CurrentLink->getParent ());
8673+ CondOp = CurrentLink->getParent ()-> getEntryMask ( );
86828674
86838675 // TODO: Retrieve FMFs from recipes directly.
86848676 RecurrenceDescriptor RdxDesc = Legal->getRecurrenceDescriptor (
@@ -8726,7 +8718,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
87268718 // different numbers of lanes. Partial reductions mask the input instead.
87278719 if (!PhiR->isInLoop () && CM.foldTailByMasking () &&
87288720 !isa<VPPartialReductionRecipe>(OrigExitingVPV->getDefiningRecipe ())) {
8729- VPValue *Cond = RecipeBuilder. getBlockInMask ( PhiR->getParent ());
8721+ VPValue *Cond = PhiR->getParent ()-> getEntryMask ( );
87308722 std::optional<FastMathFlags> FMFs =
87318723 PhiTy->isFloatingPointTy ()
87328724 ? std::make_optional (RdxDesc.getFastMathFlags ())
0 commit comments