@@ -7512,7 +7512,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
75127512
75137513 VPValue *Mask = nullptr ;
75147514 if (Legal->isMaskRequired (I))
7515- Mask = getBlockInMask ( Builder.getInsertBlock ());
7515+ Mask = Builder.getInsertBlock ()-> getEntryMask ( );
75167516
75177517 // Determine if the pointer operand of the access is either consecutive or
75187518 // reverse consecutive.
@@ -7709,7 +7709,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
77097709 // all-true mask.
77107710 VPValue *Mask = nullptr ;
77117711 if (Legal->isMaskRequired (CI))
7712- Mask = getBlockInMask ( Builder.getInsertBlock ());
7712+ Mask = Builder.getInsertBlock ()-> getEntryMask ( );
77137713 else
77147714 Mask = Plan.getOrAddLiveIn (
77157715 ConstantInt::getTrue (IntegerType::getInt1Ty (CI->getContext ())));
@@ -7751,7 +7751,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
77517751 // div/rem operation itself. Otherwise fall through to general handling below.
77527752 if (CM.isPredicatedInst (I)) {
77537753 SmallVector<VPValue *> Ops (Operands);
7754- VPValue *Mask = getBlockInMask ( Builder.getInsertBlock ());
7754+ VPValue *Mask = Builder.getInsertBlock ()-> getEntryMask ( );
77557755 VPValue *One =
77567756 Plan.getOrAddLiveIn (ConstantInt::get (I->getType (), 1u , false ));
77577757 auto *SafeRHS = Builder.createSelect (Mask, Ops[1 ], One, I->getDebugLoc ());
@@ -7833,7 +7833,7 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
78337833 // In case of predicated execution (due to tail-folding, or conditional
78347834 // execution, or both), pass the relevant mask.
78357835 if (Legal->isMaskRequired (HI->Store ))
7836- HGramOps.push_back (getBlockInMask ( Builder.getInsertBlock ()));
7836+ HGramOps.push_back (Builder.getInsertBlock ()-> getEntryMask ( ));
78377837
78387838 return new VPHistogramRecipe (Opcode, HGramOps, HI->Store ->getDebugLoc ());
78397839}
@@ -7887,7 +7887,7 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
78877887 // added initially. Masked replicate recipes will later be placed under an
78887888 // if-then construct to prevent side-effects. Generate recipes to compute
78897889 // the block mask for this region.
7890- BlockInMask = getBlockInMask ( Builder.getInsertBlock ());
7890+ BlockInMask = Builder.getInsertBlock ()-> getEntryMask ( );
78917891 }
78927892
78937893 // Note that there is some custom logic to mark some intrinsics as uniform
@@ -8178,7 +8178,7 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
81788178 ReductionOpcode == Instruction::Sub) &&
81798179 " Expected an ADD or SUB operation for predicated partial "
81808180 " reductions (because the neutral element in the mask is zero)!" );
8181- Cond = getBlockInMask ( Builder.getInsertBlock ());
8181+ Cond = Builder.getInsertBlock ()-> getEntryMask ( );
81828182 VPValue *Zero =
81838183 Plan.getOrAddLiveIn (ConstantInt::get (Reduction->getType (), 0 ));
81848184 BinOp = Builder.createSelect (Cond, BinOp, Zero, Reduction->getDebugLoc ());
@@ -8306,15 +8306,14 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83068306 // ---------------------------------------------------------------------------
83078307 // Predicate and linearize the top-level loop region.
83088308 // ---------------------------------------------------------------------------
8309- auto BlockMaskCache = VPlanTransforms::introduceMasksAndLinearize (
8310- *Plan, CM.foldTailByMasking ());
8309+ VPlanTransforms::introduceMasksAndLinearize (*Plan, CM.foldTailByMasking ());
83118310
83128311 // ---------------------------------------------------------------------------
83138312 // Construct wide recipes and apply predication for original scalar
83148313 // VPInstructions in the loop.
83158314 // ---------------------------------------------------------------------------
83168315 VPRecipeBuilder RecipeBuilder (*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
8317- Builder, BlockMaskCache, LVer);
8316+ Builder, LVer);
83188317 RecipeBuilder.collectScaledReductions (Range);
83198318
83208319 // Scan the body of the loop in a topological order to visit each basic block
@@ -8325,9 +8324,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83258324
83268325 auto *MiddleVPBB = Plan->getMiddleBlock ();
83278326 VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi ();
8328- // Mapping from VPValues in the initial plan to their widened VPValues. Needed
8329- // temporarily to update created block masks.
8330- DenseMap<VPValue *, VPValue *> Old2New;
83318327 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
83328328 // Convert input VPInstructions to widened recipes.
83338329 for (VPRecipeBase &R : make_early_inc_range (*VPBB)) {
@@ -8381,7 +8377,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83818377 }
83828378 if (Recipe->getNumDefinedValues () == 1 ) {
83838379 SingleDef->replaceAllUsesWith (Recipe->getVPSingleValue ());
8384- Old2New[ SingleDef] = Recipe-> getVPSingleValue ();
8380+ SingleDef-> eraseFromParent ();
83858381 } else {
83868382 assert (Recipe->getNumDefinedValues () == 0 &&
83878383 " Unexpected multidef recipe" );
@@ -8390,14 +8386,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83908386 }
83918387 }
83928388
8393- // replaceAllUsesWith above may invalidate the block masks. Update them here.
8394- // TODO: Include the masks as operands in the predicated VPlan directly
8395- // to remove the need to keep a map of masks beyond the predication
8396- // transform.
8397- RecipeBuilder.updateBlockMaskCache (Old2New);
8398- for (VPValue *Old : Old2New.keys ())
8399- Old->getDefiningRecipe ()->eraseFromParent ();
8400-
84018389 assert (isa<VPRegionBlock>(LoopRegion) &&
84028390 !LoopRegion->getEntryBasicBlock ()->empty () &&
84038391 " entry block must be set to a VPRegionBlock having a non-empty entry "
@@ -8431,6 +8419,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84318419 // Adjust the recipes for any inloop reductions.
84328420 adjustRecipesForReductions (Plan, RecipeBuilder, Range.Start );
84338421
8422+ // Erase the block entry masks, since they're not used any longer, so that
8423+ // future transforms only deal with recipe VPUsers.
8424+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))
8425+ VPBB->eraseEntryMask ();
8426+
84348427 // Apply mandatory transformation to handle FP maxnum/minnum reduction with
84358428 // NaNs if possible, bail out otherwise.
84368429 if (!VPlanTransforms::runPass (VPlanTransforms::handleMaxMinNumReductions,
@@ -8521,9 +8514,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
85218514
85228515 // Collect mapping of IR header phis to header phi recipes, to be used in
85238516 // addScalarResumePhis.
8524- DenseMap<VPBasicBlock *, VPValue *> BlockMaskCache;
85258517 VPRecipeBuilder RecipeBuilder (*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
8526- Builder, BlockMaskCache, nullptr /* LVer*/ );
8518+ Builder, nullptr /* LVer*/ );
85278519 for (auto &R : Plan->getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
85288520 if (isa<VPCanonicalIVPHIRecipe>(&R))
85298521 continue ;
@@ -8681,7 +8673,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
86818673
86828674 VPValue *CondOp = nullptr ;
86838675 if (CM.blockNeedsPredicationForAnyReason (CurrentLinkI->getParent ()))
8684- CondOp = RecipeBuilder. getBlockInMask ( CurrentLink->getParent ());
8676+ CondOp = CurrentLink->getParent ()-> getEntryMask ( );
86858677
86868678 // TODO: Retrieve FMFs from recipes directly.
86878679 RecurrenceDescriptor RdxDesc = Legal->getRecurrenceDescriptor (
@@ -8729,7 +8721,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
87298721 // different numbers of lanes. Partial reductions mask the input instead.
87308722 if (!PhiR->isInLoop () && CM.foldTailByMasking () &&
87318723 !isa<VPPartialReductionRecipe>(OrigExitingVPV->getDefiningRecipe ())) {
8732- VPValue *Cond = RecipeBuilder. getBlockInMask ( PhiR->getParent ());
8724+ VPValue *Cond = PhiR->getParent ()-> getEntryMask ( );
87338725 std::optional<FastMathFlags> FMFs =
87348726 PhiTy->isFloatingPointTy ()
87358727 ? std::make_optional (RdxDesc.getFastMathFlags ())
0 commit comments