@@ -7512,7 +7512,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
75127512
75137513 VPValue *Mask = nullptr ;
75147514 if (Legal->isMaskRequired (I))
7515- Mask = getBlockInMask ( Builder.getInsertBlock ());
7515+ Mask = Builder.getInsertBlock ()-> getEntryMask ( );
75167516
75177517 // Determine if the pointer operand of the access is either consecutive or
75187518 // reverse consecutive.
@@ -7708,7 +7708,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
77087708 // all-true mask.
77097709 VPValue *Mask = nullptr ;
77107710 if (Legal->isMaskRequired (CI))
7711- Mask = getBlockInMask ( Builder.getInsertBlock ());
7711+ Mask = Builder.getInsertBlock ()-> getEntryMask ( );
77127712 else
77137713 Mask = Plan.getOrAddLiveIn (
77147714 ConstantInt::getTrue (IntegerType::getInt1Ty (CI->getContext ())));
@@ -7750,7 +7750,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
77507750 // div/rem operation itself. Otherwise fall through to general handling below.
77517751 if (CM.isPredicatedInst (I)) {
77527752 SmallVector<VPValue *> Ops (Operands);
7753- VPValue *Mask = getBlockInMask ( Builder.getInsertBlock ());
7753+ VPValue *Mask = Builder.getInsertBlock ()-> getEntryMask ( );
77547754 VPValue *One = Plan.getConstantInt (I->getType (), 1u );
77557755 auto *SafeRHS = Builder.createSelect (Mask, Ops[1 ], One, I->getDebugLoc ());
77567756 Ops[1 ] = SafeRHS;
@@ -7830,7 +7830,7 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
78307830 // In case of predicated execution (due to tail-folding, or conditional
78317831 // execution, or both), pass the relevant mask.
78327832 if (Legal->isMaskRequired (HI->Store ))
7833- HGramOps.push_back (getBlockInMask ( Builder.getInsertBlock ()));
7833+ HGramOps.push_back (Builder.getInsertBlock ()-> getEntryMask ( ));
78347834
78357835 return new VPHistogramRecipe (Opcode, HGramOps, HI->Store ->getDebugLoc ());
78367836}
@@ -7884,7 +7884,7 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
78847884 // added initially. Masked replicate recipes will later be placed under an
78857885 // if-then construct to prevent side-effects. Generate recipes to compute
78867886 // the block mask for this region.
7887- BlockInMask = getBlockInMask ( Builder.getInsertBlock ());
7887+ BlockInMask = Builder.getInsertBlock ()-> getEntryMask ( );
78887888 }
78897889
78907890 // Note that there is some custom logic to mark some intrinsics as uniform
@@ -8175,7 +8175,7 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
81758175 ReductionOpcode == Instruction::Sub) &&
81768176 " Expected an ADD or SUB operation for predicated partial "
81778177 " reductions (because the neutral element in the mask is zero)!" );
8178- Cond = getBlockInMask ( Builder.getInsertBlock ());
8178+ Cond = Builder.getInsertBlock ()-> getEntryMask ( );
81798179 VPValue *Zero = Plan.getConstantInt (Reduction->getType (), 0 );
81808180 BinOp = Builder.createSelect (Cond, BinOp, Zero, Reduction->getDebugLoc ());
81818181 }
@@ -8302,15 +8302,14 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83028302 // ---------------------------------------------------------------------------
83038303 // Predicate and linearize the top-level loop region.
83048304 // ---------------------------------------------------------------------------
8305- auto BlockMaskCache = VPlanTransforms::introduceMasksAndLinearize (
8306- *Plan, CM.foldTailByMasking ());
8305+ VPlanTransforms::introduceMasksAndLinearize (*Plan, CM.foldTailByMasking ());
83078306
83088307 // ---------------------------------------------------------------------------
83098308 // Construct wide recipes and apply predication for original scalar
83108309 // VPInstructions in the loop.
83118310 // ---------------------------------------------------------------------------
83128311 VPRecipeBuilder RecipeBuilder (*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
8313- Builder, BlockMaskCache, LVer);
8312+ Builder, LVer);
83148313 RecipeBuilder.collectScaledReductions (Range);
83158314
83168315 // Scan the body of the loop in a topological order to visit each basic block
@@ -8321,9 +8320,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83218320
83228321 auto *MiddleVPBB = Plan->getMiddleBlock ();
83238322 VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi ();
8324- // Mapping from VPValues in the initial plan to their widened VPValues. Needed
8325- // temporarily to update created block masks.
8326- DenseMap<VPValue *, VPValue *> Old2New;
83278323 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
83288324 // Convert input VPInstructions to widened recipes.
83298325 for (VPRecipeBase &R : make_early_inc_range (*VPBB)) {
@@ -8377,7 +8373,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83778373 }
83788374 if (Recipe->getNumDefinedValues () == 1 ) {
83798375 SingleDef->replaceAllUsesWith (Recipe->getVPSingleValue ());
8380- Old2New[ SingleDef] = Recipe-> getVPSingleValue ();
8376+ SingleDef-> eraseFromParent ();
83818377 } else {
83828378 assert (Recipe->getNumDefinedValues () == 0 &&
83838379 " Unexpected multidef recipe" );
@@ -8386,14 +8382,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83868382 }
83878383 }
83888384
8389- // replaceAllUsesWith above may invalidate the block masks. Update them here.
8390- // TODO: Include the masks as operands in the predicated VPlan directly
8391- // to remove the need to keep a map of masks beyond the predication
8392- // transform.
8393- RecipeBuilder.updateBlockMaskCache (Old2New);
8394- for (VPValue *Old : Old2New.keys ())
8395- Old->getDefiningRecipe ()->eraseFromParent ();
8396-
83978385 assert (isa<VPRegionBlock>(LoopRegion) &&
83988386 !LoopRegion->getEntryBasicBlock ()->empty () &&
83998387 " entry block must be set to a VPRegionBlock having a non-empty entry "
@@ -8427,6 +8415,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84278415 // Adjust the recipes for any inloop reductions.
84288416 adjustRecipesForReductions (Plan, RecipeBuilder, Range.Start );
84298417
8418+ // Erase the block entry masks, since they're not used any longer, so that
8419+ // future transforms only deal with recipe VPUsers.
8420+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))
8421+ VPBB->eraseEntryMask ();
8422+
84308423 // Apply mandatory transformation to handle FP maxnum/minnum reduction with
84318424 // NaNs if possible, bail out otherwise.
84328425 if (!VPlanTransforms::runPass (VPlanTransforms::handleMaxMinNumReductions,
@@ -8517,9 +8510,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
85178510
85188511 // Collect mapping of IR header phis to header phi recipes, to be used in
85198512 // addScalarResumePhis.
8520- DenseMap<VPBasicBlock *, VPValue *> BlockMaskCache;
85218513 VPRecipeBuilder RecipeBuilder (*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
8522- Builder, BlockMaskCache, nullptr /* LVer*/ );
8514+ Builder, nullptr /* LVer*/ );
85238515 for (auto &R : Plan->getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
85248516 if (isa<VPCanonicalIVPHIRecipe>(&R))
85258517 continue ;
@@ -8677,7 +8669,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
86778669
86788670 VPValue *CondOp = nullptr ;
86798671 if (CM.blockNeedsPredicationForAnyReason (CurrentLinkI->getParent ()))
8680- CondOp = RecipeBuilder. getBlockInMask ( CurrentLink->getParent ());
8672+ CondOp = CurrentLink->getParent ()-> getEntryMask ( );
86818673
86828674 // TODO: Retrieve FMFs from recipes directly.
86838675 RecurrenceDescriptor RdxDesc = Legal->getRecurrenceDescriptor (
@@ -8725,7 +8717,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
87258717 // different numbers of lanes. Partial reductions mask the input instead.
87268718 if (!PhiR->isInLoop () && CM.foldTailByMasking () &&
87278719 !isa<VPPartialReductionRecipe>(OrigExitingVPV->getDefiningRecipe ())) {
8728- VPValue *Cond = RecipeBuilder. getBlockInMask ( PhiR->getParent ());
8720+ VPValue *Cond = PhiR->getParent ()-> getEntryMask ( );
87298721 std::optional<FastMathFlags> FMFs =
87308722 PhiTy->isFloatingPointTy ()
87318723 ? std::make_optional (RdxDesc.getFastMathFlags ())
0 commit comments