Skip to content

Commit 3541146

Browse files
committed
[VPlan] Model entry-mask of VPBB in VPlan (NFC)
Model the entry mask of a VPBasicBlock (VPBB) directly in VPlan as an operand of the block, replacing the Old2New and BlockMaskCache hacks with a simple replaceAllUsesWith (RAUW). The entry mask of a VPBB is observed to be unused after adjustRecipesForReductions; hence, erase all the masks after this transform, which avoids a wide-scale refactor to take non-recipe VPUsers into account.
1 parent 8b3a124 commit 3541146

File tree

6 files changed

+64
-92
lines changed

6 files changed

+64
-92
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 17 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -7512,7 +7512,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
75127512

75137513
VPValue *Mask = nullptr;
75147514
if (Legal->isMaskRequired(I))
7515-
Mask = getBlockInMask(Builder.getInsertBlock());
7515+
Mask = Builder.getInsertBlock()->getEntryMask();
75167516

75177517
// Determine if the pointer operand of the access is either consecutive or
75187518
// reverse consecutive.
@@ -7708,7 +7708,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
77087708
// all-true mask.
77097709
VPValue *Mask = nullptr;
77107710
if (Legal->isMaskRequired(CI))
7711-
Mask = getBlockInMask(Builder.getInsertBlock());
7711+
Mask = Builder.getInsertBlock()->getEntryMask();
77127712
else
77137713
Mask = Plan.getOrAddLiveIn(
77147714
ConstantInt::getTrue(IntegerType::getInt1Ty(CI->getContext())));
@@ -7750,7 +7750,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
77507750
// div/rem operation itself. Otherwise fall through to general handling below.
77517751
if (CM.isPredicatedInst(I)) {
77527752
SmallVector<VPValue *> Ops(Operands);
7753-
VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
7753+
VPValue *Mask = Builder.getInsertBlock()->getEntryMask();
77547754
VPValue *One = Plan.getConstantInt(I->getType(), 1u);
77557755
auto *SafeRHS = Builder.createSelect(Mask, Ops[1], One, I->getDebugLoc());
77567756
Ops[1] = SafeRHS;
@@ -7830,7 +7830,7 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
78307830
// In case of predicated execution (due to tail-folding, or conditional
78317831
// execution, or both), pass the relevant mask.
78327832
if (Legal->isMaskRequired(HI->Store))
7833-
HGramOps.push_back(getBlockInMask(Builder.getInsertBlock()));
7833+
HGramOps.push_back(Builder.getInsertBlock()->getEntryMask());
78347834

78357835
return new VPHistogramRecipe(Opcode, HGramOps, HI->Store->getDebugLoc());
78367836
}
@@ -7884,7 +7884,7 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
78847884
// added initially. Masked replicate recipes will later be placed under an
78857885
// if-then construct to prevent side-effects. Generate recipes to compute
78867886
// the block mask for this region.
7887-
BlockInMask = getBlockInMask(Builder.getInsertBlock());
7887+
BlockInMask = Builder.getInsertBlock()->getEntryMask();
78887888
}
78897889

78907890
// Note that there is some custom logic to mark some intrinsics as uniform
@@ -8175,7 +8175,7 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
81758175
ReductionOpcode == Instruction::Sub) &&
81768176
"Expected an ADD or SUB operation for predicated partial "
81778177
"reductions (because the neutral element in the mask is zero)!");
8178-
Cond = getBlockInMask(Builder.getInsertBlock());
8178+
Cond = Builder.getInsertBlock()->getEntryMask();
81798179
VPValue *Zero = Plan.getConstantInt(Reduction->getType(), 0);
81808180
BinOp = Builder.createSelect(Cond, BinOp, Zero, Reduction->getDebugLoc());
81818181
}
@@ -8302,15 +8302,14 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83028302
// ---------------------------------------------------------------------------
83038303
// Predicate and linearize the top-level loop region.
83048304
// ---------------------------------------------------------------------------
8305-
auto BlockMaskCache = VPlanTransforms::introduceMasksAndLinearize(
8306-
*Plan, CM.foldTailByMasking());
8305+
VPlanTransforms::introduceMasksAndLinearize(*Plan, CM.foldTailByMasking());
83078306

83088307
// ---------------------------------------------------------------------------
83098308
// Construct wide recipes and apply predication for original scalar
83108309
// VPInstructions in the loop.
83118310
// ---------------------------------------------------------------------------
83128311
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
8313-
Builder, BlockMaskCache, LVer);
8312+
Builder, LVer);
83148313
RecipeBuilder.collectScaledReductions(Range);
83158314

83168315
// Scan the body of the loop in a topological order to visit each basic block
@@ -8321,9 +8320,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83218320

83228321
auto *MiddleVPBB = Plan->getMiddleBlock();
83238322
VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
8324-
// Mapping from VPValues in the initial plan to their widened VPValues. Needed
8325-
// temporarily to update created block masks.
8326-
DenseMap<VPValue *, VPValue *> Old2New;
83278323
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
83288324
// Convert input VPInstructions to widened recipes.
83298325
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
@@ -8377,7 +8373,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83778373
}
83788374
if (Recipe->getNumDefinedValues() == 1) {
83798375
SingleDef->replaceAllUsesWith(Recipe->getVPSingleValue());
8380-
Old2New[SingleDef] = Recipe->getVPSingleValue();
8376+
SingleDef->eraseFromParent();
83818377
} else {
83828378
assert(Recipe->getNumDefinedValues() == 0 &&
83838379
"Unexpected multidef recipe");
@@ -8386,14 +8382,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83868382
}
83878383
}
83888384

8389-
// replaceAllUsesWith above may invalidate the block masks. Update them here.
8390-
// TODO: Include the masks as operands in the predicated VPlan directly
8391-
// to remove the need to keep a map of masks beyond the predication
8392-
// transform.
8393-
RecipeBuilder.updateBlockMaskCache(Old2New);
8394-
for (VPValue *Old : Old2New.keys())
8395-
Old->getDefiningRecipe()->eraseFromParent();
8396-
83978385
assert(isa<VPRegionBlock>(LoopRegion) &&
83988386
!LoopRegion->getEntryBasicBlock()->empty() &&
83998387
"entry block must be set to a VPRegionBlock having a non-empty entry "
@@ -8427,6 +8415,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84278415
// Adjust the recipes for any inloop reductions.
84288416
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
84298417

8418+
// Erase the block entry masks, since they're not used any longer, so that
8419+
// future transforms only deal with recipe VPUsers.
8420+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))
8421+
VPBB->eraseEntryMask();
8422+
84308423
// Apply mandatory transformation to handle FP maxnum/minnum reduction with
84318424
// NaNs if possible, bail out otherwise.
84328425
if (!VPlanTransforms::runPass(VPlanTransforms::handleMaxMinNumReductions,
@@ -8517,9 +8510,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
85178510

85188511
// Collect mapping of IR header phis to header phi recipes, to be used in
85198512
// addScalarResumePhis.
8520-
DenseMap<VPBasicBlock *, VPValue *> BlockMaskCache;
85218513
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
8522-
Builder, BlockMaskCache, nullptr /*LVer*/);
8514+
Builder, nullptr /*LVer*/);
85238515
for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
85248516
if (isa<VPCanonicalIVPHIRecipe>(&R))
85258517
continue;
@@ -8677,7 +8669,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
86778669

86788670
VPValue *CondOp = nullptr;
86798671
if (CM.blockNeedsPredicationForAnyReason(CurrentLinkI->getParent()))
8680-
CondOp = RecipeBuilder.getBlockInMask(CurrentLink->getParent());
8672+
CondOp = CurrentLink->getParent()->getEntryMask();
86818673

86828674
// TODO: Retrieve FMFs from recipes directly.
86838675
RecurrenceDescriptor RdxDesc = Legal->getRecurrenceDescriptor(
@@ -8725,7 +8717,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
87258717
// different numbers of lanes. Partial reductions mask the input instead.
87268718
if (!PhiR->isInLoop() && CM.foldTailByMasking() &&
87278719
!isa<VPPartialReductionRecipe>(OrigExitingVPV->getDefiningRecipe())) {
8728-
VPValue *Cond = RecipeBuilder.getBlockInMask(PhiR->getParent());
8720+
VPValue *Cond = PhiR->getParent()->getEntryMask();
87298721
std::optional<FastMathFlags> FMFs =
87308722
PhiTy->isFloatingPointTy()
87318723
? std::make_optional(RdxDesc.getFastMathFlags())

llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,6 @@ class VPRecipeBuilder {
6767

6868
VPBuilder &Builder;
6969

70-
/// The mask of each VPBB, generated earlier and used for predicating recipes
71-
/// in VPBB.
72-
/// TODO: remove by applying predication when generating the masks.
73-
DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache;
74-
7570
// VPlan construction support: Hold a mapping from ingredients to
7671
// their recipe.
7772
DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe;
@@ -149,11 +144,9 @@ class VPRecipeBuilder {
149144
LoopVectorizationLegality *Legal,
150145
LoopVectorizationCostModel &CM,
151146
PredicatedScalarEvolution &PSE, VPBuilder &Builder,
152-
DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache,
153147
LoopVersioning *LVer)
154148
: Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal),
155-
CM(CM), PSE(PSE), Builder(Builder), BlockMaskCache(BlockMaskCache),
156-
LVer(LVer) {}
149+
CM(CM), PSE(PSE), Builder(Builder), LVer(LVer) {}
157150

158151
std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) {
159152
auto It = ScaledReductionMap.find(ExitInst);
@@ -182,12 +175,6 @@ class VPRecipeBuilder {
182175
Ingredient2Recipe[I] = R;
183176
}
184177

185-
/// Returns the *entry* mask for block \p VPBB or null if the mask is
186-
/// all-true.
187-
VPValue *getBlockInMask(VPBasicBlock *VPBB) const {
188-
return BlockMaskCache.lookup(VPBB);
189-
}
190-
191178
/// Return the recipe created for given ingredient.
192179
VPRecipeBase *getRecipe(Instruction *I) {
193180
assert(Ingredient2Recipe.count(I) &&
@@ -211,15 +198,6 @@ class VPRecipeBuilder {
211198
}
212199
return Plan.getOrAddLiveIn(V);
213200
}
214-
215-
void updateBlockMaskCache(DenseMap<VPValue *, VPValue *> &Old2New) {
216-
for (auto &[_, V] : BlockMaskCache) {
217-
if (auto *New = Old2New.lookup(V)) {
218-
V->replaceAllUsesWith(New);
219-
V = New;
220-
}
221-
}
222-
}
223201
};
224202
} // end namespace llvm
225203

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3820,13 +3820,15 @@ struct CastInfo<VPPhiAccessors, const VPRecipeBase *>
38203820

38213821
/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
38223822
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3823-
/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3824-
class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
3823+
/// output IR instructions. All PHI-like recipes must come before any non-PHI
3824+
/// recipes. It also has an operand corresponding to a mask on which to enter
3825+
/// the block, which is used early in the VPlan construction.
3826+
class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase, protected VPUser {
38253827
friend class VPlan;
38263828

38273829
/// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
38283830
VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3829-
: VPBlockBase(VPBasicBlockSC, Name.str()) {
3831+
: VPBlockBase(VPBasicBlockSC, Name.str()), VPUser(VPUBlockSC) {
38303832
if (Recipe)
38313833
appendRecipe(Recipe);
38323834
}
@@ -3839,7 +3841,7 @@ class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
38393841
RecipeListTy Recipes;
38403842

38413843
VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
3842-
: VPBlockBase(BlockSC, Name.str()) {}
3844+
: VPBlockBase(BlockSC, Name.str()), VPUser(VPUBlockSC) {}
38433845

38443846
public:
38453847
~VPBasicBlock() override {
@@ -3950,6 +3952,21 @@ class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
39503952
/// second predecessor is the exiting block of the region.
39513953
const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
39523954

3955+
/// Get the entry mask of this block. nullptr is used to communicate an
3956+
/// all-ones mask.
3957+
VPValue *getEntryMask() const {
3958+
return getNumOperands() ? getOperand(0) : nullptr;
3959+
}
3960+
3961+
/// Set the entry mask of this block: used by VPlanPredicator, when
3962+
/// predicating blocks.
3963+
void setEntryMask(VPValue *M) {
3964+
getNumOperands() ? setOperand(0, M) : addOperand(M);
3965+
}
3966+
3967+
/// Erase the entry mask of this block.
3968+
void eraseEntryMask() { eraseOperands(); }
3969+
39533970
protected:
39543971
/// Execute the recipes in the IR basic block \p BB.
39553972
void executeRecipes(VPTransformState *State, BasicBlock *BB);

llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp

Lines changed: 6 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -32,32 +32,15 @@ class VPPredicator {
3232
using EdgeMaskCacheTy =
3333
DenseMap<std::pair<const VPBasicBlock *, const VPBasicBlock *>,
3434
VPValue *>;
35-
using BlockMaskCacheTy = DenseMap<VPBasicBlock *, VPValue *>;
3635
EdgeMaskCacheTy EdgeMaskCache;
3736

38-
BlockMaskCacheTy BlockMaskCache;
39-
4037
/// Create an edge mask for every destination of cases and/or default.
4138
void createSwitchEdgeMasks(VPInstruction *SI);
4239

4340
/// Computes and returns the predicate of the edge between \p Src and \p Dst,
4441
/// possibly inserting new recipes at \p Dst (using Builder's insertion point)
4542
VPValue *createEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst);
4643

47-
/// Returns the *entry* mask for \p VPBB.
48-
VPValue *getBlockInMask(VPBasicBlock *VPBB) const {
49-
return BlockMaskCache.lookup(VPBB);
50-
}
51-
52-
/// Record \p Mask as the *entry* mask of \p VPBB, which is expected to not
53-
/// already have a mask.
54-
void setBlockInMask(VPBasicBlock *VPBB, VPValue *Mask) {
55-
// TODO: Include the masks as operands in the predicated VPlan directly to
56-
// avoid keeping the map of masks beyond the predication transform.
57-
assert(!getBlockInMask(VPBB) && "Mask already set");
58-
BlockMaskCache[VPBB] = Mask;
59-
}
60-
6144
/// Record \p Mask as the mask of the edge from \p Src to \p Dst. The edge is
6245
/// expected to not have a mask already.
6346
VPValue *setEdgeMask(const VPBasicBlock *Src, const VPBasicBlock *Dst,
@@ -82,8 +65,6 @@ class VPPredicator {
8265

8366
/// Convert phi recipes in \p VPBB to VPBlendRecipes.
8467
void convertPhisToBlends(VPBasicBlock *VPBB);
85-
86-
const BlockMaskCacheTy getBlockMaskCache() const { return BlockMaskCache; }
8768
};
8869
} // namespace
8970

@@ -95,7 +76,7 @@ VPValue *VPPredicator::createEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst) {
9576
if (EdgeMask)
9677
return EdgeMask;
9778

98-
VPValue *SrcMask = getBlockInMask(Src);
79+
VPValue *SrcMask = Src->getEntryMask();
9980

10081
// If there's a single successor, there's no terminator recipe.
10182
if (Src->getNumSuccessors() == 1)
@@ -140,7 +121,6 @@ VPValue *VPPredicator::createBlockInMask(VPBasicBlock *VPBB) {
140121
VPValue *EdgeMask = createEdgeMask(cast<VPBasicBlock>(Predecessor), VPBB);
141122
if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is
142123
// too.
143-
setBlockInMask(VPBB, EdgeMask);
144124
return EdgeMask;
145125
}
146126

@@ -152,15 +132,13 @@ VPValue *VPPredicator::createBlockInMask(VPBasicBlock *VPBB) {
152132
BlockMask = Builder.createOr(BlockMask, EdgeMask, {});
153133
}
154134

155-
setBlockInMask(VPBB, BlockMask);
135+
VPBB->setEntryMask(BlockMask);
156136
return BlockMask;
157137
}
158138

159139
void VPPredicator::createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) {
160-
if (!FoldTail) {
161-
setBlockInMask(HeaderVPBB, nullptr);
140+
if (!FoldTail)
162141
return;
163-
}
164142

165143
// Introduce the early-exit compare IV <= BTC to form header block mask.
166144
// This is used instead of IV < TC because TC may wrap, unlike BTC. Start by
@@ -175,7 +153,7 @@ void VPPredicator::createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) {
175153

176154
VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
177155
VPValue *BlockMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
178-
setBlockInMask(HeaderVPBB, BlockMask);
156+
HeaderVPBB->setEntryMask(BlockMask);
179157
}
180158

181159
void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
@@ -201,7 +179,7 @@ void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
201179

202180
// We need to handle 2 separate cases below for all entries in Dst2Compares,
203181
// which excludes destinations matching the default destination.
204-
VPValue *SrcMask = getBlockInMask(Src);
182+
VPValue *SrcMask = Src->getEntryMask();
205183
VPValue *DefaultMask = nullptr;
206184
for (const auto &[Dst, Conds] : Dst2Compares) {
207185
// 1. Dst is not the default destination. Dst is reached if any of the
@@ -261,8 +239,7 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
261239
}
262240
}
263241

264-
DenseMap<VPBasicBlock *, VPValue *>
265-
VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {
242+
void VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {
266243
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
267244
// Scan the body of the loop in a topological order to visit each basic block
268245
// after having visited its predecessor basic blocks.
@@ -301,5 +278,4 @@ VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {
301278

302279
PrevVPBB = VPBB;
303280
}
304-
return Predicator.getBlockMaskCache();
305281
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -353,11 +353,8 @@ struct VPlanTransforms {
353353
/// Predicate and linearize the control-flow in the only loop region of
354354
/// \p Plan. If \p FoldTail is true, create a mask guarding the loop
355355
/// header, otherwise use all-true for the header mask. Masks for blocks are
356-
/// added to a block-to-mask map which is returned in order to be used later
357-
/// for wide recipe construction. This argument is temporary and will be
358-
/// removed in the future.
359-
static DenseMap<VPBasicBlock *, VPValue *>
360-
introduceMasksAndLinearize(VPlan &Plan, bool FoldTail);
356+
/// added to the blocks themselves.
357+
static void introduceMasksAndLinearize(VPlan &Plan, bool FoldTail);
361358

362359
/// Add branch weight metadata, if the \p Plan's middle block is terminated by
363360
/// a BranchOnCond recipe.

0 commit comments

Comments
 (0)