Skip to content

Commit 0adb7b8

Browse files
committed
[VPlan] Model entry-mask of VPBB in VPlan (NFC)
Model the entry-mask of a VPBB in VPlan, replacing the Old2New and BlockMaskCache hacks with a simple replaceAllUsesWith (RAUW). The entry-mask of a VPBB is observed to be unused after adjustRecipesForReductions; hence, erase all the masks after this transform, which avoids a wide-scale refactor to take non-recipe VPUsers into account.
1 parent 2108c62 commit 0adb7b8

File tree

6 files changed

+64
-92
lines changed

6 files changed

+64
-92
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 17 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -7512,7 +7512,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
75127512

75137513
VPValue *Mask = nullptr;
75147514
if (Legal->isMaskRequired(I))
7515-
Mask = getBlockInMask(Builder.getInsertBlock());
7515+
Mask = Builder.getInsertBlock()->getEntryMask();
75167516

75177517
// Determine if the pointer operand of the access is either consecutive or
75187518
// reverse consecutive.
@@ -7709,7 +7709,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
77097709
// all-true mask.
77107710
VPValue *Mask = nullptr;
77117711
if (Legal->isMaskRequired(CI))
7712-
Mask = getBlockInMask(Builder.getInsertBlock());
7712+
Mask = Builder.getInsertBlock()->getEntryMask();
77137713
else
77147714
Mask = Plan.getOrAddLiveIn(
77157715
ConstantInt::getTrue(IntegerType::getInt1Ty(CI->getContext())));
@@ -7751,7 +7751,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
77517751
// div/rem operation itself. Otherwise fall through to general handling below.
77527752
if (CM.isPredicatedInst(I)) {
77537753
SmallVector<VPValue *> Ops(Operands);
7754-
VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
7754+
VPValue *Mask = Builder.getInsertBlock()->getEntryMask();
77557755
VPValue *One =
77567756
Plan.getOrAddLiveIn(ConstantInt::get(I->getType(), 1u, false));
77577757
auto *SafeRHS = Builder.createSelect(Mask, Ops[1], One, I->getDebugLoc());
@@ -7833,7 +7833,7 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
78337833
// In case of predicated execution (due to tail-folding, or conditional
78347834
// execution, or both), pass the relevant mask.
78357835
if (Legal->isMaskRequired(HI->Store))
7836-
HGramOps.push_back(getBlockInMask(Builder.getInsertBlock()));
7836+
HGramOps.push_back(Builder.getInsertBlock()->getEntryMask());
78377837

78387838
return new VPHistogramRecipe(Opcode, HGramOps, HI->Store->getDebugLoc());
78397839
}
@@ -7887,7 +7887,7 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
78877887
// added initially. Masked replicate recipes will later be placed under an
78887888
// if-then construct to prevent side-effects. Generate recipes to compute
78897889
// the block mask for this region.
7890-
BlockInMask = getBlockInMask(Builder.getInsertBlock());
7890+
BlockInMask = Builder.getInsertBlock()->getEntryMask();
78917891
}
78927892

78937893
// Note that there is some custom logic to mark some intrinsics as uniform
@@ -8178,7 +8178,7 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
81788178
ReductionOpcode == Instruction::Sub) &&
81798179
"Expected an ADD or SUB operation for predicated partial "
81808180
"reductions (because the neutral element in the mask is zero)!");
8181-
Cond = getBlockInMask(Builder.getInsertBlock());
8181+
Cond = Builder.getInsertBlock()->getEntryMask();
81828182
VPValue *Zero =
81838183
Plan.getOrAddLiveIn(ConstantInt::get(Reduction->getType(), 0));
81848184
BinOp = Builder.createSelect(Cond, BinOp, Zero, Reduction->getDebugLoc());
@@ -8306,15 +8306,14 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83068306
// ---------------------------------------------------------------------------
83078307
// Predicate and linearize the top-level loop region.
83088308
// ---------------------------------------------------------------------------
8309-
auto BlockMaskCache = VPlanTransforms::introduceMasksAndLinearize(
8310-
*Plan, CM.foldTailByMasking());
8309+
VPlanTransforms::introduceMasksAndLinearize(*Plan, CM.foldTailByMasking());
83118310

83128311
// ---------------------------------------------------------------------------
83138312
// Construct wide recipes and apply predication for original scalar
83148313
// VPInstructions in the loop.
83158314
// ---------------------------------------------------------------------------
83168315
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
8317-
Builder, BlockMaskCache, LVer);
8316+
Builder, LVer);
83188317
RecipeBuilder.collectScaledReductions(Range);
83198318

83208319
// Scan the body of the loop in a topological order to visit each basic block
@@ -8325,9 +8324,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83258324

83268325
auto *MiddleVPBB = Plan->getMiddleBlock();
83278326
VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
8328-
// Mapping from VPValues in the initial plan to their widened VPValues. Needed
8329-
// temporarily to update created block masks.
8330-
DenseMap<VPValue *, VPValue *> Old2New;
83318327
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
83328328
// Convert input VPInstructions to widened recipes.
83338329
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
@@ -8381,7 +8377,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83818377
}
83828378
if (Recipe->getNumDefinedValues() == 1) {
83838379
SingleDef->replaceAllUsesWith(Recipe->getVPSingleValue());
8384-
Old2New[SingleDef] = Recipe->getVPSingleValue();
8380+
SingleDef->eraseFromParent();
83858381
} else {
83868382
assert(Recipe->getNumDefinedValues() == 0 &&
83878383
"Unexpected multidef recipe");
@@ -8390,14 +8386,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83908386
}
83918387
}
83928388

8393-
// replaceAllUsesWith above may invalidate the block masks. Update them here.
8394-
// TODO: Include the masks as operands in the predicated VPlan directly
8395-
// to remove the need to keep a map of masks beyond the predication
8396-
// transform.
8397-
RecipeBuilder.updateBlockMaskCache(Old2New);
8398-
for (VPValue *Old : Old2New.keys())
8399-
Old->getDefiningRecipe()->eraseFromParent();
8400-
84018389
assert(isa<VPRegionBlock>(LoopRegion) &&
84028390
!LoopRegion->getEntryBasicBlock()->empty() &&
84038391
"entry block must be set to a VPRegionBlock having a non-empty entry "
@@ -8431,6 +8419,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84318419
// Adjust the recipes for any inloop reductions.
84328420
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
84338421

8422+
// Erase the block entry masks, since they're not used any longer, so that
8423+
// future transforms only deal with recipe VPUsers.
8424+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))
8425+
VPBB->eraseEntryMask();
8426+
84348427
// Apply mandatory transformation to handle FP maxnum/minnum reduction with
84358428
// NaNs if possible, bail out otherwise.
84368429
if (!VPlanTransforms::runPass(VPlanTransforms::handleMaxMinNumReductions,
@@ -8521,9 +8514,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
85218514

85228515
// Collect mapping of IR header phis to header phi recipes, to be used in
85238516
// addScalarResumePhis.
8524-
DenseMap<VPBasicBlock *, VPValue *> BlockMaskCache;
85258517
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
8526-
Builder, BlockMaskCache, nullptr /*LVer*/);
8518+
Builder, nullptr /*LVer*/);
85278519
for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
85288520
if (isa<VPCanonicalIVPHIRecipe>(&R))
85298521
continue;
@@ -8681,7 +8673,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
86818673

86828674
VPValue *CondOp = nullptr;
86838675
if (CM.blockNeedsPredicationForAnyReason(CurrentLinkI->getParent()))
8684-
CondOp = RecipeBuilder.getBlockInMask(CurrentLink->getParent());
8676+
CondOp = CurrentLink->getParent()->getEntryMask();
86858677

86868678
// TODO: Retrieve FMFs from recipes directly.
86878679
RecurrenceDescriptor RdxDesc = Legal->getRecurrenceDescriptor(
@@ -8729,7 +8721,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
87298721
// different numbers of lanes. Partial reductions mask the input instead.
87308722
if (!PhiR->isInLoop() && CM.foldTailByMasking() &&
87318723
!isa<VPPartialReductionRecipe>(OrigExitingVPV->getDefiningRecipe())) {
8732-
VPValue *Cond = RecipeBuilder.getBlockInMask(PhiR->getParent());
8724+
VPValue *Cond = PhiR->getParent()->getEntryMask();
87338725
std::optional<FastMathFlags> FMFs =
87348726
PhiTy->isFloatingPointTy()
87358727
? std::make_optional(RdxDesc.getFastMathFlags())

llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,6 @@ class VPRecipeBuilder {
6767

6868
VPBuilder &Builder;
6969

70-
/// The mask of each VPBB, generated earlier and used for predicating recipes
71-
/// in VPBB.
72-
/// TODO: remove by applying predication when generating the masks.
73-
DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache;
74-
7570
// VPlan construction support: Hold a mapping from ingredients to
7671
// their recipe.
7772
DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe;
@@ -149,11 +144,9 @@ class VPRecipeBuilder {
149144
LoopVectorizationLegality *Legal,
150145
LoopVectorizationCostModel &CM,
151146
PredicatedScalarEvolution &PSE, VPBuilder &Builder,
152-
DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache,
153147
LoopVersioning *LVer)
154148
: Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal),
155-
CM(CM), PSE(PSE), Builder(Builder), BlockMaskCache(BlockMaskCache),
156-
LVer(LVer) {}
149+
CM(CM), PSE(PSE), Builder(Builder), LVer(LVer) {}
157150

158151
std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) {
159152
auto It = ScaledReductionMap.find(ExitInst);
@@ -182,12 +175,6 @@ class VPRecipeBuilder {
182175
Ingredient2Recipe[I] = R;
183176
}
184177

185-
/// Returns the *entry* mask for block \p VPBB or null if the mask is
186-
/// all-true.
187-
VPValue *getBlockInMask(VPBasicBlock *VPBB) const {
188-
return BlockMaskCache.lookup(VPBB);
189-
}
190-
191178
/// Return the recipe created for given ingredient.
192179
VPRecipeBase *getRecipe(Instruction *I) {
193180
assert(Ingredient2Recipe.count(I) &&
@@ -211,15 +198,6 @@ class VPRecipeBuilder {
211198
}
212199
return Plan.getOrAddLiveIn(V);
213200
}
214-
215-
void updateBlockMaskCache(DenseMap<VPValue *, VPValue *> &Old2New) {
216-
for (auto &[_, V] : BlockMaskCache) {
217-
if (auto *New = Old2New.lookup(V)) {
218-
V->replaceAllUsesWith(New);
219-
V = New;
220-
}
221-
}
222-
}
223201
};
224202
} // end namespace llvm
225203

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3811,13 +3811,15 @@ struct CastInfo<VPPhiAccessors, const VPRecipeBase *>
38113811

38123812
/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
38133813
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3814-
/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3815-
class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
3814+
/// output IR instructions. All PHI-like recipes must come before any non-PHI
3815+
/// recipes. It also has an operand corresponding to a mask on which to enter
3816+
/// the block, which is used early in the VPlan construction.
3817+
class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase, protected VPUser {
38163818
friend class VPlan;
38173819

38183820
/// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
38193821
VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3820-
: VPBlockBase(VPBasicBlockSC, Name.str()) {
3822+
: VPBlockBase(VPBasicBlockSC, Name.str()), VPUser(VPUBlockSC) {
38213823
if (Recipe)
38223824
appendRecipe(Recipe);
38233825
}
@@ -3830,7 +3832,7 @@ class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
38303832
RecipeListTy Recipes;
38313833

38323834
VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
3833-
: VPBlockBase(BlockSC, Name.str()) {}
3835+
: VPBlockBase(BlockSC, Name.str()), VPUser(VPUBlockSC) {}
38343836

38353837
public:
38363838
~VPBasicBlock() override {
@@ -3941,6 +3943,21 @@ class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
39413943
/// second predecessor is the exiting block of the region.
39423944
const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
39433945

3946+
/// Get the entry mask of this block. nullptr is used to communicate an
3947+
/// all-ones mask.
3948+
VPValue *getEntryMask() const {
3949+
return getNumOperands() ? getOperand(0) : nullptr;
3950+
}
3951+
3952+
/// Set the entry mask of this block: used by VPlanPredicator, when
3953+
/// predicating blocks.
3954+
void setEntryMask(VPValue *M) {
3955+
getNumOperands() ? setOperand(0, M) : addOperand(M);
3956+
}
3957+
3958+
/// Erase the entry mask of this block.
3959+
void eraseEntryMask() { eraseOperands(); }
3960+
39443961
protected:
39453962
/// Execute the recipes in the IR basic block \p BB.
39463963
void executeRecipes(VPTransformState *State, BasicBlock *BB);

llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp

Lines changed: 6 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -32,32 +32,15 @@ class VPPredicator {
3232
using EdgeMaskCacheTy =
3333
DenseMap<std::pair<const VPBasicBlock *, const VPBasicBlock *>,
3434
VPValue *>;
35-
using BlockMaskCacheTy = DenseMap<VPBasicBlock *, VPValue *>;
3635
EdgeMaskCacheTy EdgeMaskCache;
3736

38-
BlockMaskCacheTy BlockMaskCache;
39-
4037
/// Create an edge mask for every destination of cases and/or default.
4138
void createSwitchEdgeMasks(VPInstruction *SI);
4239

4340
/// Computes and return the predicate of the edge between \p Src and \p Dst,
4441
/// possibly inserting new recipes at \p Dst (using Builder's insertion point)
4542
VPValue *createEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst);
4643

47-
/// Returns the *entry* mask for \p VPBB.
48-
VPValue *getBlockInMask(VPBasicBlock *VPBB) const {
49-
return BlockMaskCache.lookup(VPBB);
50-
}
51-
52-
/// Record \p Mask as the *entry* mask of \p VPBB, which is expected to not
53-
/// already have a mask.
54-
void setBlockInMask(VPBasicBlock *VPBB, VPValue *Mask) {
55-
// TODO: Include the masks as operands in the predicated VPlan directly to
56-
// avoid keeping the map of masks beyond the predication transform.
57-
assert(!getBlockInMask(VPBB) && "Mask already set");
58-
BlockMaskCache[VPBB] = Mask;
59-
}
60-
6144
/// Record \p Mask as the mask of the edge from \p Src to \p Dst. The edge is
6245
/// expected to not have a mask already.
6346
VPValue *setEdgeMask(const VPBasicBlock *Src, const VPBasicBlock *Dst,
@@ -82,8 +65,6 @@ class VPPredicator {
8265

8366
/// Convert phi recipes in \p VPBB to VPBlendRecipes.
8467
void convertPhisToBlends(VPBasicBlock *VPBB);
85-
86-
const BlockMaskCacheTy getBlockMaskCache() const { return BlockMaskCache; }
8768
};
8869
} // namespace
8970

@@ -95,7 +76,7 @@ VPValue *VPPredicator::createEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst) {
9576
if (EdgeMask)
9677
return EdgeMask;
9778

98-
VPValue *SrcMask = getBlockInMask(Src);
79+
VPValue *SrcMask = Src->getEntryMask();
9980

10081
// If there's a single successor, there's no terminator recipe.
10182
if (Src->getNumSuccessors() == 1)
@@ -140,7 +121,6 @@ VPValue *VPPredicator::createBlockInMask(VPBasicBlock *VPBB) {
140121
VPValue *EdgeMask = createEdgeMask(cast<VPBasicBlock>(Predecessor), VPBB);
141122
if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is
142123
// too.
143-
setBlockInMask(VPBB, EdgeMask);
144124
return EdgeMask;
145125
}
146126

@@ -152,15 +132,13 @@ VPValue *VPPredicator::createBlockInMask(VPBasicBlock *VPBB) {
152132
BlockMask = Builder.createOr(BlockMask, EdgeMask, {});
153133
}
154134

155-
setBlockInMask(VPBB, BlockMask);
135+
VPBB->setEntryMask(BlockMask);
156136
return BlockMask;
157137
}
158138

159139
void VPPredicator::createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) {
160-
if (!FoldTail) {
161-
setBlockInMask(HeaderVPBB, nullptr);
140+
if (!FoldTail)
162141
return;
163-
}
164142

165143
// Introduce the early-exit compare IV <= BTC to form header block mask.
166144
// This is used instead of IV < TC because TC may wrap, unlike BTC. Start by
@@ -175,7 +153,7 @@ void VPPredicator::createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) {
175153

176154
VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
177155
VPValue *BlockMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
178-
setBlockInMask(HeaderVPBB, BlockMask);
156+
HeaderVPBB->setEntryMask(BlockMask);
179157
}
180158

181159
void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
@@ -201,7 +179,7 @@ void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
201179

202180
// We need to handle 2 separate cases below for all entries in Dst2Compares,
203181
// which excludes destinations matching the default destination.
204-
VPValue *SrcMask = getBlockInMask(Src);
182+
VPValue *SrcMask = Src->getEntryMask();
205183
VPValue *DefaultMask = nullptr;
206184
for (const auto &[Dst, Conds] : Dst2Compares) {
207185
// 1. Dst is not the default destination. Dst is reached if any of the
@@ -261,8 +239,7 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
261239
}
262240
}
263241

264-
DenseMap<VPBasicBlock *, VPValue *>
265-
VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {
242+
void VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {
266243
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
267244
// Scan the body of the loop in a topological order to visit each basic block
268245
// after having visited its predecessor basic blocks.
@@ -301,5 +278,4 @@ VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {
301278

302279
PrevVPBB = VPBB;
303280
}
304-
return Predicator.getBlockMaskCache();
305281
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -353,11 +353,8 @@ struct VPlanTransforms {
353353
/// Predicate and linearize the control-flow in the only loop region of
354354
/// \p Plan. If \p FoldTail is true, create a mask guarding the loop
355355
/// header, otherwise use all-true for the header mask. Masks for blocks are
356-
/// added to a block-to-mask map which is returned in order to be used later
357-
/// for wide recipe construction. This argument is temporary and will be
358-
/// removed in the future.
359-
static DenseMap<VPBasicBlock *, VPValue *>
360-
introduceMasksAndLinearize(VPlan &Plan, bool FoldTail);
356+
/// added to blocks themselves.
357+
static void introduceMasksAndLinearize(VPlan &Plan, bool FoldTail);
361358

362359
/// Add branch weight metadata, if the \p Plan's middle block is terminated by
363360
/// a BranchOnCond recipe.

0 commit comments

Comments
 (0)