Skip to content

Commit 4af6cba

Browse files
committed
[VPlan] Model entry-mask of VPBB in VPlan (NFC)
Model the entry-mask of a VPBasicBlock (VPBB) in VPlan as an operand of the block itself, replacing the Old2New and BlockMaskCache hacks with a simple replaceAllUsesWith (RAUW). The entry-mask of a VPBB is observed to be unused after adjustRecipesForReductions; hence, erase all the masks after this transform, which avoids a wide-scale refactor to make later transforms take non-recipe VPUsers into account.
1 parent 912cc5f commit 4af6cba

File tree

6 files changed

+64
-92
lines changed

6 files changed

+64
-92
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 17 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -7512,7 +7512,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
75127512

75137513
VPValue *Mask = nullptr;
75147514
if (Legal->isMaskRequired(I))
7515-
Mask = getBlockInMask(Builder.getInsertBlock());
7515+
Mask = Builder.getInsertBlock()->getEntryMask();
75167516

75177517
// Determine if the pointer operand of the access is either consecutive or
75187518
// reverse consecutive.
@@ -7709,7 +7709,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
77097709
// all-true mask.
77107710
VPValue *Mask = nullptr;
77117711
if (Legal->isMaskRequired(CI))
7712-
Mask = getBlockInMask(Builder.getInsertBlock());
7712+
Mask = Builder.getInsertBlock()->getEntryMask();
77137713
else
77147714
Mask = Plan.getOrAddLiveIn(
77157715
ConstantInt::getTrue(IntegerType::getInt1Ty(CI->getContext())));
@@ -7751,7 +7751,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
77517751
// div/rem operation itself. Otherwise fall through to general handling below.
77527752
if (CM.isPredicatedInst(I)) {
77537753
SmallVector<VPValue *> Ops(Operands);
7754-
VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
7754+
VPValue *Mask = Builder.getInsertBlock()->getEntryMask();
77557755
VPValue *One = Plan.getConstantInt(I->getType(), 1u);
77567756
auto *SafeRHS = Builder.createSelect(Mask, Ops[1], One, I->getDebugLoc());
77577757
Ops[1] = SafeRHS;
@@ -7831,7 +7831,7 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
78317831
// In case of predicated execution (due to tail-folding, or conditional
78327832
// execution, or both), pass the relevant mask.
78337833
if (Legal->isMaskRequired(HI->Store))
7834-
HGramOps.push_back(getBlockInMask(Builder.getInsertBlock()));
7834+
HGramOps.push_back(Builder.getInsertBlock()->getEntryMask());
78357835

78367836
return new VPHistogramRecipe(Opcode, HGramOps, HI->Store->getDebugLoc());
78377837
}
@@ -7885,7 +7885,7 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
78857885
// added initially. Masked replicate recipes will later be placed under an
78867886
// if-then construct to prevent side-effects. Generate recipes to compute
78877887
// the block mask for this region.
7888-
BlockInMask = getBlockInMask(Builder.getInsertBlock());
7888+
BlockInMask = Builder.getInsertBlock()->getEntryMask();
78897889
}
78907890

78917891
// Note that there is some custom logic to mark some intrinsics as uniform
@@ -8176,7 +8176,7 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
81768176
ReductionOpcode == Instruction::Sub) &&
81778177
"Expected an ADD or SUB operation for predicated partial "
81788178
"reductions (because the neutral element in the mask is zero)!");
8179-
Cond = getBlockInMask(Builder.getInsertBlock());
8179+
Cond = Builder.getInsertBlock()->getEntryMask();
81808180
VPValue *Zero = Plan.getConstantInt(Reduction->getType(), 0);
81818181
BinOp = Builder.createSelect(Cond, BinOp, Zero, Reduction->getDebugLoc());
81828182
}
@@ -8303,15 +8303,14 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83038303
// ---------------------------------------------------------------------------
83048304
// Predicate and linearize the top-level loop region.
83058305
// ---------------------------------------------------------------------------
8306-
auto BlockMaskCache = VPlanTransforms::introduceMasksAndLinearize(
8307-
*Plan, CM.foldTailByMasking());
8306+
VPlanTransforms::introduceMasksAndLinearize(*Plan, CM.foldTailByMasking());
83088307

83098308
// ---------------------------------------------------------------------------
83108309
// Construct wide recipes and apply predication for original scalar
83118310
// VPInstructions in the loop.
83128311
// ---------------------------------------------------------------------------
83138312
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
8314-
Builder, BlockMaskCache, LVer);
8313+
Builder, LVer);
83158314
RecipeBuilder.collectScaledReductions(Range);
83168315

83178316
// Scan the body of the loop in a topological order to visit each basic block
@@ -8322,9 +8321,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83228321

83238322
auto *MiddleVPBB = Plan->getMiddleBlock();
83248323
VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
8325-
// Mapping from VPValues in the initial plan to their widened VPValues. Needed
8326-
// temporarily to update created block masks.
8327-
DenseMap<VPValue *, VPValue *> Old2New;
83288324
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
83298325
// Convert input VPInstructions to widened recipes.
83308326
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
@@ -8378,7 +8374,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83788374
}
83798375
if (Recipe->getNumDefinedValues() == 1) {
83808376
SingleDef->replaceAllUsesWith(Recipe->getVPSingleValue());
8381-
Old2New[SingleDef] = Recipe->getVPSingleValue();
8377+
SingleDef->eraseFromParent();
83828378
} else {
83838379
assert(Recipe->getNumDefinedValues() == 0 &&
83848380
"Unexpected multidef recipe");
@@ -8387,14 +8383,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
83878383
}
83888384
}
83898385

8390-
// replaceAllUsesWith above may invalidate the block masks. Update them here.
8391-
// TODO: Include the masks as operands in the predicated VPlan directly
8392-
// to remove the need to keep a map of masks beyond the predication
8393-
// transform.
8394-
RecipeBuilder.updateBlockMaskCache(Old2New);
8395-
for (VPValue *Old : Old2New.keys())
8396-
Old->getDefiningRecipe()->eraseFromParent();
8397-
83988386
assert(isa<VPRegionBlock>(LoopRegion) &&
83998387
!LoopRegion->getEntryBasicBlock()->empty() &&
84008388
"entry block must be set to a VPRegionBlock having a non-empty entry "
@@ -8428,6 +8416,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84288416
// Adjust the recipes for any inloop reductions.
84298417
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
84308418

8419+
// Erase the block entry masks, since they're not used any longer, so that
8420+
// future transforms only deal with recipe VPUsers.
8421+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))
8422+
VPBB->eraseEntryMask();
8423+
84318424
// Apply mandatory transformation to handle FP maxnum/minnum reduction with
84328425
// NaNs if possible, bail out otherwise.
84338426
if (!VPlanTransforms::runPass(VPlanTransforms::handleMaxMinNumReductions,
@@ -8518,9 +8511,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
85188511

85198512
// Collect mapping of IR header phis to header phi recipes, to be used in
85208513
// addScalarResumePhis.
8521-
DenseMap<VPBasicBlock *, VPValue *> BlockMaskCache;
85228514
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
8523-
Builder, BlockMaskCache, nullptr /*LVer*/);
8515+
Builder, nullptr /*LVer*/);
85248516
for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
85258517
if (isa<VPCanonicalIVPHIRecipe>(&R))
85268518
continue;
@@ -8678,7 +8670,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
86788670

86798671
VPValue *CondOp = nullptr;
86808672
if (CM.blockNeedsPredicationForAnyReason(CurrentLinkI->getParent()))
8681-
CondOp = RecipeBuilder.getBlockInMask(CurrentLink->getParent());
8673+
CondOp = CurrentLink->getParent()->getEntryMask();
86828674

86838675
// TODO: Retrieve FMFs from recipes directly.
86848676
RecurrenceDescriptor RdxDesc = Legal->getRecurrenceDescriptor(
@@ -8726,7 +8718,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
87268718
// different numbers of lanes. Partial reductions mask the input instead.
87278719
if (!PhiR->isInLoop() && CM.foldTailByMasking() &&
87288720
!isa<VPPartialReductionRecipe>(OrigExitingVPV->getDefiningRecipe())) {
8729-
VPValue *Cond = RecipeBuilder.getBlockInMask(PhiR->getParent());
8721+
VPValue *Cond = PhiR->getParent()->getEntryMask();
87308722
std::optional<FastMathFlags> FMFs =
87318723
PhiTy->isFloatingPointTy()
87328724
? std::make_optional(RdxDesc.getFastMathFlags())

llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,6 @@ class VPRecipeBuilder {
6767

6868
VPBuilder &Builder;
6969

70-
/// The mask of each VPBB, generated earlier and used for predicating recipes
71-
/// in VPBB.
72-
/// TODO: remove by applying predication when generating the masks.
73-
DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache;
74-
7570
// VPlan construction support: Hold a mapping from ingredients to
7671
// their recipe.
7772
DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe;
@@ -149,11 +144,9 @@ class VPRecipeBuilder {
149144
LoopVectorizationLegality *Legal,
150145
LoopVectorizationCostModel &CM,
151146
PredicatedScalarEvolution &PSE, VPBuilder &Builder,
152-
DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache,
153147
LoopVersioning *LVer)
154148
: Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal),
155-
CM(CM), PSE(PSE), Builder(Builder), BlockMaskCache(BlockMaskCache),
156-
LVer(LVer) {}
149+
CM(CM), PSE(PSE), Builder(Builder), LVer(LVer) {}
157150

158151
std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) {
159152
auto It = ScaledReductionMap.find(ExitInst);
@@ -182,12 +175,6 @@ class VPRecipeBuilder {
182175
Ingredient2Recipe[I] = R;
183176
}
184177

185-
/// Returns the *entry* mask for block \p VPBB or null if the mask is
186-
/// all-true.
187-
VPValue *getBlockInMask(VPBasicBlock *VPBB) const {
188-
return BlockMaskCache.lookup(VPBB);
189-
}
190-
191178
/// Return the recipe created for given ingredient.
192179
VPRecipeBase *getRecipe(Instruction *I) {
193180
assert(Ingredient2Recipe.count(I) &&
@@ -211,15 +198,6 @@ class VPRecipeBuilder {
211198
}
212199
return Plan.getOrAddLiveIn(V);
213200
}
214-
215-
void updateBlockMaskCache(DenseMap<VPValue *, VPValue *> &Old2New) {
216-
for (auto &[_, V] : BlockMaskCache) {
217-
if (auto *New = Old2New.lookup(V)) {
218-
V->replaceAllUsesWith(New);
219-
V = New;
220-
}
221-
}
222-
}
223201
};
224202
} // end namespace llvm
225203

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3816,13 +3816,15 @@ struct CastInfo<VPPhiAccessors, const VPRecipeBase *>
38163816

38173817
/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
38183818
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3819-
/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3820-
class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
3819+
/// output IR instructions. All PHI-like recipes must come before any non-PHI
3820+
/// recipes. It also has an operand corresponding to a mask on which to enter
3821+
/// the block, which is used early in the VPlan construction.
3822+
class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase, protected VPUser {
38213823
friend class VPlan;
38223824

38233825
/// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
38243826
VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3825-
: VPBlockBase(VPBasicBlockSC, Name.str()) {
3827+
: VPBlockBase(VPBasicBlockSC, Name.str()), VPUser(VPUBlockSC) {
38263828
if (Recipe)
38273829
appendRecipe(Recipe);
38283830
}
@@ -3835,7 +3837,7 @@ class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
38353837
RecipeListTy Recipes;
38363838

38373839
VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
3838-
: VPBlockBase(BlockSC, Name.str()) {}
3840+
: VPBlockBase(BlockSC, Name.str()), VPUser(VPUBlockSC) {}
38393841

38403842
public:
38413843
~VPBasicBlock() override {
@@ -3946,6 +3948,21 @@ class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
39463948
/// second predecessor is the exiting block of the region.
39473949
const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
39483950

3951+
/// Get the entry mask of this block. nullptr is used to communicate an
3952+
/// all-ones mask.
3953+
VPValue *getEntryMask() const {
3954+
return getNumOperands() ? getOperand(0) : nullptr;
3955+
}
3956+
3957+
/// Set the entry mask of this block: used by VPlanPredicator, when
3958+
/// predicating blocks.
3959+
void setEntryMask(VPValue *M) {
3960+
getNumOperands() ? setOperand(0, M) : addOperand(M);
3961+
}
3962+
3963+
/// Erase the entry mask of this block.
3964+
void eraseEntryMask() { eraseOperands(); }
3965+
39493966
protected:
39503967
/// Execute the recipes in the IR basic block \p BB.
39513968
void executeRecipes(VPTransformState *State, BasicBlock *BB);

llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp

Lines changed: 6 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -32,32 +32,15 @@ class VPPredicator {
3232
using EdgeMaskCacheTy =
3333
DenseMap<std::pair<const VPBasicBlock *, const VPBasicBlock *>,
3434
VPValue *>;
35-
using BlockMaskCacheTy = DenseMap<VPBasicBlock *, VPValue *>;
3635
EdgeMaskCacheTy EdgeMaskCache;
3736

38-
BlockMaskCacheTy BlockMaskCache;
39-
4037
/// Create an edge mask for every destination of cases and/or default.
4138
void createSwitchEdgeMasks(VPInstruction *SI);
4239

4340
/// Computes and return the predicate of the edge between \p Src and \p Dst,
4441
/// possibly inserting new recipes at \p Dst (using Builder's insertion point)
4542
VPValue *createEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst);
4643

47-
/// Returns the *entry* mask for \p VPBB.
48-
VPValue *getBlockInMask(VPBasicBlock *VPBB) const {
49-
return BlockMaskCache.lookup(VPBB);
50-
}
51-
52-
/// Record \p Mask as the *entry* mask of \p VPBB, which is expected to not
53-
/// already have a mask.
54-
void setBlockInMask(VPBasicBlock *VPBB, VPValue *Mask) {
55-
// TODO: Include the masks as operands in the predicated VPlan directly to
56-
// avoid keeping the map of masks beyond the predication transform.
57-
assert(!getBlockInMask(VPBB) && "Mask already set");
58-
BlockMaskCache[VPBB] = Mask;
59-
}
60-
6144
/// Record \p Mask as the mask of the edge from \p Src to \p Dst. The edge is
6245
/// expected to not have a mask already.
6346
VPValue *setEdgeMask(const VPBasicBlock *Src, const VPBasicBlock *Dst,
@@ -82,8 +65,6 @@ class VPPredicator {
8265

8366
/// Convert phi recipes in \p VPBB to VPBlendRecipes.
8467
void convertPhisToBlends(VPBasicBlock *VPBB);
85-
86-
const BlockMaskCacheTy getBlockMaskCache() const { return BlockMaskCache; }
8768
};
8869
} // namespace
8970

@@ -95,7 +76,7 @@ VPValue *VPPredicator::createEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst) {
9576
if (EdgeMask)
9677
return EdgeMask;
9778

98-
VPValue *SrcMask = getBlockInMask(Src);
79+
VPValue *SrcMask = Src->getEntryMask();
9980

10081
// If there's a single successor, there's no terminator recipe.
10182
if (Src->getNumSuccessors() == 1)
@@ -140,7 +121,6 @@ VPValue *VPPredicator::createBlockInMask(VPBasicBlock *VPBB) {
140121
VPValue *EdgeMask = createEdgeMask(cast<VPBasicBlock>(Predecessor), VPBB);
141122
if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is
142123
// too.
143-
setBlockInMask(VPBB, EdgeMask);
144124
return EdgeMask;
145125
}
146126

@@ -152,15 +132,13 @@ VPValue *VPPredicator::createBlockInMask(VPBasicBlock *VPBB) {
152132
BlockMask = Builder.createOr(BlockMask, EdgeMask, {});
153133
}
154134

155-
setBlockInMask(VPBB, BlockMask);
135+
VPBB->setEntryMask(BlockMask);
156136
return BlockMask;
157137
}
158138

159139
void VPPredicator::createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) {
160-
if (!FoldTail) {
161-
setBlockInMask(HeaderVPBB, nullptr);
140+
if (!FoldTail)
162141
return;
163-
}
164142

165143
// Introduce the early-exit compare IV <= BTC to form header block mask.
166144
// This is used instead of IV < TC because TC may wrap, unlike BTC. Start by
@@ -175,7 +153,7 @@ void VPPredicator::createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) {
175153

176154
VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
177155
VPValue *BlockMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
178-
setBlockInMask(HeaderVPBB, BlockMask);
156+
HeaderVPBB->setEntryMask(BlockMask);
179157
}
180158

181159
void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
@@ -201,7 +179,7 @@ void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
201179

202180
// We need to handle 2 separate cases below for all entries in Dst2Compares,
203181
// which excludes destinations matching the default destination.
204-
VPValue *SrcMask = getBlockInMask(Src);
182+
VPValue *SrcMask = Src->getEntryMask();
205183
VPValue *DefaultMask = nullptr;
206184
for (const auto &[Dst, Conds] : Dst2Compares) {
207185
// 1. Dst is not the default destination. Dst is reached if any of the
@@ -261,8 +239,7 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
261239
}
262240
}
263241

264-
DenseMap<VPBasicBlock *, VPValue *>
265-
VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {
242+
void VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {
266243
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
267244
// Scan the body of the loop in a topological order to visit each basic block
268245
// after having visited its predecessor basic blocks.
@@ -301,5 +278,4 @@ VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {
301278

302279
PrevVPBB = VPBB;
303280
}
304-
return Predicator.getBlockMaskCache();
305281
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -353,11 +353,8 @@ struct VPlanTransforms {
353353
/// Predicate and linearize the control-flow in the only loop region of
354354
/// \p Plan. If \p FoldTail is true, create a mask guarding the loop
355355
/// header, otherwise use all-true for the header mask. Masks for blocks are
356-
/// added to a block-to-mask map which is returned in order to be used later
357-
/// for wide recipe construction. This argument is temporary and will be
358-
/// removed in the future.
359-
static DenseMap<VPBasicBlock *, VPValue *>
360-
introduceMasksAndLinearize(VPlan &Plan, bool FoldTail);
356+
/// added to blocks themselves.
357+
static void introduceMasksAndLinearize(VPlan &Plan, bool FoldTail);
361358

362359
/// Add branch weight metadata, if the \p Plan's middle block is terminated by
363360
/// a BranchOnCond recipe.

0 commit comments

Comments
 (0)