Skip to content

Commit 57b35e1

Browse files
committed
[VPlan] Model entry-mask of VPBB in VPlan (NFC)
Model the entry-mask of a VPBB in VPlan, replacing the Old2New and BlockMaskCache hacks with a simple RAUW. The entry-mask of a VPBB is unused after adjustRecipesForReductions, so erase all the masks right after that transform; this avoids a wide-scale refactor to make later transforms account for non-recipe VPUsers.
1 parent 1abb055 commit 57b35e1

File tree

6 files changed

+64
-90
lines changed

6 files changed

+64
-90
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 16 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -7578,7 +7578,7 @@ VPWidenMemoryRecipe *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
75787578

75797579
VPValue *Mask = nullptr;
75807580
if (Legal->isMaskRequired(I))
7581-
Mask = getBlockInMask(Builder.getInsertBlock());
7581+
Mask = Builder.getInsertBlock()->getEntryMask();
75827582

75837583
// Determine if the pointer operand of the access is either consecutive or
75847584
// reverse consecutive.
@@ -7791,7 +7791,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(VPInstruction *VPI,
77917791
// all-true mask.
77927792
VPValue *Mask = nullptr;
77937793
if (Legal->isMaskRequired(CI))
7794-
Mask = getBlockInMask(Builder.getInsertBlock());
7794+
Mask = Builder.getInsertBlock()->getEntryMask();
77957795
else
77967796
Mask = Plan.getOrAddLiveIn(
77977797
ConstantInt::getTrue(IntegerType::getInt1Ty(Plan.getContext())));
@@ -7834,7 +7834,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) {
78347834
// div/rem operation itself. Otherwise fall through to general handling below.
78357835
if (CM.isPredicatedInst(I)) {
78367836
SmallVector<VPValue *> Ops(VPI->operands());
7837-
VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
7837+
VPValue *Mask = Builder.getInsertBlock()->getEntryMask();
78387838
VPValue *One = Plan.getConstantInt(I->getType(), 1u);
78397839
auto *SafeRHS =
78407840
Builder.createSelect(Mask, Ops[1], One, VPI->getDebugLoc());
@@ -7914,7 +7914,7 @@ VPHistogramRecipe *VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
79147914
// In case of predicated execution (due to tail-folding, or conditional
79157915
// execution, or both), pass the relevant mask.
79167916
if (Legal->isMaskRequired(HI->Store))
7917-
HGramOps.push_back(getBlockInMask(Builder.getInsertBlock()));
7917+
HGramOps.push_back(Builder.getInsertBlock()->getEntryMask());
79187918

79197919
return new VPHistogramRecipe(Opcode, HGramOps, VPI->getDebugLoc());
79207920
}
@@ -7968,7 +7968,7 @@ VPReplicateRecipe *VPRecipeBuilder::handleReplication(VPInstruction *VPI,
79687968
// added initially. Masked replicate recipes will later be placed under an
79697969
// if-then construct to prevent side-effects. Generate recipes to compute
79707970
// the block mask for this region.
7971-
BlockInMask = getBlockInMask(Builder.getInsertBlock());
7971+
BlockInMask = Builder.getInsertBlock()->getEntryMask();
79727972
}
79737973

79747974
// Note that there is some custom logic to mark some intrinsics as uniform
@@ -8302,7 +8302,7 @@ VPRecipeBuilder::tryToCreatePartialReduction(VPInstruction *Reduction,
83028302

83038303
VPValue *Cond = nullptr;
83048304
if (CM.blockNeedsPredicationForAnyReason(ReductionI->getParent()))
8305-
Cond = getBlockInMask(Builder.getInsertBlock());
8305+
Cond = Builder.getInsertBlock()->getEntryMask();
83068306
return new VPPartialReductionRecipe(ReductionOpcode, Accumulator, BinOp, Cond,
83078307
ScaleFactor, ReductionI);
83088308
}
@@ -8426,15 +8426,14 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84268426
// ---------------------------------------------------------------------------
84278427
// Predicate and linearize the top-level loop region.
84288428
// ---------------------------------------------------------------------------
8429-
auto BlockMaskCache = VPlanTransforms::introduceMasksAndLinearize(
8430-
*Plan, CM.foldTailByMasking());
8429+
VPlanTransforms::introduceMasksAndLinearize(*Plan, CM.foldTailByMasking());
84318430

84328431
// ---------------------------------------------------------------------------
84338432
// Construct wide recipes and apply predication for original scalar
84348433
// VPInstructions in the loop.
84358434
// ---------------------------------------------------------------------------
84368435
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
8437-
Builder, BlockMaskCache);
8436+
Builder);
84388437
// TODO: Handle partial reductions with EVL tail folding.
84398438
if (!CM.foldTailWithEVL())
84408439
RecipeBuilder.collectScaledReductions(Range);
@@ -8447,9 +8446,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84478446

84488447
auto *MiddleVPBB = Plan->getMiddleBlock();
84498448
VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
8450-
// Mapping from VPValues in the initial plan to their widened VPValues. Needed
8451-
// temporarily to update created block masks.
8452-
DenseMap<VPValue *, VPValue *> Old2New;
84538449
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
84548450
// Convert input VPInstructions to widened recipes.
84558451
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
@@ -8505,7 +8501,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
85058501
}
85068502
if (Recipe->getNumDefinedValues() == 1) {
85078503
SingleDef->replaceAllUsesWith(Recipe->getVPSingleValue());
8508-
Old2New[SingleDef] = Recipe->getVPSingleValue();
8504+
SingleDef->eraseFromParent();
85098505
} else {
85108506
assert(Recipe->getNumDefinedValues() == 0 &&
85118507
"Unexpected multidef recipe");
@@ -8514,14 +8510,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
85148510
}
85158511
}
85168512

8517-
// replaceAllUsesWith above may invalidate the block masks. Update them here.
8518-
// TODO: Include the masks as operands in the predicated VPlan directly
8519-
// to remove the need to keep a map of masks beyond the predication
8520-
// transform.
8521-
RecipeBuilder.updateBlockMaskCache(Old2New);
8522-
for (VPValue *Old : Old2New.keys())
8523-
Old->getDefiningRecipe()->eraseFromParent();
8524-
85258513
assert(isa<VPRegionBlock>(LoopRegion) &&
85268514
!LoopRegion->getEntryBasicBlock()->empty() &&
85278515
"entry block must be set to a VPRegionBlock having a non-empty entry "
@@ -8541,6 +8529,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
85418529
// Adjust the recipes for any inloop reductions.
85428530
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
85438531

8532+
// Erase the block entry masks, since they're not used any longer, so that
8533+
// future transforms only deal with recipe VPUsers.
8534+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))
8535+
VPBB->eraseEntryMask();
8536+
85448537
// Apply mandatory transformation to handle FP maxnum/minnum reduction with
85458538
// NaNs if possible, bail out otherwise.
85468539
if (!VPlanTransforms::runPass(VPlanTransforms::handleMaxMinNumReductions,
@@ -8792,7 +8785,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
87928785

87938786
VPValue *CondOp = nullptr;
87948787
if (CM.blockNeedsPredicationForAnyReason(CurrentLinkI->getParent()))
8795-
CondOp = RecipeBuilder.getBlockInMask(CurrentLink->getParent());
8788+
CondOp = CurrentLink->getParent()->getEntryMask();
87968789

87978790
auto *RedRecipe = new VPReductionRecipe(
87988791
Kind, FMFs, CurrentLinkI, PreviousLink, VecOp, CondOp,
@@ -8833,7 +8826,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
88338826
// different numbers of lanes. Partial reductions mask the input instead.
88348827
if (!PhiR->isInLoop() && CM.foldTailByMasking() &&
88358828
!isa<VPPartialReductionRecipe>(OrigExitingVPV)) {
8836-
VPValue *Cond = RecipeBuilder.getBlockInMask(PhiR->getParent());
8829+
VPValue *Cond = PhiR->getParent()->getEntryMask();
88378830
std::optional<FastMathFlags> FMFs =
88388831
PhiTy->isFloatingPointTy()
88398832
? std::make_optional(RdxDesc.getFastMathFlags())

llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 2 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,6 @@ class VPRecipeBuilder {
6767

6868
VPBuilder &Builder;
6969

70-
/// The mask of each VPBB, generated earlier and used for predicating recipes
71-
/// in VPBB.
72-
/// TODO: remove by applying predication when generating the masks.
73-
DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache;
74-
7570
// VPlan construction support: Hold a mapping from ingredients to
7671
// their recipe.
7772
DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe;
@@ -138,10 +133,9 @@ class VPRecipeBuilder {
138133
const TargetTransformInfo *TTI,
139134
LoopVectorizationLegality *Legal,
140135
LoopVectorizationCostModel &CM,
141-
PredicatedScalarEvolution &PSE, VPBuilder &Builder,
142-
DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache)
136+
PredicatedScalarEvolution &PSE, VPBuilder &Builder)
143137
: Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal),
144-
CM(CM), PSE(PSE), Builder(Builder), BlockMaskCache(BlockMaskCache) {}
138+
CM(CM), PSE(PSE), Builder(Builder) {}
145139

146140
std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) {
147141
auto It = ScaledReductionMap.find(ExitInst);
@@ -169,12 +163,6 @@ class VPRecipeBuilder {
169163
Ingredient2Recipe[I] = R;
170164
}
171165

172-
/// Returns the *entry* mask for block \p VPBB or null if the mask is
173-
/// all-true.
174-
VPValue *getBlockInMask(VPBasicBlock *VPBB) const {
175-
return BlockMaskCache.lookup(VPBB);
176-
}
177-
178166
/// Return the recipe created for given ingredient.
179167
VPRecipeBase *getRecipe(Instruction *I) {
180168
assert(Ingredient2Recipe.count(I) &&
@@ -196,15 +184,6 @@ class VPRecipeBuilder {
196184
}
197185
return Plan.getOrAddLiveIn(V);
198186
}
199-
200-
void updateBlockMaskCache(DenseMap<VPValue *, VPValue *> &Old2New) {
201-
for (auto &[_, V] : BlockMaskCache) {
202-
if (auto *New = Old2New.lookup(V)) {
203-
V->replaceAllUsesWith(New);
204-
V = New;
205-
}
206-
}
207-
}
208187
};
209188
} // end namespace llvm
210189

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3982,13 +3982,15 @@ struct CastInfo<VPIRMetadata, const VPRecipeBase *>
39823982

39833983
/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
39843984
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3985-
/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3986-
class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
3985+
/// output IR instructions. All PHI-like recipes must come before any non-PHI
3986+
/// recipes. It also has an operand corresponding to a mask on which to enter
3987+
/// the block, which is used early in the VPlan construction.
3988+
class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase, protected VPUser {
39873989
friend class VPlan;
39883990

39893991
/// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
39903992
VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3991-
: VPBlockBase(VPBasicBlockSC, Name.str()) {
3993+
: VPBlockBase(VPBasicBlockSC, Name.str()), VPUser(VPUBlockSC) {
39923994
if (Recipe)
39933995
appendRecipe(Recipe);
39943996
}
@@ -4001,7 +4003,7 @@ class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
40014003
RecipeListTy Recipes;
40024004

40034005
VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
4004-
: VPBlockBase(BlockSC, Name.str()) {}
4006+
: VPBlockBase(BlockSC, Name.str()), VPUser(VPUBlockSC) {}
40054007

40064008
public:
40074009
~VPBasicBlock() override {
@@ -4112,6 +4114,21 @@ class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
41124114
/// second predecessor is the exiting block of the region.
41134115
const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
41144116

4117+
/// Get the entry mask of this block. nullptr is used to communicate an
4118+
/// all-ones mask.
4119+
VPValue *getEntryMask() const {
4120+
return getNumOperands() ? getOperand(0) : nullptr;
4121+
}
4122+
4123+
/// Set the entry mask of this block: used by VPlanPredicator, when
4124+
/// predicating blocks.
4125+
void setEntryMask(VPValue *M) {
4126+
getNumOperands() ? setOperand(0, M) : addOperand(M);
4127+
}
4128+
4129+
/// Erase the entry mask of this block.
4130+
void eraseEntryMask() { eraseOperands(); }
4131+
41154132
protected:
41164133
/// Execute the recipes in the IR basic block \p BB.
41174134
void executeRecipes(VPTransformState *State, BasicBlock *BB);

llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp

Lines changed: 6 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -32,32 +32,15 @@ class VPPredicator {
3232
using EdgeMaskCacheTy =
3333
DenseMap<std::pair<const VPBasicBlock *, const VPBasicBlock *>,
3434
VPValue *>;
35-
using BlockMaskCacheTy = DenseMap<VPBasicBlock *, VPValue *>;
3635
EdgeMaskCacheTy EdgeMaskCache;
3736

38-
BlockMaskCacheTy BlockMaskCache;
39-
4037
/// Create an edge mask for every destination of cases and/or default.
4138
void createSwitchEdgeMasks(VPInstruction *SI);
4239

4340
/// Computes and return the predicate of the edge between \p Src and \p Dst,
4441
/// possibly inserting new recipes at \p Dst (using Builder's insertion point)
4542
VPValue *createEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst);
4643

47-
/// Returns the *entry* mask for \p VPBB.
48-
VPValue *getBlockInMask(VPBasicBlock *VPBB) const {
49-
return BlockMaskCache.lookup(VPBB);
50-
}
51-
52-
/// Record \p Mask as the *entry* mask of \p VPBB, which is expected to not
53-
/// already have a mask.
54-
void setBlockInMask(VPBasicBlock *VPBB, VPValue *Mask) {
55-
// TODO: Include the masks as operands in the predicated VPlan directly to
56-
// avoid keeping the map of masks beyond the predication transform.
57-
assert(!getBlockInMask(VPBB) && "Mask already set");
58-
BlockMaskCache[VPBB] = Mask;
59-
}
60-
6144
/// Record \p Mask as the mask of the edge from \p Src to \p Dst. The edge is
6245
/// expected to not have a mask already.
6346
VPValue *setEdgeMask(const VPBasicBlock *Src, const VPBasicBlock *Dst,
@@ -82,8 +65,6 @@ class VPPredicator {
8265

8366
/// Convert phi recipes in \p VPBB to VPBlendRecipes.
8467
void convertPhisToBlends(VPBasicBlock *VPBB);
85-
86-
const BlockMaskCacheTy getBlockMaskCache() const { return BlockMaskCache; }
8768
};
8869
} // namespace
8970

@@ -95,7 +76,7 @@ VPValue *VPPredicator::createEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst) {
9576
if (EdgeMask)
9677
return EdgeMask;
9778

98-
VPValue *SrcMask = getBlockInMask(Src);
79+
VPValue *SrcMask = Src->getEntryMask();
9980

10081
// If there's a single successor, there's no terminator recipe.
10182
if (Src->getNumSuccessors() == 1)
@@ -140,7 +121,6 @@ VPValue *VPPredicator::createBlockInMask(VPBasicBlock *VPBB) {
140121
VPValue *EdgeMask = createEdgeMask(cast<VPBasicBlock>(Predecessor), VPBB);
141122
if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is
142123
// too.
143-
setBlockInMask(VPBB, EdgeMask);
144124
return EdgeMask;
145125
}
146126

@@ -152,15 +132,13 @@ VPValue *VPPredicator::createBlockInMask(VPBasicBlock *VPBB) {
152132
BlockMask = Builder.createOr(BlockMask, EdgeMask, {});
153133
}
154134

155-
setBlockInMask(VPBB, BlockMask);
135+
VPBB->setEntryMask(BlockMask);
156136
return BlockMask;
157137
}
158138

159139
void VPPredicator::createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) {
160-
if (!FoldTail) {
161-
setBlockInMask(HeaderVPBB, nullptr);
140+
if (!FoldTail)
162141
return;
163-
}
164142

165143
// Introduce the early-exit compare IV <= BTC to form header block mask.
166144
// This is used instead of IV < TC because TC may wrap, unlike BTC. Start by
@@ -175,7 +153,7 @@ void VPPredicator::createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) {
175153

176154
VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
177155
VPValue *BlockMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
178-
setBlockInMask(HeaderVPBB, BlockMask);
156+
HeaderVPBB->setEntryMask(BlockMask);
179157
}
180158

181159
void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
@@ -201,7 +179,7 @@ void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
201179

202180
// We need to handle 2 separate cases below for all entries in Dst2Compares,
203181
// which excludes destinations matching the default destination.
204-
VPValue *SrcMask = getBlockInMask(Src);
182+
VPValue *SrcMask = Src->getEntryMask();
205183
VPValue *DefaultMask = nullptr;
206184
for (const auto &[Dst, Conds] : Dst2Compares) {
207185
// 1. Dst is not the default destination. Dst is reached if any of the
@@ -261,8 +239,7 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
261239
}
262240
}
263241

264-
DenseMap<VPBasicBlock *, VPValue *>
265-
VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {
242+
void VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {
266243
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
267244
// Scan the body of the loop in a topological order to visit each basic block
268245
// after having visited its predecessor basic blocks.
@@ -301,5 +278,4 @@ VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {
301278

302279
PrevVPBB = VPBB;
303280
}
304-
return Predicator.getBlockMaskCache();
305281
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -360,11 +360,8 @@ struct VPlanTransforms {
360360
/// Predicate and linearize the control-flow in the only loop region of
361361
/// \p Plan. If \p FoldTail is true, create a mask guarding the loop
362362
/// header, otherwise use all-true for the header mask. Masks for blocks are
363-
/// added to a block-to-mask map which is returned in order to be used later
364-
/// for wide recipe construction. This argument is temporary and will be
365-
/// removed in the future.
366-
static DenseMap<VPBasicBlock *, VPValue *>
367-
introduceMasksAndLinearize(VPlan &Plan, bool FoldTail);
363+
/// attached to the blocks themselves as their entry masks.
364+
static void introduceMasksAndLinearize(VPlan &Plan, bool FoldTail);
368365

369366
/// Add branch weight metadata, if the \p Plan's middle block is terminated by
370367
/// a BranchOnCond recipe.

0 commit comments

Comments
 (0)