Skip to content

Commit 58c8fc4

Browse files
committed
!fixup address comments, thanks
1 parent 25316c2 commit 58c8fc4

File tree

4 files changed

+84
-72
lines changed

4 files changed

+84
-72
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9254,6 +9254,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
92549254

92559255
auto *MiddleVPBB = Plan->getMiddleBlock();
92569256
VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
9257+
// Mapping from VPValues in the initial plan to their widened VPValues. Needed
9258+
// temporarily to update created block masks.
9259+
DenseMap<VPValue *, VPValue *> Old2New;
92579260
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
92589261
// Convert input VPInstructions to widened recipes.
92599262
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
@@ -9313,19 +9316,23 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
93139316
}
93149317
if (Recipe->getNumDefinedValues() == 1) {
93159318
SingleDef->replaceAllUsesWith(Recipe->getVPSingleValue());
9316-
// replaceAllUsesWith may invalidate the block mask cache. Update it.
9317-
// TODO: Include the masks as operands in the predicated VPlan directly
9318-
// to remove the need to keep a map of masks beyond the predication
9319-
// transform.
9320-
RecipeBuilder.updateBlockMaskCache(SingleDef,
9321-
Recipe->getVPSingleValue());
9322-
} else
9319+
Old2New[SingleDef] = Recipe->getVPSingleValue();
9320+
} else {
93239321
assert(Recipe->getNumDefinedValues() == 0 &&
93249322
"Unexpected multidef recipe");
9325-
R.eraseFromParent();
9323+
R.eraseFromParent();
9324+
}
93269325
}
93279326
}
93289327

9328+
// replaceAllUsesWith above may invalidate the block masks. Update them here.
9329+
// TODO: Include the masks as operands in the predicated VPlan directly
9330+
// to remove the need to keep a map of masks beyond the predication
9331+
// transform.
9332+
RecipeBuilder.updateBlockMaskCache(Old2New);
9333+
for (const auto &[Old, New] : Old2New)
9334+
Old->getDefiningRecipe()->eraseFromParent();
9335+
93299336
assert(isa<VPRegionBlock>(Plan->getVectorLoopRegion()) &&
93309337
!Plan->getVectorLoopRegion()->getEntryBasicBlock()->empty() &&
93319338
"entry block must be set to a VPRegionBlock having a non-empty entry "

llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,10 +209,12 @@ class VPRecipeBuilder {
209209
return Plan.getOrAddLiveIn(V);
210210
}
211211

212-
void updateBlockMaskCache(VPValue *Old, VPValue *New) {
212+
void updateBlockMaskCache(const DenseMap<VPValue *, VPValue *> &Old2New) {
213213
for (auto &[_, V] : BlockMaskCache) {
214-
if (V == Old)
214+
if (auto *New = Old2New.lookup(V)) {
215+
V->replaceAllUsesWith(New);
215216
V = New;
217+
}
216218
}
217219
}
218220
};

llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp

Lines changed: 61 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,8 @@
2121
using namespace llvm;
2222

2323
namespace {
24-
struct VPPredicator {
24+
class VPPredicator {
2525
using BlockMaskCacheTy = DenseMap<VPBasicBlock *, VPValue *>;
26-
VPPredicator(BlockMaskCacheTy &BlockMaskCache)
27-
: BlockMaskCache(BlockMaskCache) {}
28-
2926
/// Builder to construct recipes to compute masks.
3027
VPBuilder Builder;
3128

@@ -38,35 +35,45 @@ struct VPPredicator {
3835

3936
BlockMaskCacheTy &BlockMaskCache;
4037

41-
/// Returns the previously computed predicate of the edge between \p Src and
42-
/// \p Dst.
43-
VPValue *getEdgeMask(const VPBasicBlock *Src, const VPBasicBlock *Dst) const {
44-
return EdgeMaskCache.lookup({Src, Dst});
45-
}
38+
/// Create an edge mask for every destination of cases and/or default.
39+
void createSwitchEdgeMasks(VPInstruction *SI);
40+
41+
/// Computes and return the predicate of the edge between \p Src and \p Dst.
42+
VPValue *createEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst);
4643

4744
/// Returns the *entry* mask for \p VPBB.
4845
VPValue *getBlockInMask(VPBasicBlock *VPBB) const {
4946
return BlockMaskCache.lookup(VPBB);
5047
}
48+
5149
void setBlockInMask(VPBasicBlock *VPBB, VPValue *Mask) {
5250
// TODO: Include the masks as operands in the predicated VPlan directly to
5351
// remove the need to keep a map of masks beyond the predication transform.
54-
assert(!BlockMaskCache.contains(VPBB) && "Mask already set");
52+
assert(!getBlockInMask(VPBB) && "Mask already set");
5553
BlockMaskCache[VPBB] = Mask;
5654
}
5755

56+
VPValue *setEdgeMask(const VPBasicBlock *Src, const VPBasicBlock *Dst,
57+
VPValue *Mask) {
58+
assert(!getEdgeMask(Src, Dst) && "Mask already set");
59+
return EdgeMaskCache[{Src, Dst}] = Mask;
60+
}
61+
62+
public:
63+
VPPredicator(BlockMaskCacheTy &BlockMaskCache)
64+
: BlockMaskCache(BlockMaskCache) {}
65+
66+
/// Returns the precomputed predicate of the edge from \p Src to \p Dst.
67+
VPValue *getEdgeMask(const VPBasicBlock *Src, const VPBasicBlock *Dst) const {
68+
return EdgeMaskCache.lookup({Src, Dst});
69+
}
70+
5871
/// Compute and return the mask for the vector loop header block.
5972
void createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail);
6073

6174
/// Compute and return the predicate of \p VPBB, assuming that the header
6275
/// block of the loop is set to True or the loop mask when tail folding.
6376
VPValue *createBlockInMask(VPBasicBlock *VPBB);
64-
65-
/// Computes and return the predicate of the edge between \p Src and \p Dst.
66-
VPValue *createEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst);
67-
68-
/// Create an edge mask for every destination of cases and/or default.
69-
void createSwitchEdgeMasks(VPInstruction *SI);
7077
};
7178
} // namespace
7279

@@ -80,40 +87,35 @@ VPValue *VPPredicator::createEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst) {
8087

8188
VPValue *SrcMask = getBlockInMask(Src);
8289

83-
// The terminator has to be a branch inst!
84-
if (Src->empty() || Src->getNumSuccessors() == 1) {
85-
EdgeMaskCache[{Src, Dst}] = SrcMask;
86-
return SrcMask;
87-
}
90+
// If there's a single successor, there's no terminator recipe.
91+
if (Src->getNumSuccessors() == 1)
92+
return setEdgeMask(Src, Dst, SrcMask);
8893

8994
auto *Term = cast<VPInstruction>(Src->getTerminator());
9095
if (Term->getOpcode() == Instruction::Switch) {
9196
createSwitchEdgeMasks(Term);
9297
return getEdgeMask(Src, Dst);
9398
}
9499

95-
auto *BI = cast<VPInstruction>(Src->getTerminator());
96-
assert(BI->getOpcode() == VPInstruction::BranchOnCond);
97-
if (Src->getSuccessors()[0] == Src->getSuccessors()[1]) {
98-
EdgeMaskCache[{Src, Dst}] = SrcMask;
99-
return SrcMask;
100-
}
100+
assert(Term->getOpcode() == VPInstruction::BranchOnCond &&
101+
"Unsupported terminator");
102+
if (Src->getSuccessors()[0] == Src->getSuccessors()[1])
103+
return setEdgeMask(Src, Dst, SrcMask);
101104

102-
EdgeMask = BI->getOperand(0);
105+
EdgeMask = Term->getOperand(0);
103106
assert(EdgeMask && "No Edge Mask found for condition");
104107

105108
if (Src->getSuccessors()[0] != Dst)
106-
EdgeMask = Builder.createNot(EdgeMask, BI->getDebugLoc());
109+
EdgeMask = Builder.createNot(EdgeMask, Term->getDebugLoc());
107110

108111
if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND.
109112
// The bitwise 'And' of SrcMask and EdgeMask introduces new UB if SrcMask
110113
// is false and EdgeMask is poison. Avoid that by using 'LogicalAnd'
111114
// instead which generates 'select i1 SrcMask, i1 EdgeMask, i1 false'.
112-
EdgeMask = Builder.createLogicalAnd(SrcMask, EdgeMask, BI->getDebugLoc());
115+
EdgeMask = Builder.createLogicalAnd(SrcMask, EdgeMask, Term->getDebugLoc());
113116
}
114117

115-
EdgeMaskCache[{Src, Dst}] = EdgeMask;
116-
return EdgeMask;
118+
return setEdgeMask(Src, Dst, EdgeMask);
117119
}
118120

119121
VPValue *VPPredicator::createBlockInMask(VPBasicBlock *VPBB) {
@@ -131,7 +133,7 @@ VPValue *VPPredicator::createBlockInMask(VPBasicBlock *VPBB) {
131133
return EdgeMask;
132134
}
133135

134-
if (!BlockMask) { // BlockMask has its initialized nullptr value.
136+
if (!BlockMask) { // BlockMask has its initial nullptr value.
135137
BlockMask = EdgeMask;
136138
continue;
137139
}
@@ -159,11 +161,9 @@ void VPPredicator::createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) {
159161
auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV());
160162
HeaderVPBB->insert(IV, NewInsertionPoint);
161163

162-
VPBuilder::InsertPointGuard Guard(Builder);
163164
Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint);
164-
VPValue *BlockMask = nullptr;
165165
VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
166-
BlockMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
166+
VPValue *BlockMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
167167
setBlockInMask(HeaderVPBB, BlockMask);
168168
}
169169

@@ -179,7 +179,7 @@ void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
179179
for (const auto &[Idx, Succ] :
180180
enumerate(ArrayRef(Src->getSuccessors()).drop_front())) {
181181
VPBasicBlock *Dst = cast<VPBasicBlock>(Succ);
182-
assert(!EdgeMaskCache.contains({Src, Dst}) && "Edge masks already created");
182+
assert(!getEdgeMask(Src, Dst) && "Edge masks already created");
183183
// Cases whose destination is the same as default are redundant and can
184184
// be ignored - they will get there anyhow.
185185
if (Dst == DefaultDst)
@@ -202,7 +202,7 @@ void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
202202
Mask = Builder.createOr(Mask, V);
203203
if (SrcMask)
204204
Mask = Builder.createLogicalAnd(SrcMask, Mask);
205-
EdgeMaskCache[{Src, Dst}] = Mask;
205+
setEdgeMask(Src, Dst, Mask);
206206

207207
// 2. Create the mask for the default destination, which is reached if
208208
// none of the cases with destination != default destination are taken.
@@ -216,7 +216,7 @@ void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
216216
if (SrcMask)
217217
DefaultMask = Builder.createLogicalAnd(SrcMask, DefaultMask);
218218
}
219-
EdgeMaskCache[{Src, DefaultDst}] = DefaultMask;
219+
setEdgeMask(Src, DefaultDst, DefaultMask);
220220
}
221221

222222
void VPlanTransforms::predicateAndLinearize(
@@ -229,7 +229,12 @@ void VPlanTransforms::predicateAndLinearize(
229229
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
230230
Header);
231231
VPPredicator Predicator(BlockMaskCache);
232-
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
232+
for (VPBlockBase *VPB : RPOT) {
233+
// Only regions with only VPBBs are supported at the moment.
234+
auto *VPBB = cast<VPBasicBlock>(VPB);
235+
// Introduce the mask for VPBB, which may introduce needed edge masks, and
236+
// convert all phi recipes of VPBB to blend recipes unless VPBB is the
237+
// header.
233238
if (VPBB == Header) {
234239
Predicator.createHeaderMask(Header, FoldTail);
235240
continue;
@@ -241,53 +246,51 @@ void VPlanTransforms::predicateAndLinearize(
241246

242247
Predicator.createBlockInMask(VPBB);
243248

244-
for (VPWidenPHIRecipe *Phi : Phis) {
245-
PHINode *IRPhi = cast<PHINode>(Phi->getUnderlyingValue());
246-
247-
unsigned NumIncoming = IRPhi->getNumIncomingValues();
248-
249-
// We know that all PHIs in non-header blocks are converted into selects,
249+
for (VPWidenPHIRecipe *PhiR : Phis) {
250+
// The non-header Phi is converted into a Blend recipe below,
250251
// so we don't have to worry about the insertion order and we can just use
251252
// the builder. At this point we generate the predication tree. There may
252253
// be duplications since this is a simple recursive scan, but future
253254
// optimizations will clean it up.
254255

255256
SmallVector<VPValue *, 2> OperandsWithMask;
257+
unsigned NumIncoming = PhiR->getNumIncoming();
256258
for (unsigned In = 0; In < NumIncoming; In++) {
257-
const VPBasicBlock *Pred = Phi->getIncomingBlock(In);
258-
OperandsWithMask.push_back(Phi->getIncomingValue(In));
259+
const VPBasicBlock *Pred = PhiR->getIncomingBlock(In);
260+
OperandsWithMask.push_back(PhiR->getIncomingValue(In));
259261
VPValue *EdgeMask = Predicator.getEdgeMask(Pred, VPBB);
260262
if (!EdgeMask) {
261263
assert(In == 0 && "Both null and non-null edge masks found");
262-
assert(all_equal(Phi->operands()) &&
264+
assert(all_equal(PhiR->operands()) &&
263265
"Distinct incoming values with one having a full mask");
264266
break;
265267
}
266268
OperandsWithMask.push_back(EdgeMask);
267269
}
270+
PHINode *IRPhi = cast<PHINode>(PhiR->getUnderlyingValue());
268271
auto *Blend = new VPBlendRecipe(IRPhi, OperandsWithMask);
269-
Blend->insertBefore(Phi);
270-
Phi->replaceAllUsesWith(Blend);
271-
Phi->eraseFromParent();
272+
Blend->insertBefore(PhiR);
273+
PhiR->replaceAllUsesWith(Blend);
274+
PhiR->eraseFromParent();
272275
}
273276
}
274277

278+
// Linearize the blocks of the loop into one serial chain.
275279
VPBlockBase *PrevVPBB = nullptr;
276280
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
277281
// Handle VPBBs down to the latch.
278-
if (PrevVPBB && VPBB == LoopRegion->getExiting()) {
279-
VPBlockUtils::connectBlocks(PrevVPBB, VPBB);
282+
if (VPBB == LoopRegion->getExiting()) {
283+
if (PrevVPBB)
284+
VPBlockUtils::connectBlocks(PrevVPBB, VPBB);
280285
break;
281286
}
282287

283288
auto Successors = to_vector(VPBB->getSuccessors());
284289
if (Successors.size() > 1)
285290
VPBB->getTerminator()->eraseFromParent();
286291

287-
// Flatten the CFG in the loop. Masks for blocks have already been
288-
// generated and added to recipes as needed. To do so, first disconnect
289-
// VPBB from its successors. Then connect VPBB to the previously visited
290-
// VPBB.
292+
// Flatten the CFG in the loop. To do so, first disconnect VPBB from its
293+
// successors. Then connect VPBB to the previously visited VPBB.
291294
for (auto *Succ : Successors)
292295
VPBlockUtils::disconnectBlocks(VPBB, Succ);
293296
if (PrevVPBB)

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -223,12 +223,12 @@ struct VPlanTransforms {
223223
static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
224224
unsigned VectorRegWidth);
225225

226-
/// Predicate and linearize the control-flow in the top-level loop region of
226+
/// Predicate and linearize the control-flow in the only loop region of
227227
/// \p Plan. If \p FoldTail is true, also create a mask guarding the loop
228228
/// header, otherwise use all-true for the header mask. Masks for blocks are
229-
/// added to \p BlockMaskCache, which in turn is temporarily used for wide
230-
/// recipe construction. This argument is temporary and will be removed in the
231-
/// future.
229+
/// added to \p BlockMaskCache, which in turn will temporarily be used later
230+
/// for wide recipe construction. This argument is temporary and will be
231+
/// removed in the future.
232232
static void
233233
predicateAndLinearize(VPlan &Plan, bool FoldTail,
234234
DenseMap<VPBasicBlock *, VPValue *> &BlockMaskCache);

0 commit comments

Comments
 (0)