2121using namespace llvm ;
2222
2323namespace {
24- struct VPPredicator {
24+ class VPPredicator {
2525 using BlockMaskCacheTy = DenseMap<VPBasicBlock *, VPValue *>;
26- VPPredicator (BlockMaskCacheTy &BlockMaskCache)
27- : BlockMaskCache(BlockMaskCache) {}
28-
2926 // / Builder to construct recipes to compute masks.
3027 VPBuilder Builder;
3128
@@ -38,35 +35,45 @@ struct VPPredicator {
3835
3936 BlockMaskCacheTy &BlockMaskCache;
4037
41- // / Returns the previously computed predicate of the edge between \p Src and
42- // / \p Dst.
43- VPValue * getEdgeMask ( const VPBasicBlock *Src, const VPBasicBlock *Dst) const {
44- return EdgeMaskCache. lookup ({ Src, Dst});
45- }
38+ // / Create an edge mask for every destination of cases and/or default.
39+ void createSwitchEdgeMasks (VPInstruction *SI);
40+
41+ // / Computes and return the predicate of the edge between \p Src and \p Dst.
42+ VPValue * createEdgeMask (VPBasicBlock *Src, VPBasicBlock *Dst);
4643
4744 // / Returns the *entry* mask for \p VPBB.
4845 VPValue *getBlockInMask (VPBasicBlock *VPBB) const {
4946 return BlockMaskCache.lookup (VPBB);
5047 }
48+
5149 void setBlockInMask (VPBasicBlock *VPBB, VPValue *Mask) {
5250 // TODO: Include the masks as operands in the predicated VPlan directly to
5351 // remove the need to keep a map of masks beyond the predication transform.
54- assert (!BlockMaskCache. contains (VPBB) && " Mask already set" );
52+ assert (!getBlockInMask (VPBB) && " Mask already set" );
5553 BlockMaskCache[VPBB] = Mask;
5654 }
5755
56+ VPValue *setEdgeMask (const VPBasicBlock *Src, const VPBasicBlock *Dst,
57+ VPValue *Mask) {
58+ assert (!getEdgeMask (Src, Dst) && " Mask already set" );
59+ return EdgeMaskCache[{Src, Dst}] = Mask;
60+ }
61+
62+ public:
63+ VPPredicator (BlockMaskCacheTy &BlockMaskCache)
64+ : BlockMaskCache(BlockMaskCache) {}
65+
66+ // / Returns the precomputed predicate of the edge from \p Src to \p Dst.
67+ VPValue *getEdgeMask (const VPBasicBlock *Src, const VPBasicBlock *Dst) const {
68+ return EdgeMaskCache.lookup ({Src, Dst});
69+ }
70+
5871 // / Compute and return the mask for the vector loop header block.
5972 void createHeaderMask (VPBasicBlock *HeaderVPBB, bool FoldTail);
6073
6174 // / Compute and return the predicate of \p VPBB, assuming that the header
6275 // / block of the loop is set to True or the loop mask when tail folding.
6376 VPValue *createBlockInMask (VPBasicBlock *VPBB);
64-
65- // / Computes and return the predicate of the edge between \p Src and \p Dst.
66- VPValue *createEdgeMask (VPBasicBlock *Src, VPBasicBlock *Dst);
67-
68- // / Create an edge mask for every destination of cases and/or default.
69- void createSwitchEdgeMasks (VPInstruction *SI);
7077};
7178} // namespace
7279
@@ -80,40 +87,35 @@ VPValue *VPPredicator::createEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst) {
8087
8188 VPValue *SrcMask = getBlockInMask (Src);
8289
83- // The terminator has to be a branch inst!
84- if (Src->empty () || Src->getNumSuccessors () == 1 ) {
85- EdgeMaskCache[{Src, Dst}] = SrcMask;
86- return SrcMask;
87- }
90+ // If there's a single successor, there's no terminator recipe.
91+ if (Src->getNumSuccessors () == 1 )
92+ return setEdgeMask (Src, Dst, SrcMask);
8893
8994 auto *Term = cast<VPInstruction>(Src->getTerminator ());
9095 if (Term->getOpcode () == Instruction::Switch) {
9196 createSwitchEdgeMasks (Term);
9297 return getEdgeMask (Src, Dst);
9398 }
9499
95- auto *BI = cast<VPInstruction>(Src->getTerminator ());
96- assert (BI->getOpcode () == VPInstruction::BranchOnCond);
97- if (Src->getSuccessors ()[0 ] == Src->getSuccessors ()[1 ]) {
98- EdgeMaskCache[{Src, Dst}] = SrcMask;
99- return SrcMask;
100- }
100+ assert (Term->getOpcode () == VPInstruction::BranchOnCond &&
101+ " Unsupported terminator" );
102+ if (Src->getSuccessors ()[0 ] == Src->getSuccessors ()[1 ])
103+ return setEdgeMask (Src, Dst, SrcMask);
101104
102- EdgeMask = BI ->getOperand (0 );
105+ EdgeMask = Term ->getOperand (0 );
103106 assert (EdgeMask && " No Edge Mask found for condition" );
104107
105108 if (Src->getSuccessors ()[0 ] != Dst)
106- EdgeMask = Builder.createNot (EdgeMask, BI ->getDebugLoc ());
109+ EdgeMask = Builder.createNot (EdgeMask, Term ->getDebugLoc ());
107110
108111 if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND.
109112 // The bitwise 'And' of SrcMask and EdgeMask introduces new UB if SrcMask
110113 // is false and EdgeMask is poison. Avoid that by using 'LogicalAnd'
111114 // instead which generates 'select i1 SrcMask, i1 EdgeMask, i1 false'.
112- EdgeMask = Builder.createLogicalAnd (SrcMask, EdgeMask, BI ->getDebugLoc ());
115+ EdgeMask = Builder.createLogicalAnd (SrcMask, EdgeMask, Term ->getDebugLoc ());
113116 }
114117
115- EdgeMaskCache[{Src, Dst}] = EdgeMask;
116- return EdgeMask;
118+ return setEdgeMask (Src, Dst, EdgeMask);
117119}
118120
119121VPValue *VPPredicator::createBlockInMask (VPBasicBlock *VPBB) {
@@ -131,7 +133,7 @@ VPValue *VPPredicator::createBlockInMask(VPBasicBlock *VPBB) {
131133 return EdgeMask;
132134 }
133135
134- if (!BlockMask) { // BlockMask has its initialized nullptr value.
136+ if (!BlockMask) { // BlockMask has its initial nullptr value.
135137 BlockMask = EdgeMask;
136138 continue ;
137139 }
@@ -159,11 +161,9 @@ void VPPredicator::createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) {
159161 auto *IV = new VPWidenCanonicalIVRecipe (Plan.getCanonicalIV ());
160162 HeaderVPBB->insert (IV, NewInsertionPoint);
161163
162- VPBuilder::InsertPointGuard Guard (Builder);
163164 Builder.setInsertPoint (HeaderVPBB, NewInsertionPoint);
164- VPValue *BlockMask = nullptr ;
165165 VPValue *BTC = Plan.getOrCreateBackedgeTakenCount ();
166- BlockMask = Builder.createICmp (CmpInst::ICMP_ULE, IV, BTC);
166+ VPValue * BlockMask = Builder.createICmp (CmpInst::ICMP_ULE, IV, BTC);
167167 setBlockInMask (HeaderVPBB, BlockMask);
168168}
169169
@@ -179,7 +179,7 @@ void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
179179 for (const auto &[Idx, Succ] :
180180 enumerate(ArrayRef (Src->getSuccessors ()).drop_front ())) {
181181 VPBasicBlock *Dst = cast<VPBasicBlock>(Succ);
182- assert (!EdgeMaskCache. contains ({ Src, Dst} ) && " Edge masks already created" );
182+ assert (!getEdgeMask ( Src, Dst) && " Edge masks already created" );
183183 // Cases whose destination is the same as default are redundant and can
184184 // be ignored - they will get there anyhow.
185185 if (Dst == DefaultDst)
@@ -202,7 +202,7 @@ void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
202202 Mask = Builder.createOr (Mask, V);
203203 if (SrcMask)
204204 Mask = Builder.createLogicalAnd (SrcMask, Mask);
205- EdgeMaskCache[{ Src, Dst}] = Mask;
205+ setEdgeMask ( Src, Dst, Mask) ;
206206
207207 // 2. Create the mask for the default destination, which is reached if
208208 // none of the cases with destination != default destination are taken.
@@ -216,7 +216,7 @@ void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
216216 if (SrcMask)
217217 DefaultMask = Builder.createLogicalAnd (SrcMask, DefaultMask);
218218 }
219- EdgeMaskCache[{ Src, DefaultDst}] = DefaultMask;
219+ setEdgeMask ( Src, DefaultDst, DefaultMask) ;
220220}
221221
222222void VPlanTransforms::predicateAndLinearize (
@@ -229,7 +229,12 @@ void VPlanTransforms::predicateAndLinearize(
229229 ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT (
230230 Header);
231231 VPPredicator Predicator (BlockMaskCache);
232- for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
232+ for (VPBlockBase *VPB : RPOT) {
233+ // Only regions with only VPBBs are supported at the moment.
234+ auto *VPBB = cast<VPBasicBlock>(VPB);
235+ // Introduce the mask for VPBB, which may introduce needed edge masks, and
236+ // convert all phi recipes of VPBB to blend recipes unless VPBB is the
237+ // header.
233238 if (VPBB == Header) {
234239 Predicator.createHeaderMask (Header, FoldTail);
235240 continue ;
@@ -241,53 +246,51 @@ void VPlanTransforms::predicateAndLinearize(
241246
242247 Predicator.createBlockInMask (VPBB);
243248
244- for (VPWidenPHIRecipe *Phi : Phis) {
245- PHINode *IRPhi = cast<PHINode>(Phi->getUnderlyingValue ());
246-
247- unsigned NumIncoming = IRPhi->getNumIncomingValues ();
248-
249- // We know that all PHIs in non-header blocks are converted into selects,
249+ for (VPWidenPHIRecipe *PhiR : Phis) {
250+ // The non-header Phi is converted into a Blend recipe below,
250251 // so we don't have to worry about the insertion order and we can just use
251252 // the builder. At this point we generate the predication tree. There may
252253 // be duplications since this is a simple recursive scan, but future
253254 // optimizations will clean it up.
254255
255256 SmallVector<VPValue *, 2 > OperandsWithMask;
257+ unsigned NumIncoming = PhiR->getNumIncoming ();
256258 for (unsigned In = 0 ; In < NumIncoming; In++) {
257- const VPBasicBlock *Pred = Phi ->getIncomingBlock (In);
258- OperandsWithMask.push_back (Phi ->getIncomingValue (In));
259+ const VPBasicBlock *Pred = PhiR ->getIncomingBlock (In);
260+ OperandsWithMask.push_back (PhiR ->getIncomingValue (In));
259261 VPValue *EdgeMask = Predicator.getEdgeMask (Pred, VPBB);
260262 if (!EdgeMask) {
261263 assert (In == 0 && " Both null and non-null edge masks found" );
262- assert (all_equal (Phi ->operands ()) &&
264+ assert (all_equal (PhiR ->operands ()) &&
263265 " Distinct incoming values with one having a full mask" );
264266 break ;
265267 }
266268 OperandsWithMask.push_back (EdgeMask);
267269 }
270+ PHINode *IRPhi = cast<PHINode>(PhiR->getUnderlyingValue ());
268271 auto *Blend = new VPBlendRecipe (IRPhi, OperandsWithMask);
269- Blend->insertBefore (Phi );
270- Phi ->replaceAllUsesWith (Blend);
271- Phi ->eraseFromParent ();
272+ Blend->insertBefore (PhiR );
273+ PhiR ->replaceAllUsesWith (Blend);
274+ PhiR ->eraseFromParent ();
272275 }
273276 }
274277
278+ // Linearize the blocks of the loop into one serial chain.
275279 VPBlockBase *PrevVPBB = nullptr ;
276280 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
277281 // Handle VPBBs down to the latch.
278- if (PrevVPBB && VPBB == LoopRegion->getExiting ()) {
279- VPBlockUtils::connectBlocks (PrevVPBB, VPBB);
282+ if (VPBB == LoopRegion->getExiting ()) {
283+ if (PrevVPBB)
284+ VPBlockUtils::connectBlocks (PrevVPBB, VPBB);
280285 break ;
281286 }
282287
283288 auto Successors = to_vector (VPBB->getSuccessors ());
284289 if (Successors.size () > 1 )
285290 VPBB->getTerminator ()->eraseFromParent ();
286291
287- // Flatten the CFG in the loop. Masks for blocks have already been
288- // generated and added to recipes as needed. To do so, first disconnect
289- // VPBB from its successors. Then connect VPBB to the previously visited
290- // VPBB.
292+ // Flatten the CFG in the loop. To do so, first disconnect VPBB from its
293+ // successors. Then connect VPBB to the previously visited VPBB.
291294 for (auto *Succ : Successors)
292295 VPBlockUtils::disconnectBlocks (VPBB, Succ);
293296 if (PrevVPBB)
0 commit comments