Skip to content

Commit 1eb1350

Browse files
committed
Add early exit
1 parent 5f242f7 commit 1eb1350

File tree

1 file changed

+44
-8
lines changed

1 file changed

+44
-8
lines changed

llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ bool LoopIdiomVectorize::run(Loop *L) {
253253

254254
if (recognizeMinIdxPattern())
255255
return true;
256-
256+
257257
return false;
258258
}
259259

@@ -448,7 +448,6 @@ bool LoopIdiomVectorize::transformMinIdxPattern(
448448
auto *VecTy = ScalableVectorType::get(
449449
LoadType, VF); // This is the vector type for i32 values
450450

451-
452451
// High-level overview of the transformation:
453452
// We divide the process in three phases:
454453
// In the first phase, we process a chunk which is not a multiple of VF.
@@ -470,6 +469,8 @@ bool LoopIdiomVectorize::transformMinIdxPattern(
470469
// The below basic blocks are used to process the first phase
471470
// and process the chunk which is not a multiple of VF.
472471
BasicBlock *VecEntry = BasicBlock::Create(Ctx, "minidx.vec.entry", F);
472+
BasicBlock *VecScalarForkBlock =
473+
BasicBlock::Create(Ctx, "minidx.vec.scalar.fork", F);
473474
BasicBlock *MinIdxPartial1If =
474475
BasicBlock::Create(Ctx, "minidx.partial.1.if", F);
475476
BasicBlock *MinIdxPartial1ProcExit =
@@ -501,8 +502,41 @@ bool LoopIdiomVectorize::transformMinIdxPattern(
501502

502503
LI->addTopLevelLoop(VecLoop);
503504

504-
// Start populating preheader.
505+
// In the loop preheader, we add an early-exit check so we can fail fast.
506+
// If the FirstIndex is equal to the SecondIndex,
507+
// we branch to the exit block and return the SecondIndex.
508+
// Thus, the loop preheader is split into two blocks.
509+
// The original one has the early exit check
510+
// and the new one sets up the code for vectorization.
511+
// TODO: Can use splitBasicBlock(...) API to split the loop preheader.
512+
505513
IRBuilder<> Builder(LoopPreheader->getTerminator());
514+
Value *FirstIndexCmp =
515+
Builder.CreateICmpEQ(FirstIndex, SecondIndex, "first.index.cmp");
516+
Value *SecondIndexBitCast = Builder.CreateTruncOrBitCast(
517+
SecondIndex, F->getReturnType(), "second.index.bitcast");
518+
Builder.CreateCondBr(FirstIndexCmp, ExitBB, VecScalarForkBlock);
519+
520+
// Add edges from LoopPreheader to VecScalarForkBlock and ExitBB.
521+
DTU.applyUpdates(
522+
{{DominatorTree::Insert, LoopPreheader, VecScalarForkBlock}});
523+
DTU.applyUpdates({{DominatorTree::Insert, LoopPreheader, ExitBB}});
524+
525+
DTU.applyUpdates({{DominatorTree::Insert, VecScalarForkBlock, Header}});
526+
DTU.applyUpdates({{DominatorTree::Insert, VecScalarForkBlock, ExitBB}});
527+
528+
// We change PHI values in the loop's header to point to the new block.
529+
// This is done to avoid the PHI node being optimized out.
530+
for (PHINode &PHI : Header->phis()) {
531+
PHI.replaceIncomingBlockWith(LoopPreheader, VecScalarForkBlock);
532+
}
533+
534+
// Change the name as it is no longer the loop preheader.
535+
LoopPreheader->setName("minidx.early.exit1");
536+
537+
// Start populating preheader.
538+
Builder.SetInsertPoint(VecScalarForkBlock);
539+
506540
// %VScale = tail call i64 @llvm.vscale.i64()
507541
// %VLen = shl nuw nsw i64 %VScale, 2
508542
// %minidx.not = sub nsw i64 0, %VLen
@@ -571,7 +605,7 @@ bool LoopIdiomVectorize::transformMinIdxPattern(
571605
LoopPreheader->getTerminator()->eraseFromParent();
572606

573607
// Add edge from preheader to VecEntry
574-
DTU.applyUpdates({{DominatorTree::Insert, LoopPreheader, VecEntry}});
608+
DTU.applyUpdates({{DominatorTree::Insert, VecScalarForkBlock, VecEntry}});
575609

576610
// %minidx.entry.cmp = fcmp olt float %minidx.minVal, %init
577611
// br i1 %minidx.entry.cmp, label %minidx.partial.1.if, label
@@ -835,7 +869,6 @@ bool LoopIdiomVectorize::transformMinIdxPattern(
835869
{MaskTy, I64Ty}),
836870
{FirstIndex, MinIdxPartial2IfAdd}, "minidx.partial.2.if.mask");
837871

838-
839872
Value *FirstIndexMinus1 =
840873
Builder.CreateSub(FirstIndex, ConstantInt::get(I64Ty, 1),
841874
"minidx.partial.2.if.firstindex.minus1");
@@ -856,8 +889,9 @@ bool LoopIdiomVectorize::transformMinIdxPattern(
856889
{MinIdxPartial2IfGEP, ConstantInt::get(I32Ty, 1),
857890
MinIdxPartial2IfMask, Constant::getNullValue(VecTy)},
858891
"minidx.partial.2.if.load");
859-
Value *MinIdxPartial2IfSelectVals =
860-
Builder.CreateSelect(MinIdxPartial2IfMask, MinIdxPartial2IfLoad, GMax, "minidx.partial2.if.finalVals");
892+
Value *MinIdxPartial2IfSelectVals =
893+
Builder.CreateSelect(MinIdxPartial2IfMask, MinIdxPartial2IfLoad, GMax,
894+
"minidx.partial2.if.finalVals");
861895

862896
// Reverse the mask.
863897
MinIdxPartial2IfMask = Builder.CreateCall(
@@ -962,12 +996,14 @@ bool LoopIdiomVectorize::transformMinIdxPattern(
962996
for (PHINode *PHI : PHIsToReplace) {
963997
// Create PHI at the beginning of the block
964998
Builder.SetInsertPoint(ExitBB, ExitBB->getFirstInsertionPt());
999+
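// Reserve two extra incoming values on top of the original PHI's: one for
// the vectorized result coming from MinIdxEnd, and one for the early-exit
// value (SecondIndexBitCast) coming from the original loop preheader.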
9651000
PHINode *ExitPHI =
966-
Builder.CreatePHI(F->getReturnType(), PHI->getNumIncomingValues() + 1);
1001+
Builder.CreatePHI(F->getReturnType(), PHI->getNumIncomingValues() + 2);
9671002
for (unsigned I = 0; I < PHI->getNumIncomingValues(); ++I) {
9681003
ExitPHI->addIncoming(PHI->getIncomingValue(I), PHI->getIncomingBlock(I));
9691004
}
9701005
ExitPHI->addIncoming(MinIdxRetBitCast, MinIdxEnd);
1006+
ExitPHI->addIncoming(SecondIndexBitCast, LoopPreheader);
9711007
// Replace all uses of PHI with ExitPHI.
9721008
PHI->replaceAllUsesWith(ExitPHI);
9731009
PHI->eraseFromParent();

0 commit comments

Comments
 (0)