@@ -253,7 +253,7 @@ bool LoopIdiomVectorize::run(Loop *L) {
253
253
254
254
if (recognizeMinIdxPattern ())
255
255
return true ;
256
-
256
+
257
257
return false ;
258
258
}
259
259
@@ -448,7 +448,6 @@ bool LoopIdiomVectorize::transformMinIdxPattern(
448
448
auto *VecTy = ScalableVectorType::get (
449
449
LoadType, VF); // This is the vector type for i32 values
450
450
451
-
452
451
// High-level overview of the transformation:
453
452
// We divide the process in three phases:
454
453
// In the first phase, we process a chunk which is not a multiple of VF.
@@ -470,6 +469,8 @@ bool LoopIdiomVectorize::transformMinIdxPattern(
470
469
// The below basic blocks are used to process the first phase
471
470
// and are for processing the chunk which is not a multiple of VF.
472
471
BasicBlock *VecEntry = BasicBlock::Create (Ctx, " minidx.vec.entry" , F);
472
+ BasicBlock *VecScalarForkBlock =
473
+ BasicBlock::Create (Ctx, " minidx.vec.scalar.fork" , F);
473
474
BasicBlock *MinIdxPartial1If =
474
475
BasicBlock::Create (Ctx, " minidx.partial.1.if" , F);
475
476
BasicBlock *MinIdxPartial1ProcExit =
@@ -501,8 +502,41 @@ bool LoopIdiomVectorize::transformMinIdxPattern(
501
502
502
503
LI->addTopLevelLoop (VecLoop);
503
504
504
- // Start populating preheader.
505
+ // In loop preheader, we check to fail fast.
506
+ // If the FirstIndex is equal to the SecondIndex,
507
+ // we branch to the exit block and return the SecondIndex.
508
+ // Thus, the loop preheader is split into two blocks.
509
+ // The original one has the early exit check
510
+ // and the new one sets up the code for vectorization.
511
+ // TODO: Can use splitBasicBlock(...) API to split the loop preheader.
512
+
505
513
IRBuilder<> Builder (LoopPreheader->getTerminator ());
514
+ Value *FirstIndexCmp =
515
+ Builder.CreateICmpEQ (FirstIndex, SecondIndex, " first.index.cmp" );
516
+ Value *SecondIndexBitCast = Builder.CreateTruncOrBitCast (
517
+ SecondIndex, F->getReturnType (), " second.index.bitcast" );
518
+ Builder.CreateCondBr (FirstIndexCmp, ExitBB, VecScalarForkBlock);
519
+
520
+ // Add edges from LoopPreheader to VecScalarForkBlock and ExitBB.
521
+ DTU.applyUpdates (
522
+ {{DominatorTree::Insert, LoopPreheader, VecScalarForkBlock}});
523
+ DTU.applyUpdates ({{DominatorTree::Insert, LoopPreheader, ExitBB}});
524
+
525
+ DTU.applyUpdates ({{DominatorTree::Insert, VecScalarForkBlock, Header}});
526
+ DTU.applyUpdates ({{DominatorTree::Insert, VecScalarForkBlock, ExitBB}});
527
+
528
+ // We change PHI values in the loop's header to point to the new block.
529
+ // This is done to avoid the PHI node being optimized out.
530
+ for (PHINode &PHI : Header->phis ()) {
531
+ PHI.replaceIncomingBlockWith (LoopPreheader, VecScalarForkBlock);
532
+ }
533
+
534
+ // Change the name as it is no longer the loop preheader.
535
+ LoopPreheader->setName (" minidx.early.exit1" );
536
+
537
+ // Start populating VecScalarForkBlock, which follows the early-exit check.
538
+ Builder.SetInsertPoint (VecScalarForkBlock);
539
+
506
540
// %VScale = tail call i64 @llvm.vscale.i64()
507
541
// %VLen = shl nuw nsw i64 %VScale, 2
508
542
// %minidx.not = sub nsw i64 0, %VLen
@@ -571,7 +605,7 @@ bool LoopIdiomVectorize::transformMinIdxPattern(
571
605
LoopPreheader->getTerminator ()->eraseFromParent ();
572
606
573
607
// Add edge from VecScalarForkBlock to VecEntry
574
- DTU.applyUpdates ({{DominatorTree::Insert, LoopPreheader , VecEntry}});
608
+ DTU.applyUpdates ({{DominatorTree::Insert, VecScalarForkBlock , VecEntry}});
575
609
576
610
// %minidx.entry.cmp = fcmp olt float %minidx.minVal, %init
577
611
// br i1 %minidx.entry.cmp, label %minidx.partial.1.if, label
@@ -835,7 +869,6 @@ bool LoopIdiomVectorize::transformMinIdxPattern(
835
869
{MaskTy, I64Ty}),
836
870
{FirstIndex, MinIdxPartial2IfAdd}, " minidx.partial.2.if.mask" );
837
871
838
-
839
872
Value *FirstIndexMinus1 =
840
873
Builder.CreateSub (FirstIndex, ConstantInt::get (I64Ty, 1 ),
841
874
" minidx.partial.2.if.firstindex.minus1" );
@@ -856,8 +889,9 @@ bool LoopIdiomVectorize::transformMinIdxPattern(
856
889
{MinIdxPartial2IfGEP, ConstantInt::get (I32Ty, 1 ),
857
890
MinIdxPartial2IfMask, Constant::getNullValue (VecTy)},
858
891
" minidx.partial.2.if.load" );
859
- Value *MinIdxPartial2IfSelectVals =
860
- Builder.CreateSelect (MinIdxPartial2IfMask, MinIdxPartial2IfLoad, GMax, " minidx.partial2.if.finalVals" );
892
+ Value *MinIdxPartial2IfSelectVals =
893
+ Builder.CreateSelect (MinIdxPartial2IfMask, MinIdxPartial2IfLoad, GMax,
894
+ " minidx.partial2.if.finalVals" );
861
895
862
896
// Reverse the mask.
863
897
MinIdxPartial2IfMask = Builder.CreateCall (
@@ -962,12 +996,14 @@ bool LoopIdiomVectorize::transformMinIdxPattern(
962
996
for (PHINode *PHI : PHIsToReplace) {
963
997
// Create PHI at the beginning of the block
964
998
Builder.SetInsertPoint (ExitBB, ExitBB->getFirstInsertionPt ());
999
+ // Two extra incoming edges: MinIdxEnd and the early-exit preheader.
965
1000
PHINode *ExitPHI =
966
- Builder.CreatePHI (F->getReturnType (), PHI->getNumIncomingValues () + 1 );
1001
+ Builder.CreatePHI (F->getReturnType (), PHI->getNumIncomingValues () + 2 );
967
1002
for (unsigned I = 0 ; I < PHI->getNumIncomingValues (); ++I) {
968
1003
ExitPHI->addIncoming (PHI->getIncomingValue (I), PHI->getIncomingBlock (I));
969
1004
}
970
1005
ExitPHI->addIncoming (MinIdxRetBitCast, MinIdxEnd);
1006
+ ExitPHI->addIncoming (SecondIndexBitCast, LoopPreheader);
971
1007
// Replace all uses of PHI with ExitPHI.
972
1008
PHI->replaceAllUsesWith (ExitPHI);
973
1009
PHI->eraseFromParent ();
0 commit comments