Skip to content

Commit 72fb5ba

Browse files
committed
[LV] Don't sink into replication regions
The new test case here contains a first-order recurrence and an instruction that is replicated. The first-order recurrence forces an instruction to be sunk _into_, as opposed to after, the replication region. That causes several things to go wrong, including registering vector instructions multiple times and failing to create dominance relations correctly. Instead we should be sinking to after the replication region, which is what this patch makes sure happens. Differential Revision: https://reviews.llvm.org/D93629
1 parent 1216763 commit 72fb5ba

File tree

5 files changed

+293
-0
lines changed

5 files changed

+293
-0
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8507,6 +8507,18 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
85078507
for (auto &Entry : SinkAfter) {
85088508
VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first);
85098509
VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second);
8510+
// If the target is in a replication region, make sure to move Sink to the
8511+
// block after it, not into the replication region itself.
8512+
if (auto *Region =
8513+
dyn_cast_or_null<VPRegionBlock>(Target->getParent()->getParent())) {
8514+
if (Region->isReplicator()) {
8515+
assert(Region->getNumSuccessors() == 1 && "Expected SESE region!");
8516+
VPBasicBlock *NextBlock =
8517+
cast<VPBasicBlock>(Region->getSuccessors().front());
8518+
Sink->moveBefore(*NextBlock, NextBlock->getFirstNonPhi());
8519+
continue;
8520+
}
8521+
}
85108522
Sink->moveAfter(Target);
85118523
}
85128524

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,14 @@ void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
433433
insertAfter(InsertPos);
434434
}
435435

436+
// Unlinks this recipe from the block that currently contains it and inserts it
// into BB immediately before the position I.
// NOTE(review): I presumably must not refer to this recipe itself, since the
// recipe is unlinked before the insert is performed -- confirm with callers.
void VPRecipeBase::moveBefore(VPBasicBlock &BB,
437+
iplist<VPRecipeBase>::iterator I) {
438+
// I must be either BB's end iterator or a recipe that already lives in BB.
assert(I == BB.end() || I->getParent() == &BB);
439+
removeFromParent();
440+
// Record the new owning block, then link into its recipe list before I.
Parent = &BB;
441+
BB.getRecipeList().insert(I, this);
442+
}
443+
436444
void VPInstruction::generateInstruction(VPTransformState &State,
437445
unsigned Part) {
438446
IRBuilder<> &Builder = State.Builder;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -664,6 +664,11 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
664664
/// the VPBasicBlock that MovePos lives in, right after MovePos.
665665
void moveAfter(VPRecipeBase *MovePos);
666666

667+
/// Unlink this recipe from its current block and insert it into \p BB,
668+
/// immediately before the position \p I.
669+
/// \pre \p I is a valid iterator into \p BB; \c BB.end() is accepted.
670+
void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
671+
667672
/// This method unlinks 'this' from the containing basic block, but does not
668673
/// delete it.
669674
void removeFromParent();

llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -645,3 +645,235 @@ for.cond:
645645
for.end:
646646
ret void
647647
}
648+
649+
define i32 @sink_into_replication_region(i32 %y) {
650+
; CHECK-LABEL: @sink_into_replication_region(
651+
; CHECK-NEXT: bb:
652+
; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[Y:%.*]], 1
653+
; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i32 [[Y]], i32 1
654+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
655+
; CHECK: vector.ph:
656+
; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw i32 [[TMP1]], 3
657+
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4
658+
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i32 [[TMP1]], -1
659+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
660+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
661+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
662+
; CHECK: vector.body:
663+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE9:%.*]] ]
664+
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[PRED_UDIV_CONTINUE9]] ]
665+
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[PRED_UDIV_CONTINUE9]] ]
666+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
667+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
668+
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
669+
; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i32> [[BROADCAST_SPLAT3]], <i32 0, i32 1, i32 2, i32 3>
670+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]]
671+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
672+
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
673+
; CHECK: pred.udiv.if:
674+
; CHECK-NEXT: [[TMP4:%.*]] = udiv i32 219220132, [[OFFSET_IDX]]
675+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0
676+
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE]]
677+
; CHECK: pred.udiv.continue:
678+
; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_UDIV_IF]] ]
679+
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
680+
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]]
681+
; CHECK: pred.udiv.if4:
682+
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], -1
683+
; CHECK-NEXT: [[TMP9:%.*]] = udiv i32 219220132, [[TMP8]]
684+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP9]], i32 1
685+
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE5]]
686+
; CHECK: pred.udiv.continue5:
687+
; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i32> [ [[TMP6]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP10]], [[PRED_UDIV_IF4]] ]
688+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
689+
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]]
690+
; CHECK: pred.udiv.if6:
691+
; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OFFSET_IDX]], -2
692+
; CHECK-NEXT: [[TMP14:%.*]] = udiv i32 219220132, [[TMP13]]
693+
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP14]], i32 2
694+
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE7]]
695+
; CHECK: pred.udiv.continue7:
696+
; CHECK-NEXT: [[TMP16:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_UDIV_CONTINUE5]] ], [ [[TMP15]], [[PRED_UDIV_IF6]] ]
697+
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
698+
; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9]]
699+
; CHECK: pred.udiv.if8:
700+
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[OFFSET_IDX]], -3
701+
; CHECK-NEXT: [[TMP19:%.*]] = udiv i32 219220132, [[TMP18]]
702+
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP19]], i32 3
703+
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE9]]
704+
; CHECK: pred.udiv.continue9:
705+
; CHECK-NEXT: [[TMP21]] = phi <4 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP20]], [[PRED_UDIV_IF8]] ]
706+
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP21]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
707+
; CHECK-NEXT: [[TMP23]] = add <4 x i32> [[VEC_PHI1]], [[TMP22]]
708+
; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP23]], <4 x i32> [[VEC_PHI1]]
709+
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
710+
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
711+
; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof !45, [[LOOP46:!llvm.loop !.*]]
712+
; CHECK: middle.block:
713+
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP24]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
714+
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP24]], [[RDX_SHUF]]
715+
; CHECK-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
716+
; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF10]]
717+
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[BIN_RDX11]], i32 0
718+
; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
719+
; CHECK: scalar.ph:
720+
; CHECK-NEXT: br label [[BB2:%.*]]
721+
; CHECK: bb1:
722+
; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ]
723+
; CHECK-NEXT: ret i32 [[TMP]]
724+
; CHECK: bb2:
725+
; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof !47, [[LOOP48:!llvm.loop !.*]]
726+
;
727+
bb:
728+
br label %bb2
729+
730+
bb1: ; preds = %bb2
731+
%tmp = phi i32 [ %tmp6, %bb2 ]
732+
ret i32 %tmp
733+
734+
bb2: ; preds = %bb2, %bb
735+
%tmp3 = phi i32 [ %tmp8, %bb2 ], [ %y, %bb ]
736+
%tmp4 = phi i32 [ %tmp7, %bb2 ], [ 0, %bb ]
737+
%tmp5 = phi i32 [ %tmp6, %bb2 ], [ 0, %bb ]
738+
%tmp6 = add i32 %tmp5, %tmp4
739+
%tmp7 = udiv i32 219220132, %tmp3
740+
%tmp8 = add nsw i32 %tmp3, -1
741+
%tmp9 = icmp slt i32 %tmp3, 2
742+
br i1 %tmp9, label %bb1, label %bb2, !prof !2
743+
}
744+
745+
define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) {
746+
; CHECK-LABEL: @sink_into_replication_region_multiple(
747+
; CHECK-NEXT: bb:
748+
; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[Y:%.*]], 1
749+
; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i32 [[Y]], i32 1
750+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
751+
; CHECK: vector.ph:
752+
; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw i32 [[TMP1]], 3
753+
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4
754+
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i32 [[TMP1]], -1
755+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
756+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
757+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
758+
; CHECK: vector.body:
759+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE16:%.*]] ]
760+
; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[PRED_STORE_CONTINUE16]] ]
761+
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[PRED_STORE_CONTINUE16]] ]
762+
; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[PRED_STORE_CONTINUE16]] ]
763+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
764+
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], -1
765+
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -2
766+
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -3
767+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <4 x i32> [[VEC_IND2]], [[BROADCAST_SPLAT]]
768+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
769+
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
770+
; CHECK: pred.udiv.if:
771+
; CHECK-NEXT: [[TMP7:%.*]] = udiv i32 219220132, [[OFFSET_IDX]]
772+
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i32 0
773+
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE]]
774+
; CHECK: pred.udiv.continue:
775+
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_UDIV_IF]] ]
776+
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1
777+
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]]
778+
; CHECK: pred.udiv.if5:
779+
; CHECK-NEXT: [[TMP11:%.*]] = udiv i32 219220132, [[TMP2]]
780+
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP11]], i32 1
781+
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE6]]
782+
; CHECK: pred.udiv.continue6:
783+
; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF5]] ]
784+
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2
785+
; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]]
786+
; CHECK: pred.udiv.if7:
787+
; CHECK-NEXT: [[TMP15:%.*]] = udiv i32 219220132, [[TMP3]]
788+
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP15]], i32 2
789+
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE8]]
790+
; CHECK: pred.udiv.continue8:
791+
; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP16]], [[PRED_UDIV_IF7]] ]
792+
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3
793+
; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_UDIV_IF9:%.*]], label [[PRED_UDIV_CONTINUE10:%.*]]
794+
; CHECK: pred.udiv.if9:
795+
; CHECK-NEXT: [[TMP19:%.*]] = udiv i32 219220132, [[TMP4]]
796+
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP19]], i32 3
797+
; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE10]]
798+
; CHECK: pred.udiv.continue10:
799+
; CHECK-NEXT: [[TMP21]] = phi <4 x i32> [ [[TMP17]], [[PRED_UDIV_CONTINUE8]] ], [ [[TMP20]], [[PRED_UDIV_IF9]] ]
800+
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP21]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
801+
; CHECK-NEXT: [[TMP23]] = add <4 x i32> [[VEC_PHI4]], [[TMP22]]
802+
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
803+
; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
804+
; CHECK: pred.store.if:
805+
; CHECK-NEXT: [[TMP25:%.*]] = sext i32 [[INDEX]] to i64
806+
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 [[TMP25]]
807+
; CHECK-NEXT: store i32 [[OFFSET_IDX]], i32* [[TMP26]], align 4
808+
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
809+
; CHECK: pred.store.continue:
810+
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1
811+
; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
812+
; CHECK: pred.store.if11:
813+
; CHECK-NEXT: [[TMP28:%.*]] = or i32 [[INDEX]], 1
814+
; CHECK-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64
815+
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP29]]
816+
; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP30]], align 4
817+
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
818+
; CHECK: pred.store.continue12:
819+
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2
820+
; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
821+
; CHECK: pred.store.if13:
822+
; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[INDEX]], 2
823+
; CHECK-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64
824+
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP33]]
825+
; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP34]], align 4
826+
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
827+
; CHECK: pred.store.continue14:
828+
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3
829+
; CHECK-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]]
830+
; CHECK: pred.store.if15:
831+
; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[INDEX]], 3
832+
; CHECK-NEXT: [[TMP37:%.*]] = sext i32 [[TMP36]] to i64
833+
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP37]]
834+
; CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP38]], align 4
835+
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
836+
; CHECK: pred.store.continue16:
837+
; CHECK-NEXT: [[TMP39:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP23]], <4 x i32> [[VEC_PHI4]]
838+
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
839+
; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4>
840+
; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
841+
; CHECK-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof !45, [[LOOP49:!llvm.loop !.*]]
842+
; CHECK: middle.block:
843+
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP39]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
844+
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP39]], [[RDX_SHUF]]
845+
; CHECK-NEXT: [[RDX_SHUF17:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
846+
; CHECK-NEXT: [[BIN_RDX18:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF17]]
847+
; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i32> [[BIN_RDX18]], i32 0
848+
; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
849+
; CHECK: scalar.ph:
850+
; CHECK-NEXT: br label [[BB2:%.*]]
851+
; CHECK: bb1:
852+
; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP41]], [[MIDDLE_BLOCK]] ]
853+
; CHECK-NEXT: ret i32 [[TMP]]
854+
; CHECK: bb2:
855+
; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof !47, [[LOOP50:!llvm.loop !.*]]
856+
;
857+
bb:
858+
br label %bb2
859+
860+
bb1: ; preds = %bb2
861+
%tmp = phi i32 [ %tmp6, %bb2 ]
862+
ret i32 %tmp
863+
864+
bb2: ; preds = %bb2, %bb
865+
%tmp3 = phi i32 [ %tmp8, %bb2 ], [ %y, %bb ]
866+
%iv = phi i32 [ %iv.next, %bb2 ], [ 0, %bb ]
867+
%tmp4 = phi i32 [ %tmp7, %bb2 ], [ 0, %bb ]
868+
%tmp5 = phi i32 [ %tmp6, %bb2 ], [ 0, %bb ]
869+
%g = getelementptr inbounds i32, i32* %x, i32 %iv
870+
%tmp6 = add i32 %tmp5, %tmp4
871+
%tmp7 = udiv i32 219220132, %tmp3
872+
store i32 %tmp3, i32* %g, align 4
873+
%tmp8 = add nsw i32 %tmp3, -1
874+
%iv.next = add nsw i32 %iv, 1
875+
%tmp9 = icmp slt i32 %tmp3, 2
876+
br i1 %tmp9, label %bb1, label %bb2, !prof !2
877+
}
878+
879+
!2 = !{!"branch_weights", i32 1, i32 1}

llvm/unittests/Transforms/Vectorize/VPlanTest.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,42 @@ TEST(VPInstructionTest, moveAfter) {
8888
EXPECT_EQ(I3->getParent(), I4->getParent());
8989
}
9090

91+
// Exercises moveBefore in three situations: repositioning inside the same
// block, moving into a different non-empty block, and moving into an empty
// block with the end iterator as the insertion point.
// NOTE(review): CHECK_ITERATOR is defined outside this view; it presumably
// verifies the block's recipes in the listed order -- confirm.
TEST(VPInstructionTest, moveBefore) {
92+
VPInstruction *I1 = new VPInstruction(0, {});
93+
VPInstruction *I2 = new VPInstruction(1, {});
94+
VPInstruction *I3 = new VPInstruction(2, {});
95+
96+
VPBasicBlock VPBB1;
97+
VPBB1.appendRecipe(I1);
98+
VPBB1.appendRecipe(I2);
99+
VPBB1.appendRecipe(I3);
100+
101+
// Case 1: move I1 within its own block so that it sits directly before I3.
I1->moveBefore(VPBB1, I3->getIterator());
102+
103+
CHECK_ITERATOR(VPBB1, I2, I1, I3);
104+
105+
VPInstruction *I4 = new VPInstruction(4, {});
106+
VPInstruction *I5 = new VPInstruction(5, {});
107+
VPBasicBlock VPBB2;
108+
VPBB2.appendRecipe(I4);
109+
VPBB2.appendRecipe(I5);
110+
111+
// Case 2: move I3 out of VPBB1 and into VPBB2, in front of I4.
I3->moveBefore(VPBB2, I4->getIterator());
112+
113+
CHECK_ITERATOR(VPBB1, I2, I1);
114+
CHECK_ITERATOR(VPBB2, I3, I4, I5);
115+
// The moved recipe must report the destination block as its parent.
EXPECT_EQ(I3->getParent(), I4->getParent());
116+
117+
VPBasicBlock VPBB3;
118+
119+
// Case 3: move I4 into an empty block, using end() as the position.
I4->moveBefore(VPBB3, VPBB3.end());
120+
121+
CHECK_ITERATOR(VPBB1, I2, I1);
122+
CHECK_ITERATOR(VPBB2, I3, I5);
123+
CHECK_ITERATOR(VPBB3, I4);
124+
EXPECT_EQ(&VPBB3, I4->getParent());
125+
}
126+
91127
TEST(VPInstructionTest, setOperand) {
92128
VPValue *VPV1 = new VPValue();
93129
VPValue *VPV2 = new VPValue();

0 commit comments

Comments
 (0)