Skip to content

Commit 69b05fe

Browse files
committed
[LV] Optimise latch exit induction users for some early exit loops
This is the first of two PRs that attempt to improve the IR generated in the exit blocks of vectorised loops with uncountable early exits. In this PR I am improving the generated code for users of induction variables in early exit loops that have a unique exit block, when exiting via the latch. I intend to follow this up very soon with another patch to optimise the code for induction users in the vector.early.exit block.
1 parent 85cf958 commit 69b05fe

File tree

2 files changed

+69
-90
lines changed

2 files changed

+69
-90
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 61 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -730,67 +730,74 @@ static VPWidenInductionRecipe *getOptimizableIVOf(VPValue *VPV) {
730730
return IsWideIVInc() ? WideIV : nullptr;
731731
}
732732

733-
void VPlanTransforms::optimizeInductionExitUsers(
734-
VPlan &Plan, DenseMap<VPValue *, VPValue *> &EndValues) {
733+
static VPValue *
734+
optimizeLatchExitInductionUser(VPlan &Plan, VPTypeAnalysis &TypeInfo,
735+
VPBlockBase *PredVPBB, VPValue *Op,
736+
DenseMap<VPValue *, VPValue *> &EndValues) {
735737
using namespace VPlanPatternMatch;
736-
SmallVector<VPIRBasicBlock *> ExitVPBBs(Plan.getExitBlocks());
737-
if (ExitVPBBs.size() != 1)
738-
return;
739738

740-
VPIRBasicBlock *ExitVPBB = ExitVPBBs[0];
741-
VPBlockBase *PredVPBB = ExitVPBB->getSinglePredecessor();
742-
if (!PredVPBB)
743-
return;
744-
assert(PredVPBB == Plan.getMiddleBlock() &&
745-
"predecessor must be the middle block");
746-
747-
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
748-
VPBuilder B(Plan.getMiddleBlock()->getTerminator());
749-
for (VPRecipeBase &R : *ExitVPBB) {
750-
auto *ExitIRI = cast<VPIRInstruction>(&R);
751-
if (!isa<PHINode>(ExitIRI->getInstruction()))
752-
break;
739+
VPValue *Incoming;
740+
if (!match(Op, m_VPInstruction<VPInstruction::ExtractFromEnd>(
741+
m_VPValue(Incoming), m_SpecificInt(1))))
742+
return nullptr;
753743

754-
VPValue *Incoming;
755-
if (!match(ExitIRI->getOperand(0),
756-
m_VPInstruction<VPInstruction::ExtractFromEnd>(
757-
m_VPValue(Incoming), m_SpecificInt(1))))
758-
continue;
744+
auto *WideIV = getOptimizableIVOf(Incoming);
745+
if (!WideIV)
746+
return nullptr;
759747

760-
auto *WideIV = getOptimizableIVOf(Incoming);
761-
if (!WideIV)
762-
continue;
763-
VPValue *EndValue = EndValues.lookup(WideIV);
764-
assert(EndValue && "end value must have been pre-computed");
748+
VPValue *EndValue = EndValues.lookup(WideIV);
749+
assert(EndValue && "end value must have been pre-computed");
750+
751+
// This only happens if Incoming is the increment of an induction recipe.
752+
if (Incoming != WideIV)
753+
return EndValue;
754+
755+
// Otherwise subtract the step from the EndValue.
756+
VPBuilder B(cast<VPBasicBlock>(PredVPBB)->getTerminator());
757+
VPValue *Step = WideIV->getStepValue();
758+
Type *ScalarTy = TypeInfo.inferScalarType(WideIV);
759+
VPValue *Escape = nullptr;
760+
if (ScalarTy->isIntegerTy()) {
761+
Escape =
762+
B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
763+
} else if (ScalarTy->isPointerTy()) {
764+
auto *Zero = Plan.getOrAddLiveIn(
765+
ConstantInt::get(Step->getLiveInIRValue()->getType(), 0));
766+
Escape =
767+
B.createPtrAdd(EndValue, B.createNaryOp(Instruction::Sub, {Zero, Step}),
768+
{}, "ind.escape");
769+
} else if (ScalarTy->isFloatingPointTy()) {
770+
const auto &ID = WideIV->getInductionDescriptor();
771+
Escape = B.createNaryOp(
772+
ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
773+
? Instruction::FSub
774+
: Instruction::FAdd,
775+
{EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
776+
} else {
777+
llvm_unreachable("all possible induction types must be handled");
778+
}
779+
return Escape;
780+
}
765781

766-
if (Incoming != WideIV) {
767-
ExitIRI->setOperand(0, EndValue);
768-
continue;
769-
}
782+
void VPlanTransforms::optimizeInductionExitUsers(
783+
VPlan &Plan, DenseMap<VPValue *, VPValue *> &EndValues) {
784+
VPBlockBase *MiddleVPBB = Plan.getMiddleBlock();
785+
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
786+
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
787+
for (VPRecipeBase &R : *ExitVPBB) {
788+
auto *ExitIRI = cast<VPIRInstruction>(&R);
789+
if (!isa<PHINode>(ExitIRI->getInstruction()))
790+
break;
770791

771-
VPValue *Escape = nullptr;
772-
VPValue *Step = WideIV->getStepValue();
773-
Type *ScalarTy = TypeInfo.inferScalarType(WideIV);
774-
if (ScalarTy->isIntegerTy()) {
775-
Escape =
776-
B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
777-
} else if (ScalarTy->isPointerTy()) {
778-
auto *Zero = Plan.getOrAddLiveIn(
779-
ConstantInt::get(Step->getLiveInIRValue()->getType(), 0));
780-
Escape = B.createPtrAdd(EndValue,
781-
B.createNaryOp(Instruction::Sub, {Zero, Step}),
782-
{}, "ind.escape");
783-
} else if (ScalarTy->isFloatingPointTy()) {
784-
const auto &ID = WideIV->getInductionDescriptor();
785-
Escape = B.createNaryOp(
786-
ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
787-
? Instruction::FSub
788-
: Instruction::FAdd,
789-
{EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
790-
} else {
791-
llvm_unreachable("all possible induction types must be handled");
792+
for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
793+
if (PredVPBB == MiddleVPBB)
794+
if (VPValue *Escape = optimizeLatchExitInductionUser(
795+
Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx),
796+
EndValues))
797+
ExitIRI->setOperand(Idx, Escape);
798+
// TODO: Optimize early exit induction users in follow-on patch.
799+
}
792800
}
793-
ExitIRI->setOperand(0, Escape);
794801
}
795802
}
796803

llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll

Lines changed: 8 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,6 @@ define i64 @same_exit_block_pre_inc_use2() {
351351
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
352352
; CHECK: vector.body:
353353
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
354-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
355354
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
356355
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
357356
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
@@ -365,11 +364,9 @@ define i64 @same_exit_block_pre_inc_use2() {
365364
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
366365
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
367366
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
368-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
369367
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
370368
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
371369
; CHECK: middle.split:
372-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
373370
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
374371
; CHECK: middle.block:
375372
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
@@ -391,7 +388,7 @@ define i64 @same_exit_block_pre_inc_use2() {
391388
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
392389
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP11:![0-9]+]]
393390
; CHECK: loop.end:
394-
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 67, [[VECTOR_EARLY_EXIT]] ]
391+
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ], [ 67, [[VECTOR_EARLY_EXIT]] ]
395392
; CHECK-NEXT: ret i64 [[RETVAL]]
396393
;
397394
entry:
@@ -451,7 +448,6 @@ define i64 @same_exit_block_pre_inc_use3() {
451448
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
452449
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
453450
; CHECK: middle.split:
454-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
455451
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
456452
; CHECK: middle.block:
457453
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
@@ -475,7 +471,7 @@ define i64 @same_exit_block_pre_inc_use3() {
475471
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
476472
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP13:![0-9]+]]
477473
; CHECK: loop.end:
478-
; CHECK-NEXT: [[INDEX_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ [[INDEX]], [[LOOP]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
474+
; CHECK-NEXT: [[INDEX_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ [[INDEX]], [[LOOP]] ], [ 66, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
479475
; CHECK-NEXT: ret i64 [[INDEX_LCSSA]]
480476
;
481477
entry:
@@ -597,20 +593,13 @@ define i64 @same_exit_block_post_inc_use() {
597593
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
598594
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
599595
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
600-
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 1
601-
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
602-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 3
603596
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
604597
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
605598
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
606599
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP0]]
607600
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0
608601
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1
609602
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
610-
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP0]], 1
611-
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP1]], 1
612-
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP2]], 1
613-
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP3]], 1
614603
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
615604
; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true)
616605
; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]])
@@ -642,7 +631,7 @@ define i64 @same_exit_block_post_inc_use() {
642631
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
643632
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP17:![0-9]+]]
644633
; CHECK: loop.end:
645-
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[INDEX_NEXT]], [[LOOP_INC]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
634+
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[INDEX_NEXT]], [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
646635
; CHECK-NEXT: ret i64 [[RETVAL]]
647636
;
648637
entry:
@@ -684,7 +673,6 @@ define i64 @same_exit_block_post_inc_use2() {
684673
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
685674
; CHECK: vector.body:
686675
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
687-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
688676
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
689677
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
690678
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 1
@@ -709,11 +697,9 @@ define i64 @same_exit_block_post_inc_use2() {
709697
; CHECK-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP16]], splat (i1 true)
710698
; CHECK-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP17]])
711699
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
712-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
713700
; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP18]], [[TMP19]]
714701
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
715702
; CHECK: middle.split:
716-
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
717703
; CHECK-NEXT: br i1 [[TMP18]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
718704
; CHECK: middle.block:
719705
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
@@ -737,7 +723,7 @@ define i64 @same_exit_block_post_inc_use2() {
737723
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
738724
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP19:![0-9]+]]
739725
; CHECK: loop.end:
740-
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
726+
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
741727
; CHECK-NEXT: ret i64 [[RETVAL]]
742728
;
743729
entry:
@@ -869,7 +855,6 @@ define i64 @diff_exit_block_pre_inc_use2() {
869855
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
870856
; CHECK: vector.body:
871857
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
872-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
873858
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
874859
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
875860
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
@@ -883,13 +868,11 @@ define i64 @diff_exit_block_pre_inc_use2() {
883868
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
884869
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
885870
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
886-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
887871
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
888872
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
889873
; CHECK: middle.split:
890874
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
891875
; CHECK: middle.block:
892-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
893876
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
894877
; CHECK: vector.early.exit:
895878
; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]]
@@ -912,7 +895,7 @@ define i64 @diff_exit_block_pre_inc_use2() {
912895
; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ 67, [[LOOP]] ], [ 67, [[VECTOR_EARLY_EXIT]] ]
913896
; CHECK-NEXT: ret i64 [[RETVAL1]]
914897
; CHECK: loop.end:
915-
; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
898+
; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ]
916899
; CHECK-NEXT: ret i64 [[RETVAL2]]
917900
;
918901
entry:
@@ -978,7 +961,6 @@ define i64 @diff_exit_block_pre_inc_use3() {
978961
; CHECK: middle.split:
979962
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
980963
; CHECK: middle.block:
981-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
982964
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
983965
; CHECK: vector.early.exit:
984966
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
@@ -1003,7 +985,7 @@ define i64 @diff_exit_block_pre_inc_use3() {
1003985
; CHECK-NEXT: [[INDEX_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
1004986
; CHECK-NEXT: ret i64 [[INDEX_LCSSA]]
1005987
; CHECK: loop.end:
1006-
; CHECK-NEXT: [[INDEX_LCSSA1:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
988+
; CHECK-NEXT: [[INDEX_LCSSA1:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ]
1007989
; CHECK-NEXT: ret i64 [[INDEX_LCSSA1]]
1008990
;
1009991
entry:
@@ -1050,20 +1032,13 @@ define i64 @diff_exit_block_post_inc_use1() {
10501032
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
10511033
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
10521034
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
1053-
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 1
1054-
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
1055-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 3
10561035
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
10571036
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
10581037
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
10591038
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP0]]
10601039
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0
10611040
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1
10621041
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
1063-
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP0]], 1
1064-
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP1]], 1
1065-
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP2]], 1
1066-
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP3]], 1
10671042
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
10681043
; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true)
10691044
; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]])
@@ -1098,7 +1073,7 @@ define i64 @diff_exit_block_post_inc_use1() {
10981073
; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
10991074
; CHECK-NEXT: ret i64 [[RETVAL1]]
11001075
; CHECK: loop.end:
1101-
; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP_INC]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
1076+
; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ]
11021077
; CHECK-NEXT: ret i64 [[RETVAL2]]
11031078
;
11041079
entry:
@@ -1144,7 +1119,6 @@ define i64 @diff_exit_block_post_inc_use2() {
11441119
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
11451120
; CHECK: vector.body:
11461121
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
1147-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
11481122
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
11491123
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
11501124
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 1
@@ -1169,13 +1143,11 @@ define i64 @diff_exit_block_post_inc_use2() {
11691143
; CHECK-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP16]], splat (i1 true)
11701144
; CHECK-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP17]])
11711145
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
1172-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
11731146
; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP18]], [[TMP19]]
11741147
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
11751148
; CHECK: middle.split:
11761149
; CHECK-NEXT: br i1 [[TMP18]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
11771150
; CHECK: middle.block:
1178-
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
11791151
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
11801152
; CHECK: vector.early.exit:
11811153
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 true)
@@ -1200,7 +1172,7 @@ define i64 @diff_exit_block_post_inc_use2() {
12001172
; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
12011173
; CHECK-NEXT: ret i64 [[RETVAL1]]
12021174
; CHECK: loop.end:
1203-
; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
1175+
; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ]
12041176
; CHECK-NEXT: ret i64 [[RETVAL2]]
12051177
;
12061178
entry:

0 commit comments

Comments
 (0)