Skip to content

Commit 6440a91

Browse files
committed
!fixup address first set of comments, thanks
1 parent 31c1c5f commit 6440a91

File tree

7 files changed

+95
-52
lines changed

7 files changed

+95
-52
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 82 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -517,14 +517,15 @@ class InnerLoopVectorizer {
517517
/// Fix the non-induction PHIs in \p Plan.
518518
void fixNonInductionPHIs(VPTransformState &State);
519519

520-
/// Create the bypass resume value coming from the additional bypass block. \p
521-
/// Step is the SCEV-expanded induction step to use. \p MainVectorTripCount
522-
/// provides the trip count of the main vector loop, used to compute the
523-
/// resume value reaching the scalar loop preheader directly from this
524-
/// additional bypass block.
525-
void createInductionBypassValue(PHINode *OrigPhi,
526-
const InductionDescriptor &ID, Value *Step,
527-
Value *MainVectorTripCount);
520+
/// Create and record the bypass resume value for an induction Phi coming from
521+
/// the additional bypass block. \p Step is the SCEV-expanded induction step
522+
/// to use. \p MainVectorTripCount provides the trip count of the main vector
523+
/// loop, used to compute the resume value reaching the scalar loop preheader
524+
/// directly from this additional bypass block.
525+
void createInductionAdditionalBypassValue(PHINode *OrigPhi,
526+
const InductionDescriptor &ID,
527+
Value *Step,
528+
Value *MainVectorTripCount);
528529

529530
/// Returns the original loop trip count.
530531
Value *getTripCount() const { return TripCount; }
@@ -581,10 +582,10 @@ class InnerLoopVectorizer {
581582
/// vector loop preheader, middle block and scalar preheader.
582583
void createVectorLoopSkeleton(StringRef Prefix);
583584

584-
/// Create values for the induction variables to resume iteration count
585-
/// in the bypass block.
586-
void createInductionBypassValues(const SCEV2ValueTy &ExpandedSCEVs,
587-
Value *MainVectorTripCount);
585+
/// Create and record the resume values for induction variables coming from
586+
/// the additional bypass block.
587+
void createInductionAdditionalBypassValues(const SCEV2ValueTy &ExpandedSCEVs,
588+
Value *MainVectorTripCount);
588589

589590
/// Allow subclasses to override and print debug traces before/after vplan
590591
/// execution, when trace information is requested.
@@ -2604,10 +2605,12 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
26042605
nullptr, Twine(Prefix) + "scalar.ph");
26052606
}
26062607

2607-
void InnerLoopVectorizer::createInductionBypassValue(
2608+
void InnerLoopVectorizer::createInductionAdditionalBypassValue(
26082609
PHINode *OrigPhi, const InductionDescriptor &II, Value *Step,
26092610
Value *MainVectorTripCount) {
26102611
Instruction *OldInduction = Legal->getPrimaryInduction();
2612+
// For the primary induction the additional bypass end value is known.
2613+
// Otherwise it is computed.
26112614
Value *EndValueFromAdditionalBypass = MainVectorTripCount;
26122615
if (OrigPhi != OldInduction) {
26132616
IRBuilder<> B(LoopVectorPreHeader->getTerminator());
@@ -2616,7 +2619,7 @@ void InnerLoopVectorizer::createInductionBypassValue(
26162619
if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
26172620
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
26182621

2619-
// Compute the end value for the additional bypass (if applicable).
2622+
// Compute the end value for the additional bypass.
26202623
if (MainVectorTripCount) {
26212624
B.SetInsertPoint(getAdditionalBypassBlock(),
26222625
getAdditionalBypassBlock()->getFirstInsertionPt());
@@ -2672,15 +2675,15 @@ static void addFullyUnrolledInstructionsToIgnore(
26722675
}
26732676
}
26742677

2675-
void InnerLoopVectorizer::createInductionBypassValues(
2678+
void InnerLoopVectorizer::createInductionAdditionalBypassValues(
26762679
const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount) {
26772680
assert(MainVectorTripCount && "Must have bypass information");
26782681

26792682
for (const auto &InductionEntry : Legal->getInductionVars()) {
26802683
PHINode *OrigPhi = InductionEntry.first;
26812684
const InductionDescriptor &II = InductionEntry.second;
2682-
createInductionBypassValue(OrigPhi, II, getExpandedStep(II, ExpandedSCEVs),
2683-
MainVectorTripCount);
2685+
createInductionAdditionalBypassValue(
2686+
OrigPhi, II, getExpandedStep(II, ExpandedSCEVs), MainVectorTripCount);
26842687
}
26852688
}
26862689

@@ -2741,9 +2744,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
27412744
// faster.
27422745
emitMemRuntimeChecks(LoopScalarPreHeader);
27432746

2744-
Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
2745-
assert(VectorTripCount && "Expected valid arguments");
2746-
27472747
return LoopVectorPreHeader;
27482748
}
27492749

@@ -7736,8 +7736,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77367736
//===------------------------------------------------===//
77377737

77387738
// 2. Copy and widen instructions from the old loop into the new loop.
7739-
BestVPlan.prepareToExecute(ILV.getTripCount(),
7740-
ILV.getOrCreateVectorTripCount(nullptr), State);
7739+
BestVPlan.prepareToExecute(
7740+
ILV.getTripCount(),
7741+
ILV.getOrCreateVectorTripCount(ILV.LoopVectorPreHeader), State);
77417742

77427743
BestVPlan.execute(&State);
77437744

@@ -7844,8 +7845,6 @@ BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
78447845
// Generate the induction variable.
78457846
EPI.VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
78467847

7847-
createInductionResumeVPValues(ExpandedSCEVs, nullptr, &WideIVs);
7848-
78497848
return LoopVectorPreHeader;
78507849
}
78517850

@@ -8010,14 +8009,11 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
80108009
Phi->removeIncomingValue(EPI.MemSafetyCheck);
80118010
}
80128011

8013-
// Generate induction resume values. These variables save the new starting
8014-
// indexes for the scalar loop. They are used to test if there are any tail
8015-
// iterations left once the vector loop has completed.
8016-
// Note that when the vectorized epilogue is skipped due to iteration count
8017-
// check, then the resume value for the induction variable comes from
8018-
// the trip count of the main vector loop, passed as the second argument.
8019-
createInductionResumeVPValues(ExpandedSCEVs, EPI.VectorTripCount);
8020-
8012+
// Generate bypass values from the additional bypass block. Note that when the
8013+
// vectorized epilogue is skipped due to the iteration count check, then the
8014+
// resume value for the induction variable comes from the trip count of the
8015+
// main vector loop, passed as the second argument.
8016+
createInductionAdditionalBypassValues(ExpandedSCEVs, EPI.VectorTripCount);
80218017
return LoopVectorPreHeader;
80228018
}
80238019

@@ -8822,30 +8818,33 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
88228818
{CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
88238819
}
88248820

8821+
/// Create a ResumePhi for \p PhiR, if it is a wide induction recipe. If the
8822+
/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
8823+
/// the end value of the induction.
88258824
static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR,
8826-
VPBuilder &Builder,
8825+
VPBuilder &VectorPHBuilder,
88278826
VPBuilder &ScalarPHBuilder,
88288827
VPTypeAnalysis &TypeInfo,
88298828
VPValue *VectorTC) {
88308829
PHINode *OrigPhi;
88318830
const InductionDescriptor *ID;
8832-
VPValue *Start;
8831+
VPValue *Start = PhiR->getStartValue();
88338832
VPValue *Step;
88348833
Type *ScalarTy;
88358834
bool IsCanonical = false;
88368835
if (auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(PhiR)) {
8836+
// Truncated wide inductions resume from the last lane of their vector value
8837+
// in the last vector iteration.
88378838
if (WideIV->getTruncInst())
88388839
return nullptr;
88398840
OrigPhi = cast<PHINode>(WideIV->getUnderlyingValue());
88408841
ID = &WideIV->getInductionDescriptor();
8841-
Start = WideIV->getStartValue();
88428842
Step = WideIV->getStepValue();
88438843
ScalarTy = WideIV->getScalarType();
88448844
IsCanonical = WideIV->isCanonical();
88458845
} else if (auto *WideIV = dyn_cast<VPWidenPointerInductionRecipe>(PhiR)) {
88468846
OrigPhi = cast<PHINode>(WideIV->getUnderlyingValue());
88478847
ID = &WideIV->getInductionDescriptor();
8848-
Start = WideIV->getStartValue();
88498848
Step = WideIV->getOperand(1);
88508849
ScalarTy = Start->getLiveInIRValue()->getType();
88518850
} else {
@@ -8854,14 +8853,17 @@ static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR,
88548853

88558854
VPValue *EndValue = VectorTC;
88568855
if (!IsCanonical) {
8857-
EndValue = Builder.createDerivedIV(
8856+
EndValue = VectorPHBuilder.createDerivedIV(
88588857
ID->getKind(),
88598858
dyn_cast_or_null<FPMathOperator>(ID->getInductionBinOp()), Start,
88608859
VectorTC, Step);
88618860
}
88628861

8862+
// EndValue is based on the vector trip count (which has the same type as the
8863+
// widest induction) and thus may be wider than the induction here.
88638864
if (ScalarTy != TypeInfo.inferScalarType(EndValue)) {
8864-
EndValue = Builder.createScalarCast(Instruction::Trunc, EndValue, ScalarTy);
8865+
EndValue = VectorPHBuilder.createScalarCast(Instruction::Trunc, EndValue,
8866+
ScalarTy);
88658867
}
88668868

88678869
auto *ResumePhiRecipe =
@@ -8870,10 +8872,12 @@ static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR,
88708872
return ResumePhiRecipe;
88718873
}
88728874

8873-
/// Create resume phis in the scalar preheader for first-order recurrences and
8874-
/// reductions and update the VPIRInstructions wrapping the original phis in the
8875-
/// scalar header.
8876-
static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
8875+
/// Create resume phis in the scalar preheader for first-order recurrences,
8876+
/// reductions and inductions, and update the VPIRInstructions wrapping the
8877+
/// original phis in the scalar header.
8878+
static void addScalarResumePhis(
8879+
VPlan &Plan,
8880+
function_ref<VPHeaderPHIRecipe *(PHINode *)> GetHeaderPhiRecipe) {
88778881
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
88788882
auto *ScalarPH = Plan.getScalarPreheader();
88798883
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
@@ -8888,7 +8892,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
88888892
auto *ScalarPhiI = dyn_cast<PHINode>(&ScalarPhiIRI->getInstruction());
88898893
if (!ScalarPhiI)
88908894
break;
8891-
auto *VectorPhiR = cast<VPHeaderPHIRecipe>(Builder.getRecipe(ScalarPhiI));
8895+
auto *VectorPhiR = GetHeaderPhiRecipe(ScalarPhiI);
88928896

88938897
if (VPValue *ResumePhi = addResumeValuesForInduction(
88948898
VectorPhiR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
@@ -9277,7 +9281,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
92779281
VPlanTransforms::handleUncountableEarlyExit(
92789282
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
92799283
}
9280-
addScalarResumePhis(RecipeBuilder, *Plan);
9284+
addScalarResumePhis(*Plan, [&RecipeBuilder](PHINode *P) {
9285+
return cast<VPHeaderPHIRecipe>(RecipeBuilder.getRecipe(P));
9286+
});
92819287
SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks(
92829288
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
92839289
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
@@ -9399,6 +9405,20 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
93999405
bool HasNUW = true;
94009406
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW,
94019407
DebugLoc());
9408+
9409+
addScalarResumePhis(
9410+
*Plan,
9411+
[&Plan](PHINode *P) {
9412+
return find_singleton<VPHeaderPHIRecipe>(
9413+
Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis(),
9414+
[P](VPRecipeBase &R, bool) -> VPHeaderPHIRecipe * {
9415+
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
9416+
return HeaderR->getUnderlyingValue() == P ? HeaderR : nullptr;
9417+
});
9418+
}
9419+
9420+
);
9421+
94029422
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
94039423
return Plan;
94049424
}
@@ -10490,7 +10510,24 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1049010510
Constant::getNullValue(IRI->getType())));
1049110511
ResumePhi->eraseFromParent();
1049210512
}
10493-
VPlanTransforms::removeDeadRecipes(*BestMainPlan);
10513+
// VPlanTransforms::removeDeadRecipes(*BestMainPlan);
10514+
10515+
using namespace VPlanPatternMatch;
10516+
VPBasicBlock *ScalarPHVPBB = BestMainPlan->getScalarPreheader();
10517+
VPValue *VectorTC = &BestMainPlan->getVectorTripCount();
10518+
if (none_of(*ScalarPHVPBB, [VectorTC](VPRecipeBase &R) {
10519+
return match(&R, m_VPInstruction<VPInstruction::ResumePhi>(
10520+
m_Specific(VectorTC), m_SpecificInt(0)));
10521+
})) {
10522+
VPBuilder ScalarPHBuilder(ScalarPHVPBB, ScalarPHVPBB->begin());
10523+
// When vectorizing the epilogue, create a resume phi for the
10524+
// canonical IV if no suitable resume phi was already created.
10525+
ScalarPHBuilder.createNaryOp(
10526+
VPInstruction::ResumePhi,
10527+
{VectorTC, BestMainPlan->getOrAddLiveIn(ConstantInt::get(
10528+
LVL.getWidestInductionType(), 0))},
10529+
{}, "vec.epilog.resume.val");
10530+
}
1049410531

1049510532
auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
1049610533
*BestMainPlan, MainILV, DT, false);

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@ bool VPRecipeBase::mayWriteToMemory() const {
6565
case VPInstruction::FirstOrderRecurrenceSplice:
6666
case VPInstruction::LogicalAnd:
6767
case VPInstruction::PtrAdd:
68-
case VPInstruction::ResumePhi:
6968
return false;
7069
default:
7170
return true;

llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
7474
; CHECK-VS1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
7575
; CHECK-VS1-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
7676
; CHECK-VS1-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 16
77+
; CHECK-VS1-NEXT: [[TMP40:%.*]] = add i64 [[TMP0]], [[N_VEC]]
7778
; CHECK-VS1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[CONV]], i64 0
7879
; CHECK-VS1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
7980
; CHECK-VS1-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -182,6 +183,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef
182183
; CHECK-VS2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
183184
; CHECK-VS2-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
184185
; CHECK-VS2-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 8
186+
; CHECK-VS2-NEXT: [[TMP40:%.*]] = add i64 [[TMP0]], [[N_VEC]]
185187
; CHECK-VS2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i8> poison, i8 [[CONV]], i64 0
186188
; CHECK-VS2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
187189
; CHECK-VS2-NEXT: br label %[[VECTOR_BODY:.*]]

llvm/test/Transforms/LoopVectorize/X86/small-size.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,8 @@ define void @example2(i32 %n, i32 %x) optsize {
151151
; CHECK-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT20]]
152152
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP18]], i64 0
153153
; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]]
154+
; CHECK: pred.store.if20:
155+
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [2048 x i32], ptr @b, i64 0, i64 [[OFFSET_IDX]]
154156
; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
155157
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [2048 x i32], ptr @c, i64 0, i64 [[OFFSET_IDX]]
156158
; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4

llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,9 +241,9 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) {
241241
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
242242
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
243243
; CHECK: vec.epilog.ph:
244-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
245244
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
246245
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ], [ false, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
246+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
247247
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i1 [[BC_MERGE_RDX]], false
248248
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4
249249
; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]]
@@ -331,6 +331,8 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) {
331331
; CHECK: vector.ph:
332332
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
333333
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
334+
; CHECK-NEXT: [[TMP50:%.*]] = mul i64 [[N_VEC]], 16
335+
; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP50]]
334336
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0
335337
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
336338
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]

llvm/test/Transforms/LoopVectorize/iv-select-cmp-nested-loop.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ define i64 @select_iv_def_from_outer_loop(ptr %a, i64 %start, i64 %n) {
4040
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
4141
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[OUTER_LOOP_EXIT]], label %[[SCALAR_PH]]
4242
; CHECK-VF4IC1: [[SCALAR_PH]]:
43-
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_LOOP]] ]
4443
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_OUTER]], %[[OUTER_LOOP]] ]
44+
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_LOOP]] ]
4545
; CHECK-VF4IC1-NEXT: br label %[[INNER_LOOP:.*]]
4646
; CHECK-VF4IC1: [[INNER_LOOP]]:
4747
; CHECK-VF4IC1-NEXT: [[RDX_INNER:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SELECT:%.*]], %[[INNER_LOOP]] ]
@@ -114,8 +114,8 @@ define i64 @select_iv_def_from_outer_loop(ptr %a, i64 %start, i64 %n) {
114114
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
115115
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[OUTER_LOOP_EXIT]], label %[[SCALAR_PH]]
116116
; CHECK-VF4IC4: [[SCALAR_PH]]:
117-
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_LOOP]] ]
118117
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_OUTER]], %[[OUTER_LOOP]] ]
118+
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_LOOP]] ]
119119
; CHECK-VF4IC4-NEXT: br label %[[INNER_LOOP:.*]]
120120
; CHECK-VF4IC4: [[INNER_LOOP]]:
121121
; CHECK-VF4IC4-NEXT: [[RDX_INNER:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SELECT:%.*]], %[[INNER_LOOP]] ]
@@ -189,8 +189,8 @@ define i64 @select_iv_def_from_outer_loop(ptr %a, i64 %start, i64 %n) {
189189
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
190190
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[OUTER_LOOP_EXIT]], label %[[SCALAR_PH]]
191191
; CHECK-VF1IC4: [[SCALAR_PH]]:
192-
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_LOOP]] ]
193192
; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_OUTER]], %[[OUTER_LOOP]] ]
193+
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_LOOP]] ]
194194
; CHECK-VF1IC4-NEXT: br label %[[INNER_LOOP:.*]]
195195
; CHECK-VF1IC4: [[INNER_LOOP]]:
196196
; CHECK-VF1IC4-NEXT: [[RDX_INNER:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SELECT:%.*]], %[[INNER_LOOP]] ]

0 commit comments

Comments
 (0)