Skip to content

Commit 1824795

Browse files
committed
[VPlan] Simplify Plan's entry in removeBranchOnConst.
1 parent b51a97b commit 1824795

File tree

331 files changed

+5687
-6454
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

331 files changed

+5687
-6454
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 49 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -2357,9 +2357,9 @@ EpilogueVectorizerMainLoop::createIterationCountCheck(ElementCount VF,
23572357
/// VPBB are moved to the end of the newly created VPIRBasicBlock. VPBB must
23582358
/// have a single predecessor, which is rewired to the new VPIRBasicBlock. All
23592359
/// successors of VPBB, if any, are rewired to the new VPIRBasicBlock.
2360-
static VPIRBasicBlock *replaceVPBBWithIRVPBB(VPBasicBlock *VPBB,
2360+
static VPIRBasicBlock *replaceVPBBWithIRVPBB(VPlan &Plan, VPBasicBlock *VPBB,
23612361
BasicBlock *IRBB) {
2362-
VPIRBasicBlock *IRVPBB = VPBB->getPlan()->createVPIRBasicBlock(IRBB);
2362+
VPIRBasicBlock *IRVPBB = Plan.createVPIRBasicBlock(IRBB);
23632363
auto IP = IRVPBB->begin();
23642364
for (auto &R : make_early_inc_range(VPBB->phis()))
23652365
R.moveBefore(*IRVPBB, IP);
@@ -2571,6 +2571,9 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
25712571
// Remove redundant induction instructions.
25722572
cse(HeaderBB);
25732573

2574+
if (Plan.getScalarPreheader()->getNumPredecessors() == 0)
2575+
return;
2576+
25742577
// Set/update profile weights for the vector and remainder loops as original
25752578
// loop iterations are now distributed among them. Note that original loop
25762579
// becomes the scalar remainder loop after vectorization.
@@ -7226,6 +7229,12 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
72267229
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
72277230
VPlanTransforms::simplifyRecipes(BestVPlan);
72287231
VPlanTransforms::removeBranchOnConst(BestVPlan);
7232+
if (BestVPlan.getEntry()->getSingleSuccessor() ==
7233+
BestVPlan.getScalarPreheader()) {
7234+
// TODO: Should not even try to vectorize.
7235+
return DenseMap<const SCEV *, Value *>();
7236+
}
7237+
72297238
VPlanTransforms::narrowInterleaveGroups(
72307239
BestVPlan, BestVF,
72317240
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
@@ -7268,7 +7277,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
72687277
BasicBlock *EntryBB =
72697278
cast<VPIRBasicBlock>(BestVPlan.getEntry())->getIRBasicBlock();
72707279
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
7271-
replaceVPBBWithIRVPBB(BestVPlan.getScalarPreheader(),
7280+
replaceVPBBWithIRVPBB(BestVPlan, BestVPlan.getScalarPreheader(),
72727281
State.CFG.PrevBB->getSingleSuccessor());
72737282
VPlanTransforms::removeDeadRecipes(BestVPlan);
72747283

@@ -7351,8 +7360,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
73517360
} else {
73527361
// Keep all loop hints from the original loop on the vector loop (we'll
73537362
// replace the vectorizer-specific hints below).
7354-
if (MDNode *LID = OrigLoop->getLoopID())
7355-
L->setLoopID(LID);
7363+
if (BestVPlan.getScalarPreheader()->getNumPredecessors() > 0)
7364+
if (MDNode *LID = OrigLoop->getLoopID())
7365+
L->setLoopID(LID);
73567366

73577367
LoopVectorizeHints Hints(L, true, *ORE);
73587368
Hints.setAlreadyVectorized();
@@ -7383,6 +7393,16 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
73837393
addRuntimeUnrollDisableMetaData(L);
73847394
}
73857395

7396+
if (BestVPlan.getScalarPreheader()->getNumPredecessors() == 0) {
7397+
// If the original loop became unreachable, we need to delete it.
7398+
auto Blocks = OrigLoop->getBlocksVector();
7399+
Blocks.push_back(cast<VPIRBasicBlock>(BestVPlan.getScalarPreheader())
7400+
->getIRBasicBlock());
7401+
for (auto *BB : Blocks)
7402+
LI->removeBlock(BB);
7403+
LI->erase(OrigLoop);
7404+
}
7405+
73867406
// 3. Fix the vectorized code: take care of header phi's, live-outs,
73877407
// predication, updating analyses.
73887408
ILV.fixVectorizedLoop(State);
@@ -7460,7 +7480,8 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
74607480
// generated here dominates the vector epilog iter check.
74617481
EPI.TripCount = Count;
74627482
} else {
7463-
VectorPHVPBB = replaceVPBBWithIRVPBB(VectorPHVPBB, LoopVectorPreHeader);
7483+
VectorPHVPBB =
7484+
replaceVPBBWithIRVPBB(Plan, VectorPHVPBB, LoopVectorPreHeader);
74647485
}
74657486

74667487
BranchInst &BI =
@@ -7493,7 +7514,7 @@ BasicBlock *EpilogueVectorizerEpilogueLoop::createVectorizedLoopSkeleton() {
74937514
BasicBlock *VecEpilogueIterationCountCheck =
74947515
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->begin(), DT, LI,
74957516
nullptr, "vec.epilog.iter.check", true);
7496-
VectorPHVPBB = replaceVPBBWithIRVPBB(VectorPHVPBB, LoopVectorPreHeader);
7517+
VectorPHVPBB = replaceVPBBWithIRVPBB(Plan, VectorPHVPBB, LoopVectorPreHeader);
74977518

74987519
emitMinimumVectorEpilogueIterCountCheck(LoopScalarPreHeader,
74997520
VecEpilogueIterationCountCheck);
@@ -10213,11 +10234,22 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1021310234
LLVM_DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
1021410235
}
1021510236

10237+
if (ORE->allowExtraAnalysis(LV_NAME))
10238+
checkMixedPrecision(L, ORE);
10239+
1021610240
bool DisableRuntimeUnroll = false;
1021710241
MDNode *OrigLoopID = L->getLoopID();
10242+
bool LoopRemoved = false;
1021810243
{
1021910244
using namespace ore;
1022010245
if (!VectorizeLoop) {
10246+
ORE->emit([&]() {
10247+
return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
10248+
L->getHeader())
10249+
<< "interleaved loop (interleaved count: "
10250+
<< NV("InterleaveCount", IC) << ")";
10251+
});
10252+
1022110253
assert(IC > 1 && "interleave count should not be 1 or 0");
1022210254
// If we decided that it is not legal to vectorize the loop, then
1022310255
// interleave it.
@@ -10234,14 +10266,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1023410266
LVP.addMinimumIterationCheck(BestPlan, VF.Width, IC,
1023510267
VF.MinProfitableTripCount);
1023610268
LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
10237-
10238-
ORE->emit([&]() {
10239-
return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
10240-
L->getHeader())
10241-
<< "interleaved loop (interleaved count: "
10242-
<< NV("InterleaveCount", IC) << ")";
10243-
});
10269+
LoopRemoved = BestPlan.getScalarPreheader()->getNumPredecessors() == 0;
1024410270
} else {
10271+
// Report the vectorization decision.
10272+
reportVectorization(ORE, L, VF, IC);
10273+
1024510274
// If we decided that it is *legal* to vectorize the loop, then do it.
1024610275

1024710276
VPlan &BestPlan = LVP.getPlanFor(VF.Width);
@@ -10311,23 +10340,23 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1031110340
// rarely used is not worth unrolling.
1031210341
if (!Checks.hasChecks())
1031310342
DisableRuntimeUnroll = true;
10343+
LoopRemoved = BestPlan.getScalarPreheader()->getNumPredecessors() == 0;
1031410344
}
10315-
// Report the vectorization decision.
10316-
reportVectorization(ORE, L, VF, IC);
1031710345
}
10318-
10319-
if (ORE->allowExtraAnalysis(LV_NAME))
10320-
checkMixedPrecision(L, ORE);
1032110346
}
1032210347

1032310348
assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
1032410349
"DT not preserved correctly");
1032510350

10351+
if (LoopRemoved)
10352+
return true;
10353+
1032610354
std::optional<MDNode *> RemainderLoopID =
1032710355
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
1032810356
LLVMLoopVectorizeFollowupEpilogue});
1032910357
if (RemainderLoopID) {
10330-
L->setLoopID(*RemainderLoopID);
10358+
if (!LoopRemoved)
10359+
L->setLoopID(*RemainderLoopID);
1033110360
} else {
1033210361
if (DisableRuntimeUnroll)
1033310362
addRuntimeUnrollDisableMetaData(L);

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -972,12 +972,14 @@ void VPlan::execute(VPTransformState *State) {
972972
setName("Final VPlan");
973973
LLVM_DEBUG(dump());
974974

975-
// Disconnect scalar preheader and scalar header, as the dominator tree edge
976-
// will be updated as part of VPlan execution. This allows keeping the DTU
977-
// logic generic during VPlan execution.
978975
BasicBlock *ScalarPh = State->CFG.ExitBB;
979-
State->CFG.DTU.applyUpdates(
980-
{{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
976+
if (getScalarPreheader()->getNumPredecessors() > 0) {
977+
// Disconnect scalar preheader and scalar header, as the dominator tree edge
978+
// will be updated as part of VPlan execution. This allows keeping the DTU
979+
// logic generic during VPlan execution.
980+
State->CFG.DTU.applyUpdates(
981+
{{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
982+
}
981983

982984
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
983985
Entry);

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1920,7 +1920,7 @@ void VPlanTransforms::removeBranchOnConst(VPlan &Plan) {
19201920
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
19211921
vp_depth_first_shallow(Plan.getEntry()))) {
19221922
VPValue *Cond;
1923-
if (VPBB->getNumSuccessors() != 2 || VPBB == Plan.getEntry() ||
1923+
if (VPBB->getNumSuccessors() != 2 || VPBB->empty() ||
19241924
!match(&VPBB->back(), m_BranchOnCond(m_VPValue(Cond))))
19251925
continue;
19261926

llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -52,8 +52,8 @@ define i64 @predicated_udiv_scalarized_operand(ptr %a, i64 %x) {
5252
; CHECK-NEXT: [[TMP17]] = add <2 x i64> [[VEC_PHI]], [[PREDPHI]]
5353
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
5454
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
55-
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
56-
; CHECK: middle.block:
55+
; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
56+
; CHECK: for.end:
5757
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP17]])
5858
; CHECK-NEXT: ret i64 [[TMP19]]
5959
;

llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -202,8 +202,8 @@ exit:
202202
define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i1 %c.0) {
203203
; CHECK-LABEL: define void @test_blend_feeding_replicated_store_2(
204204
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i1 [[C_0:%.*]]) {
205-
; CHECK-NEXT: [[ENTRY:.*]]:
206-
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
205+
; CHECK-NEXT: [[ENTRY:.*:]]
206+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
207207
; CHECK: [[VECTOR_PH]]:
208208
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i1> poison, i1 [[C_0]], i64 0
209209
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i1> [[BROADCAST_SPLATINSERT]], <16 x i1> poison, <16 x i32> zeroinitializer
@@ -366,12 +366,11 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
366366
; CHECK-NEXT: [[TMP71:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
367367
; CHECK-NEXT: br i1 [[TMP71]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
368368
; CHECK: [[MIDDLE_BLOCK]]:
369-
; CHECK-NEXT: br label %[[SCALAR_PH]]
369+
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
370370
; CHECK: [[SCALAR_PH]]:
371-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
372371
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
373372
; CHECK: [[LOOP_HEADER]]:
374-
; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
373+
; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 96, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
375374
; CHECK-NEXT: [[GEP_SRC1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[IV1]]
376375
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC1]], align 1
377376
; CHECK-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], 0

llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll

Lines changed: 8 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,8 @@ target triple = "arm64-apple-macosx11.0.0"
66
define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
77
; CHECK-LABEL: define void @fshl_operand_first_order_recurrence(
88
; CHECK-SAME: ptr [[DST:%.*]], ptr noalias [[SRC:%.*]]) {
9-
; CHECK-NEXT: [[ENTRY:.*]]:
10-
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
9+
; CHECK-NEXT: [[ENTRY:.*:]]
10+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
1111
; CHECK: [[VECTOR_PH]]:
1212
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1313
; CHECK: [[VECTOR_BODY]]:
@@ -30,14 +30,12 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
3030
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3131
; CHECK: [[MIDDLE_BLOCK]]:
3232
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[WIDE_LOAD1]], i32 1
33-
; CHECK-NEXT: br label %[[SCALAR_PH]]
33+
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
3434
; CHECK: [[SCALAR_PH]]:
35-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
36-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
3735
; CHECK-NEXT: br label %[[LOOP:.*]]
3836
; CHECK: [[LOOP]]:
39-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
40-
; CHECK-NEXT: [[RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ]
37+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 100, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
38+
; CHECK-NEXT: [[RECUR:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ]
4139
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]]
4240
; CHECK-NEXT: [[L]] = load i64, ptr [[GEP_SRC]], align 8
4341
; CHECK-NEXT: [[OR:%.*]] = tail call i64 @llvm.fshl.i64(i64 1, i64 [[RECUR]], i64 1)
@@ -73,7 +71,7 @@ define void @powi_call(ptr %P) {
7371
; CHECK-LABEL: define void @powi_call(
7472
; CHECK-SAME: ptr [[P:%.*]]) {
7573
; CHECK-NEXT: [[ENTRY:.*:]]
76-
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
74+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
7775
; CHECK: [[VECTOR_PH]]:
7876
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
7977
; CHECK: [[VECTOR_BODY]]:
@@ -83,7 +81,7 @@ define void @powi_call(ptr %P) {
8381
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
8482
; CHECK: [[MIDDLE_BLOCK]]:
8583
; CHECK-NEXT: br label %[[EXIT:.*]]
86-
; CHECK: [[SCALAR_PH]]:
84+
; CHECK: [[SCALAR_PH:.*]]:
8785
; CHECK-NEXT: br label %[[LOOP:.*]]
8886
; CHECK: [[LOOP]]:
8987
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
@@ -93,7 +91,7 @@ define void @powi_call(ptr %P) {
9391
; CHECK-NEXT: store double [[POWI]], ptr [[GEP]], align 8
9492
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
9593
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
96-
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
94+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]]
9795
; CHECK: [[EXIT]]:
9896
; CHECK-NEXT: ret void
9997
;
@@ -224,5 +222,4 @@ declare i64 @llvm.fshl.i64(i64, i64, i64)
224222
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
225223
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
226224
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
227-
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]}
228225
;.

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
55
; CHECK-LABEL: define void @clamped_tc_8(
66
; CHECK-SAME: ptr captures(none) [[DST:%.*]], i32 [[N:%.*]], i64 [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
77
; CHECK-NEXT: entry:
8-
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
8+
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
99
; CHECK: vector.ph:
1010
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
1111
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
@@ -36,7 +36,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
3636
; CHECK: scalar.ph:
3737
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
3838
; CHECK: for.body:
39-
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
39+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
4040
; CHECK-NEXT: [[P_OUT_TAIL_09:%.*]] = phi ptr [ [[DST]], [[SCALAR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ]
4141
; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 3
4242
; CHECK-NEXT: [[SHR3:%.*]] = lshr i64 [[VAL]], [[TMP19]]
@@ -45,7 +45,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
4545
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_OUT_TAIL_09]], i64 1
4646
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
4747
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8
48-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
48+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
4949
; CHECK: for.cond.cleanup:
5050
; CHECK-NEXT: ret void
5151
;
@@ -79,7 +79,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
7979
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[REM]], 7
8080
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[ADD]], 3
8181
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SHR]] to i64
82-
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
82+
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
8383
; CHECK: vector.ph:
8484
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
8585
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
@@ -104,13 +104,13 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
104104
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
105105
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]])
106106
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
107-
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
107+
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
108108
; CHECK: middle.block:
109109
; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
110110
; CHECK: scalar.ph:
111111
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
112112
; CHECK: for.body:
113-
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
113+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
114114
; CHECK-NEXT: [[P_OUT_TAIL_09:%.*]] = phi ptr [ [[DST]], [[SCALAR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ]
115115
; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 3
116116
; CHECK-NEXT: [[SHR3:%.*]] = lshr i64 [[VAL]], [[TMP19]]
@@ -119,7 +119,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
119119
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_OUT_TAIL_09]], i64 1
120120
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
121121
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
122-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
122+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
123123
; CHECK: for.cond.cleanup.loopexit:
124124
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
125125
; CHECK: for.cond.cleanup:
@@ -156,7 +156,5 @@ for.cond.cleanup: ; preds = %for.body
156156
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
157157
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
158158
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
159-
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
160-
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
161-
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
159+
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
162160
;.

0 commit comments

Comments
 (0)