Skip to content

Commit 50b9ca4

Browse files
authored
[VPlan] Simplify Plan's entry in removeBranchOnConst. (#154510)
After #153643, there may be a BranchOnCond with a constant condition in the entry block. Simplify those in removeBranchOnConst. This removes a number of redundant conditional branches from entry blocks. In some cases, it may also make the original scalar loop unreachable, because we know it will never execute. In that case, we need to remove the loop from LoopInfo, because all unreachable blocks may dominate each other, making LoopInfo invalid. In those cases, we can also completely remove the loop, for which I'll share a follow-up patch. Depends on #153643. PR: #154510
1 parent a858c90 commit 50b9ca4

File tree

349 files changed

+5840
-6621
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

349 files changed

+5840
-6621
lines changed

llvm/include/llvm/Transforms/Utils/LoopUtils.h

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -533,23 +533,6 @@ LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI,
533533
ReplaceExitVal ReplaceExitValue,
534534
SmallVector<WeakTrackingVH, 16> &DeadInsts);
535535

536-
/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
537-
/// \p OrigLoop and the following distribution of \p OrigLoop iteration among \p
538-
/// UnrolledLoop and \p RemainderLoop. \p UnrolledLoop receives weights that
539-
/// reflect TC/UF iterations, and \p RemainderLoop receives weights that reflect
540-
/// the remaining TC%UF iterations.
541-
///
542-
/// Note that \p OrigLoop may be equal to either \p UnrolledLoop or \p
543-
/// RemainderLoop in which case weights for \p OrigLoop are updated accordingly.
544-
/// Note also behavior is undefined if \p UnrolledLoop and \p RemainderLoop are
545-
/// equal. \p UF must be greater than zero.
546-
/// If \p OrigLoop has no profile info associated nothing happens.
547-
///
548-
/// This utility may be useful for such optimizations as unroller and
549-
/// vectorizer as it's typical transformation for them.
550-
LLVM_ABI void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
551-
Loop *RemainderLoop, uint64_t UF);
552-
553536
/// Utility that implements appending of loops onto a worklist given a range.
554537
/// We want to process loops in postorder, but the worklist is a LIFO data
555538
/// structure, so we append to it in *reverse* postorder.

llvm/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1865,32 +1865,6 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
18651865
return NumReplaced;
18661866
}
18671867

1868-
/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
1869-
/// \p OrigLoop.
1870-
void llvm::setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
1871-
Loop *RemainderLoop, uint64_t UF) {
1872-
assert(UF > 0 && "Zero unrolled factor is not supported");
1873-
assert(UnrolledLoop != RemainderLoop &&
1874-
"Unrolled and Remainder loops are expected to distinct");
1875-
1876-
// Get number of iterations in the original scalar loop.
1877-
unsigned OrigLoopInvocationWeight = 0;
1878-
std::optional<unsigned> OrigAverageTripCount =
1879-
getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight);
1880-
if (!OrigAverageTripCount)
1881-
return;
1882-
1883-
// Calculate number of iterations in unrolled loop.
1884-
unsigned UnrolledAverageTripCount = *OrigAverageTripCount / UF;
1885-
// Calculate number of iterations for remainder loop.
1886-
unsigned RemainderAverageTripCount = *OrigAverageTripCount % UF;
1887-
1888-
setLoopEstimatedTripCount(UnrolledLoop, UnrolledAverageTripCount,
1889-
OrigLoopInvocationWeight);
1890-
setLoopEstimatedTripCount(RemainderLoop, RemainderAverageTripCount,
1891-
OrigLoopInvocationWeight);
1892-
}
1893-
18941868
/// Utility that implements appending of loops onto a worklist.
18951869
/// Loops are added in preorder (analogous for reverse postorder for trees),
18961870
/// and the worklist is processed LIFO.

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -570,12 +570,18 @@ class LoopVectorizationPlanner {
570570

571571
/// Update loop metadata and profile info for both the scalar remainder loop
572572
/// and \p VectorLoop, if it exists. Keeps all loop hints from the original
573-
/// loop on the vector loop and replaces vectorizer-specific metadata.
574-
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop,
575-
VPBasicBlock *HeaderVPBB,
576-
bool VectorizingEpilogue,
577-
unsigned EstimatedVFxUF,
578-
bool DisableRuntimeUnroll);
573+
/// loop on the vector loop and replaces vectorizer-specific metadata. The
574+
/// loop ID of the original loop \p OrigLoopID must be passed, together with
575+
/// the average trip count and invocation weight of the original loop (\p
576+
/// OrigAverageTripCount and \p OrigLoopInvocationWeight respectively). They
577+
/// cannot be retrieved after the plan has been executed, as the original loop
578+
/// may have been removed.
579+
void updateLoopMetadataAndProfileInfo(
580+
Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
581+
bool VectorizingEpilogue, MDNode *OrigLoopID,
582+
std::optional<unsigned> OrigAverageTripCount,
583+
unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
584+
bool DisableRuntimeUnroll);
579585

580586
protected:
581587
/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2347,12 +2347,15 @@ Value *EpilogueVectorizerMainLoop::createIterationCountCheck(
23472347
}
23482348

23492349
/// Replace \p VPBB with a VPIRBasicBlock wrapping \p IRBB. All recipes from \p
2350-
/// VPBB are moved to the end of the newly created VPIRBasicBlock. VPBB must
2351-
/// have a single predecessor, which is rewired to the new VPIRBasicBlock. All
2352-
/// successors of VPBB, if any, are rewired to the new VPIRBasicBlock.
2350+
/// VPBB are moved to the end of the newly created VPIRBasicBlock. All
2351+
/// predecessors and successors of VPBB, if any, are rewired to the new
2352+
/// VPIRBasicBlock. If \p VPBB may be unreachable, \p Plan must be passed.
23532353
static VPIRBasicBlock *replaceVPBBWithIRVPBB(VPBasicBlock *VPBB,
2354-
BasicBlock *IRBB) {
2355-
VPIRBasicBlock *IRVPBB = VPBB->getPlan()->createVPIRBasicBlock(IRBB);
2354+
BasicBlock *IRBB,
2355+
VPlan *Plan = nullptr) {
2356+
if (!Plan)
2357+
Plan = VPBB->getPlan();
2358+
VPIRBasicBlock *IRVPBB = Plan->createVPIRBasicBlock(IRBB);
23562359
auto IP = IRVPBB->begin();
23572360
for (auto &R : make_early_inc_range(VPBB->phis()))
23582361
R.moveBefore(*IRVPBB, IP);
@@ -7184,6 +7187,19 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
71847187
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
71857188
VPlanTransforms::simplifyRecipes(BestVPlan);
71867189
VPlanTransforms::removeBranchOnConst(BestVPlan);
7190+
if (BestVPlan.getEntry()->getSingleSuccessor() ==
7191+
BestVPlan.getScalarPreheader()) {
7192+
// TODO: The vector loop would be dead, should not even try to vectorize.
7193+
ORE->emit([&]() {
7194+
return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationDead",
7195+
OrigLoop->getStartLoc(),
7196+
OrigLoop->getHeader())
7197+
<< "Created vector loop never executes due to insufficient trip "
7198+
"count.";
7199+
});
7200+
return DenseMap<const SCEV *, Value *>();
7201+
}
7202+
71877203
VPlanTransforms::narrowInterleaveGroups(
71887204
BestVPlan, BestVF,
71897205
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
@@ -7226,7 +7242,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
72267242
// middle block. The vector loop is created during VPlan execution.
72277243
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
72287244
replaceVPBBWithIRVPBB(BestVPlan.getScalarPreheader(),
7229-
State.CFG.PrevBB->getSingleSuccessor());
7245+
State.CFG.PrevBB->getSingleSuccessor(), &BestVPlan);
72307246
VPlanTransforms::removeDeadRecipes(BestVPlan);
72317247

72327248
assert(verifyVPlanIsValid(BestVPlan, true /*VerifyLate*/) &&
@@ -7257,6 +7273,13 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
72577273
//
72587274
//===------------------------------------------------===//
72597275

7276+
// Retrieve loop information before executing the plan, which may remove the
7277+
// original loop, if it becomes unreachable.
7278+
MDNode *LID = OrigLoop->getLoopID();
7279+
unsigned OrigLoopInvocationWeight = 0;
7280+
std::optional<unsigned> OrigAverageTripCount =
7281+
getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight);
7282+
72607283
BestVPlan.execute(&State);
72617284

72627285
// 2.6. Maintain Loop Hints
@@ -7270,7 +7293,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
72707293
updateLoopMetadataAndProfileInfo(
72717294
HeaderVPBB ? LI->getLoopFor(State.CFG.VPBB2IRBB.lookup(HeaderVPBB))
72727295
: nullptr,
7273-
HeaderVPBB, VectorizingEpilogue,
7296+
HeaderVPBB, BestVPlan, VectorizingEpilogue, LID, OrigAverageTripCount,
7297+
OrigLoopInvocationWeight,
72747298
estimateElementCount(BestVF * BestUF, CM.getVScaleForTuning()),
72757299
DisableRuntimeUnroll);
72767300

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 48 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -969,12 +969,24 @@ void VPlan::execute(VPTransformState *State) {
969969
setName("Final VPlan");
970970
LLVM_DEBUG(dump());
971971

972-
// Disconnect scalar preheader and scalar header, as the dominator tree edge
973-
// will be updated as part of VPlan execution. This allows keeping the DTU
974-
// logic generic during VPlan execution.
975972
BasicBlock *ScalarPh = State->CFG.ExitBB;
976-
State->CFG.DTU.applyUpdates(
977-
{{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
973+
VPBasicBlock *ScalarPhVPBB = getScalarPreheader();
974+
if (ScalarPhVPBB->hasPredecessors()) {
975+
// Disconnect scalar preheader and scalar header, as the dominator tree edge
976+
// will be updated as part of VPlan execution. This allows keeping the DTU
977+
// logic generic during VPlan execution.
978+
State->CFG.DTU.applyUpdates(
979+
{{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
980+
} else {
981+
Loop *OrigLoop =
982+
State->LI->getLoopFor(getScalarHeader()->getIRBasicBlock());
983+
// If the original loop is unreachable, we need to delete it.
984+
auto Blocks = OrigLoop->getBlocksVector();
985+
Blocks.push_back(cast<VPIRBasicBlock>(ScalarPhVPBB)->getIRBasicBlock());
986+
for (auto *BB : Blocks)
987+
State->LI->removeBlock(BB);
988+
State->LI->erase(OrigLoop);
989+
}
978990

979991
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
980992
Entry);
@@ -1648,14 +1660,18 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
16481660
}
16491661

16501662
void LoopVectorizationPlanner::updateLoopMetadataAndProfileInfo(
1651-
Loop *VectorLoop, VPBasicBlock *HeaderVPBB, bool VectorizingEpilogue,
1652-
unsigned EstimatedVFxUF, bool DisableRuntimeUnroll) {
1653-
MDNode *LID = OrigLoop->getLoopID();
1663+
Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
1664+
bool VectorizingEpilogue, MDNode *OrigLoopID,
1665+
std::optional<unsigned> OrigAverageTripCount,
1666+
unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
1667+
bool DisableRuntimeUnroll) {
16541668
// Update the metadata of the scalar loop. Skip the update when vectorizing
1655-
// the epilogue loop, to ensure it is only updated once.
1656-
if (!VectorizingEpilogue) {
1657-
std::optional<MDNode *> RemainderLoopID = makeFollowupLoopID(
1658-
LID, {LLVMLoopVectorizeFollowupAll, LLVMLoopVectorizeFollowupEpilogue});
1669+
// the epilogue loop to ensure it is updated only once. Also skip the update
1670+
// when the scalar loop became unreachable.
1671+
if (Plan.getScalarPreheader()->hasPredecessors() && !VectorizingEpilogue) {
1672+
std::optional<MDNode *> RemainderLoopID =
1673+
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
1674+
LLVMLoopVectorizeFollowupEpilogue});
16591675
if (RemainderLoopID) {
16601676
OrigLoop->setLoopID(*RemainderLoopID);
16611677
} else {
@@ -1670,15 +1686,15 @@ void LoopVectorizationPlanner::updateLoopMetadataAndProfileInfo(
16701686
if (!VectorLoop)
16711687
return;
16721688

1673-
if (std::optional<MDNode *> VectorizedLoopID =
1674-
makeFollowupLoopID(LID, {LLVMLoopVectorizeFollowupAll,
1675-
LLVMLoopVectorizeFollowupVectorized})) {
1689+
if (std::optional<MDNode *> VectorizedLoopID = makeFollowupLoopID(
1690+
OrigLoopID, {LLVMLoopVectorizeFollowupAll,
1691+
LLVMLoopVectorizeFollowupVectorized})) {
16761692
VectorLoop->setLoopID(*VectorizedLoopID);
16771693
} else {
16781694
// Keep all loop hints from the original loop on the vector loop (we'll
16791695
// replace the vectorizer-specific hints below).
1680-
if (LID)
1681-
VectorLoop->setLoopID(LID);
1696+
if (OrigLoopID)
1697+
VectorLoop->setLoopID(OrigLoopID);
16821698

16831699
if (!VectorizingEpilogue) {
16841700
LoopVectorizeHints Hints(VectorLoop, true, *ORE);
@@ -1723,7 +1739,21 @@ void LoopVectorizationPlanner::updateLoopMetadataAndProfileInfo(
17231739
// For scalable vectorization we can't know at compile time how many
17241740
// iterations of the loop are handled in one vector iteration, so instead
17251741
// use the value of vscale used for tuning.
1726-
setProfileInfoAfterUnrolling(OrigLoop, VectorLoop, OrigLoop, EstimatedVFxUF);
1742+
if (!OrigAverageTripCount)
1743+
return;
1744+
// Calculate number of iterations in unrolled loop.
1745+
unsigned AverageVectorTripCount = *OrigAverageTripCount / EstimatedVFxUF;
1746+
// Calculate number of iterations for remainder loop.
1747+
unsigned RemainderAverageTripCount = *OrigAverageTripCount % EstimatedVFxUF;
1748+
1749+
if (HeaderVPBB) {
1750+
setLoopEstimatedTripCount(VectorLoop, AverageVectorTripCount,
1751+
OrigLoopInvocationWeight);
1752+
}
1753+
if (Plan.getScalarPreheader()->hasPredecessors()) {
1754+
setLoopEstimatedTripCount(OrigLoop, RemainderAverageTripCount,
1755+
OrigLoopInvocationWeight);
1756+
}
17271757
}
17281758

17291759
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2215,10 +2215,12 @@ void VPlanTransforms::removeBranchOnConst(VPlan &Plan) {
22152215
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
22162216
vp_depth_first_shallow(Plan.getEntry()))) {
22172217
VPValue *Cond;
2218-
if (VPBB->getNumSuccessors() != 2 || VPBB == Plan.getEntry() ||
2219-
!match(&VPBB->back(), m_BranchOnCond(m_VPValue(Cond))))
2218+
// Skip blocks that are not terminated by BranchOnCond.
2219+
if (VPBB->empty() || !match(&VPBB->back(), m_BranchOnCond(m_VPValue(Cond))))
22202220
continue;
22212221

2222+
assert(VPBB->getNumSuccessors() == 2 &&
2223+
"Two successors expected for BranchOnCond");
22222224
unsigned RemovedIdx;
22232225
if (match(Cond, m_True()))
22242226
RemovedIdx = 1;

llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@ define i64 @predicated_udiv_scalarized_operand(ptr %a, i64 %x) {
5252
; CHECK-NEXT: [[TMP17]] = add <2 x i64> [[VEC_PHI]], [[PREDPHI]]
5353
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
5454
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
55-
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
56-
; CHECK: middle.block:
55+
; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
56+
; CHECK: for.end:
5757
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP17]])
5858
; CHECK-NEXT: ret i64 [[TMP19]]
5959
;

llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -180,8 +180,8 @@ exit:
180180
define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i1 %c.0) {
181181
; CHECK-LABEL: define void @test_blend_feeding_replicated_store_2(
182182
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i1 [[C_0:%.*]]) {
183-
; CHECK-NEXT: [[ENTRY:.*]]:
184-
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
183+
; CHECK-NEXT: [[ENTRY:.*:]]
184+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
185185
; CHECK: [[VECTOR_PH]]:
186186
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i1> poison, i1 [[C_0]], i64 0
187187
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i1> [[BROADCAST_SPLATINSERT]], <16 x i1> poison, <16 x i32> zeroinitializer
@@ -344,12 +344,11 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
344344
; CHECK-NEXT: [[TMP71:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
345345
; CHECK-NEXT: br i1 [[TMP71]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
346346
; CHECK: [[MIDDLE_BLOCK]]:
347-
; CHECK-NEXT: br label %[[SCALAR_PH]]
347+
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
348348
; CHECK: [[SCALAR_PH]]:
349-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
350349
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
351350
; CHECK: [[LOOP_HEADER]]:
352-
; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
351+
; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 96, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
353352
; CHECK-NEXT: [[GEP_SRC1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[IV1]]
354353
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC1]], align 1
355354
; CHECK-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], 0

llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ target triple = "arm64-apple-macosx11.0.0"
66
define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
77
; CHECK-LABEL: define void @fshl_operand_first_order_recurrence(
88
; CHECK-SAME: ptr [[DST:%.*]], ptr noalias [[SRC:%.*]]) {
9-
; CHECK-NEXT: [[ENTRY:.*]]:
10-
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
9+
; CHECK-NEXT: [[ENTRY:.*:]]
10+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
1111
; CHECK: [[VECTOR_PH]]:
1212
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1313
; CHECK: [[VECTOR_BODY]]:
@@ -30,14 +30,12 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
3030
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3131
; CHECK: [[MIDDLE_BLOCK]]:
3232
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[WIDE_LOAD1]], i32 1
33-
; CHECK-NEXT: br label %[[SCALAR_PH]]
33+
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
3434
; CHECK: [[SCALAR_PH]]:
35-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
36-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
3735
; CHECK-NEXT: br label %[[LOOP:.*]]
3836
; CHECK: [[LOOP]]:
39-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
40-
; CHECK-NEXT: [[RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ]
37+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 100, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
38+
; CHECK-NEXT: [[RECUR:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ]
4139
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]]
4240
; CHECK-NEXT: [[L]] = load i64, ptr [[GEP_SRC]], align 8
4341
; CHECK-NEXT: [[OR:%.*]] = tail call i64 @llvm.fshl.i64(i64 1, i64 [[RECUR]], i64 1)
@@ -73,7 +71,7 @@ define void @powi_call(ptr %P) {
7371
; CHECK-LABEL: define void @powi_call(
7472
; CHECK-SAME: ptr [[P:%.*]]) {
7573
; CHECK-NEXT: [[ENTRY:.*:]]
76-
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
74+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
7775
; CHECK: [[VECTOR_PH]]:
7876
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
7977
; CHECK: [[VECTOR_BODY]]:
@@ -83,7 +81,7 @@ define void @powi_call(ptr %P) {
8381
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
8482
; CHECK: [[MIDDLE_BLOCK]]:
8583
; CHECK-NEXT: br label %[[EXIT:.*]]
86-
; CHECK: [[SCALAR_PH]]:
84+
; CHECK: [[SCALAR_PH:.*]]:
8785
; CHECK-NEXT: br label %[[LOOP:.*]]
8886
; CHECK: [[LOOP]]:
8987
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
@@ -93,7 +91,7 @@ define void @powi_call(ptr %P) {
9391
; CHECK-NEXT: store double [[POWI]], ptr [[GEP]], align 8
9492
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
9593
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
96-
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
94+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]]
9795
; CHECK: [[EXIT]]:
9896
; CHECK-NEXT: ret void
9997
;
@@ -224,5 +222,4 @@ declare i64 @llvm.fshl.i64(i64, i64, i64)
224222
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
225223
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
226224
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
227-
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]}
228225
;.

0 commit comments

Comments
 (0)