Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
f85de24
[LV] Add variable trip count for test.
fhahn Aug 20, 2025
33afce8
[VPlan] Simplify Plan's entry in removeBranchOnConst.
fhahn Aug 14, 2025
208a182
Merge remote-tracking branch 'origin/main' into vplan-simplify-branch…
fhahn Sep 1, 2025
1e16872
!fixup address latest comments, thanks!
fhahn Sep 1, 2025
17fe80c
Merge remote-tracking branch 'origin/main' into vplan-simplify-branch…
fhahn Sep 1, 2025
528e463
!fixup address latest comments, thanks!
fhahn Sep 1, 2025
03203ae
Merge remote-tracking branch 'origin/main' into vplan-simplify-branch…
fhahn Sep 1, 2025
7b19cec
!fixup address comments, thanks
fhahn Sep 1, 2025
c9228c1
Merge remote-tracking branch 'origin/main' into vplan-simplify-branch…
fhahn Sep 1, 2025
189b639
!fixup fix profile info
fhahn Sep 1, 2025
01e7486
!fixup fix formatting
fhahn Sep 1, 2025
3d90160
Merge remote-tracking branch 'origin/main' into vplan-simplify-branch…
fhahn Sep 1, 2025
4390c24
!fixup update new tests
fhahn Sep 1, 2025
e0f99d9
Merge remote-tracking branch 'origin/main' into vplan-simplify-branch…
fhahn Sep 3, 2025
519ae8b
!fixup move out loop metadata update
fhahn Sep 3, 2025
ca164d9
!fixup move
fhahn Sep 3, 2025
1bac0c2
Merge remote-tracking branch 'origin/main' into vplan-simplify-branch…
fhahn Sep 3, 2025
0006272
!fixup move more
fhahn Sep 4, 2025
5b28b16
Merge remote-tracking branch 'origin/main' into vplan-simplify-branch…
fhahn Sep 4, 2025
05c8386
[VPlan] Consolidate logic to update loop metadata and profile info.
fhahn Sep 4, 2025
3b47e50
Merge branch 'main' into vplan-simplify-branch-on-const-entry-tmp
fhahn Sep 4, 2025
7bcbbe6
!fixup update on top of main
fhahn Sep 4, 2025
174293a
Merge remote-tracking branch 'origin/main' into vplan-simplify-branch…
fhahn Sep 4, 2025
afdf4c2
!fixup fix formatting
fhahn Sep 4, 2025
4accca8
Merge remote-tracking branch 'origin/main' into vplan-simplify-branch…
fhahn Sep 12, 2025
df8c9da
!fixup update tests after merge
fhahn Sep 12, 2025
83cb4dc
Merge remote-tracking branch 'origin/main' into vplan-simplify-branch…
fhahn Sep 14, 2025
cae7c85
Merge remote-tracking branch 'origin/main' into vplan-simplify-branch…
fhahn Sep 15, 2025
6ea007b
!fixup address latest comments, thanks!
fhahn Sep 15, 2025
ea2db4e
!fixup restore tests
fhahn Sep 15, 2025
bef221f
Merge remote-tracking branch 'origin/main' into vplan-simplify-branch…
fhahn Sep 16, 2025
6a89924
!fixup address comments, thanks
fhahn Sep 16, 2025
0ec1a59
Merge remote-tracking branch 'origin/main' into vplan-simplify-branch…
fhahn Sep 16, 2025
44537d9
Merge remote-tracking branch 'origin/main' into vplan-simplify-branch…
fhahn Sep 16, 2025
b5405c1
!fixup adjust check and message.
fhahn Sep 16, 2025
ce766c7
Merge remote-tracking branch 'origin/main' into vplan-simplify-branch…
fhahn Sep 18, 2025
be17a75
Merge branch 'main' into vplan-simplify-branch-on-const-entry
fhahn Sep 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
17 changes: 0 additions & 17 deletions llvm/include/llvm/Transforms/Utils/LoopUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -533,23 +533,6 @@ LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI,
ReplaceExitVal ReplaceExitValue,
SmallVector<WeakTrackingVH, 16> &DeadInsts);

/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
/// \p OrigLoop and the following distribution of \p OrigLoop iteration among \p
/// UnrolledLoop and \p RemainderLoop. \p UnrolledLoop receives weights that
/// reflect TC/UF iterations, and \p RemainderLoop receives weights that reflect
/// the remaining TC%UF iterations.
///
/// Note that \p OrigLoop may be equal to either \p UnrolledLoop or \p
/// RemainderLoop in which case weights for \p OrigLoop are updated accordingly.
/// Note also behavior is undefined if \p UnrolledLoop and \p RemainderLoop are
/// equal. \p UF must be greater than zero.
/// If \p OrigLoop has no profile info associated nothing happens.
///
/// This utility may be useful for such optimizations as unroller and
/// vectorizer as it's typical transformation for them.
LLVM_ABI void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
Loop *RemainderLoop, uint64_t UF);

/// Utility that implements appending of loops onto a worklist given a range.
/// We want to process loops in postorder, but the worklist is a LIFO data
/// structure, so we append to it in *reverse* postorder.
Expand Down
26 changes: 0 additions & 26 deletions llvm/lib/Transforms/Utils/LoopUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1865,32 +1865,6 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
return NumReplaced;
}

/// Distribute the estimated trip count of \p OrigLoop between \p UnrolledLoop
/// and \p RemainderLoop: the unrolled loop receives the estimate divided by
/// \p UF, the remainder loop receives the estimate modulo \p UF. Both loops
/// keep the invocation weight reported for \p OrigLoop. Nothing is updated
/// when no trip count estimate is available for \p OrigLoop.
void llvm::setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
                                        Loop *RemainderLoop, uint64_t UF) {
  assert(UF > 0 && "Zero unrolled factor is not supported");
  assert(UnrolledLoop != RemainderLoop &&
         "Unrolled and Remainder loops are expected to distinct");

  // Look up the estimate for the original loop; the invocation weight is
  // handed to the query by pointer and reused for both updates below.
  unsigned InvocationWeight = 0;
  const std::optional<unsigned> EstimatedTC =
      getLoopEstimatedTripCount(OrigLoop, &InvocationWeight);
  if (!EstimatedTC.has_value())
    return;

  // Split the estimate: quotient for the unrolled loop, rest for the
  // remainder loop.
  const unsigned OrigTC = *EstimatedTC;
  const unsigned UnrolledTC = OrigTC / UF;
  const unsigned RemainderTC = OrigTC % UF;

  // Update the unrolled loop first, then the remainder loop, as the original
  // loop may alias either of them.
  setLoopEstimatedTripCount(UnrolledLoop, UnrolledTC, InvocationWeight);
  setLoopEstimatedTripCount(RemainderLoop, RemainderTC, InvocationWeight);
}

/// Utility that implements appending of loops onto a worklist.
/// Loops are added in preorder (analogous for reverse postorder for trees),
/// and the worklist is processed LIFO.
Expand Down
18 changes: 12 additions & 6 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -570,12 +570,18 @@ class LoopVectorizationPlanner {

/// Update loop metadata and profile info for both the scalar remainder loop
/// and \p VectorLoop, if it exists. Keeps all loop hints from the original
/// loop on the vector loop and replaces vectorizer-specific metadata.
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop,
VPBasicBlock *HeaderVPBB,
bool VectorizingEpilogue,
unsigned EstimatedVFxUF,
bool DisableRuntimeUnroll);
/// loop on the vector loop and replaces vectorizer-specific metadata. The
/// loop ID of the original loop \p OrigLoopID must be passed, together with
/// the average trip count and invocation weight of the original loop (\p
/// OrigAverageTripCount and \p OrigLoopInvocationWeight respectively). They
/// cannot be retrieved after the plan has been executed, as the original loop
/// may have been removed.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/// may have been removed.
/// may have been removed).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Closed the parenthesis above, thanks.

void updateLoopMetadataAndProfileInfo(
Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
bool VectorizingEpilogue, MDNode *OrigLoopID,
std::optional<unsigned> OrigAverageTripCount,
unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
bool DisableRuntimeUnroll);

protected:
/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
Expand Down
38 changes: 31 additions & 7 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2347,12 +2347,15 @@ Value *EpilogueVectorizerMainLoop::createIterationCountCheck(
}

/// Replace \p VPBB with a VPIRBasicBlock wrapping \p IRBB. All recipes from \p
/// VPBB are moved to the end of the newly created VPIRBasicBlock. VPBB must
/// have a single predecessor, which is rewired to the new VPIRBasicBlock. All
/// successors of VPBB, if any, are rewired to the new VPIRBasicBlock.
/// VPBB are moved to the end of the newly created VPIRBasicBlock. All
/// predecessors and successors of VPBB, if any, are rewired to the new
/// VPIRBasicBlock. If \p VPBB may be unreachable, \p Plan must be passed.
static VPIRBasicBlock *replaceVPBBWithIRVPBB(VPBasicBlock *VPBB,
BasicBlock *IRBB) {
VPIRBasicBlock *IRVPBB = VPBB->getPlan()->createVPIRBasicBlock(IRBB);
BasicBlock *IRBB,
VPlan *Plan = nullptr) {
if (!Plan)
Plan = VPBB->getPlan();
VPIRBasicBlock *IRVPBB = Plan->createVPIRBasicBlock(IRBB);
auto IP = IRVPBB->begin();
for (auto &R : make_early_inc_range(VPBB->phis()))
R.moveBefore(*IRVPBB, IP);
Expand Down Expand Up @@ -7184,6 +7187,19 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
VPlanTransforms::simplifyRecipes(BestVPlan);
VPlanTransforms::removeBranchOnConst(BestVPlan);
if (BestVPlan.getEntry()->getSingleSuccessor() ==
BestVPlan.getScalarPreheader()) {
Comment on lines +7190 to +7191
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removeBranchOnConst() could conceivably bypass the vector loop; this actually happens in a few tests. Worth emitting a missed-vectorization remark.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, added an analysis remark. I am not sure if missed-vectorization would be accurate, because this is for cases where we would create a dead vector loop and should not even try to vectorize.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, it appears the loop isn't vectorized because the Trip Count guard is known to always jump to the scalar loop, i.e., where VFxUF is known to exceed TC, so conceptually a smaller VFxUF could work. But tests include unvectorizable non-loop cases where TC<=1, which would be better cleaned up before calling LV, certainly before reaching LVP::executePlan().

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, we already have a TODO where we create the known True condition.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have a TODO here too; wondering if the message should specify that vectorization is dead or never executes - due to insufficient trip-count.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated message to mention insufficient trip count, thanks

// TODO: The vector loop would be dead, should not even try to vectorize.
ORE->emit([&]() {
return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationDead",
OrigLoop->getStartLoc(),
OrigLoop->getHeader())
<< "Created vector loop never executes due to insufficient trip "
"count.";
});
return DenseMap<const SCEV *, Value *>();
}

VPlanTransforms::narrowInterleaveGroups(
BestVPlan, BestVF,
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
Expand Down Expand Up @@ -7226,7 +7242,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// middle block. The vector loop is created during VPlan execution.
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
replaceVPBBWithIRVPBB(BestVPlan.getScalarPreheader(),
State.CFG.PrevBB->getSingleSuccessor());
State.CFG.PrevBB->getSingleSuccessor(), &BestVPlan);
VPlanTransforms::removeDeadRecipes(BestVPlan);

assert(verifyVPlanIsValid(BestVPlan, true /*VerifyLate*/) &&
Expand Down Expand Up @@ -7257,6 +7273,13 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
//
//===------------------------------------------------===//

// Retrieve loop information before executing the plan, which may remove the
// original loop, if it becomes unreachable.
MDNode *LID = OrigLoop->getLoopID();
unsigned OrigLoopInvocationWeight = 0;
std::optional<unsigned> OrigAverageTripCount =
getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight);

BestVPlan.execute(&State);

// 2.6. Maintain Loop Hints
Expand All @@ -7270,7 +7293,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
updateLoopMetadataAndProfileInfo(
HeaderVPBB ? LI->getLoopFor(State.CFG.VPBB2IRBB.lookup(HeaderVPBB))
: nullptr,
HeaderVPBB, VectorizingEpilogue,
HeaderVPBB, BestVPlan, VectorizingEpilogue, LID, OrigAverageTripCount,
OrigLoopInvocationWeight,
estimateElementCount(BestVF * BestUF, CM.getVScaleForTuning()),
DisableRuntimeUnroll);

Expand Down
66 changes: 48 additions & 18 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -969,12 +969,24 @@ void VPlan::execute(VPTransformState *State) {
setName("Final VPlan");
LLVM_DEBUG(dump());

// Disconnect scalar preheader and scalar header, as the dominator tree edge
// will be updated as part of VPlan execution. This allows keeping the DTU
// logic generic during VPlan execution.
BasicBlock *ScalarPh = State->CFG.ExitBB;
State->CFG.DTU.applyUpdates(
{{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
VPBasicBlock *ScalarPhVPBB = getScalarPreheader();
if (ScalarPhVPBB->hasPredecessors()) {
// Disconnect scalar preheader and scalar header, as the dominator tree edge
// will be updated as part of VPlan execution. This allows keeping the DTU
// logic generic during VPlan execution.
State->CFG.DTU.applyUpdates(
{{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
} else {
Loop *OrigLoop =
State->LI->getLoopFor(getScalarHeader()->getIRBasicBlock());
// If the original loop is unreachable, we need to delete it.
auto Blocks = OrigLoop->getBlocksVector();
Blocks.push_back(cast<VPIRBasicBlock>(ScalarPhVPBB)->getIRBasicBlock());
for (auto *BB : Blocks)
State->LI->removeBlock(BB);
State->LI->erase(OrigLoop);
}

ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
Entry);
Expand Down Expand Up @@ -1648,14 +1660,18 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
}

void LoopVectorizationPlanner::updateLoopMetadataAndProfileInfo(
Loop *VectorLoop, VPBasicBlock *HeaderVPBB, bool VectorizingEpilogue,
unsigned EstimatedVFxUF, bool DisableRuntimeUnroll) {
MDNode *LID = OrigLoop->getLoopID();
Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
bool VectorizingEpilogue, MDNode *OrigLoopID,
std::optional<unsigned> OrigAverageTripCount,
unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
bool DisableRuntimeUnroll) {
// Update the metadata of the scalar loop. Skip the update when vectorizing
// the epilogue loop, to ensure it is only updated once.
if (!VectorizingEpilogue) {
std::optional<MDNode *> RemainderLoopID = makeFollowupLoopID(
LID, {LLVMLoopVectorizeFollowupAll, LLVMLoopVectorizeFollowupEpilogue});
// the epilogue loop to ensure it is updated only once. Also skip the update
// when the scalar loop became unreachable.
if (Plan.getScalarPreheader()->hasPredecessors() && !VectorizingEpilogue) {
std::optional<MDNode *> RemainderLoopID =
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
LLVMLoopVectorizeFollowupEpilogue});
if (RemainderLoopID) {
OrigLoop->setLoopID(*RemainderLoopID);
} else {
Expand All @@ -1670,15 +1686,15 @@ void LoopVectorizationPlanner::updateLoopMetadataAndProfileInfo(
if (!VectorLoop)
return;

if (std::optional<MDNode *> VectorizedLoopID =
makeFollowupLoopID(LID, {LLVMLoopVectorizeFollowupAll,
LLVMLoopVectorizeFollowupVectorized})) {
if (std::optional<MDNode *> VectorizedLoopID = makeFollowupLoopID(
OrigLoopID, {LLVMLoopVectorizeFollowupAll,
LLVMLoopVectorizeFollowupVectorized})) {
VectorLoop->setLoopID(*VectorizedLoopID);
} else {
// Keep all loop hints from the original loop on the vector loop (we'll
// replace the vectorizer-specific hints below).
if (LID)
VectorLoop->setLoopID(LID);
if (OrigLoopID)
VectorLoop->setLoopID(OrigLoopID);

if (!VectorizingEpilogue) {
LoopVectorizeHints Hints(VectorLoop, true, *ORE);
Expand Down Expand Up @@ -1723,7 +1739,21 @@ void LoopVectorizationPlanner::updateLoopMetadataAndProfileInfo(
// For scalable vectorization we can't know at compile time how many
// iterations of the loop are handled in one vector iteration, so instead
// use the value of vscale used for tuning.
setProfileInfoAfterUnrolling(OrigLoop, VectorLoop, OrigLoop, EstimatedVFxUF);
if (!OrigAverageTripCount)
return;
// Calculate number of iterations in unrolled loop.
unsigned AverageVectorTripCount = *OrigAverageTripCount / EstimatedVFxUF;
// Calculate number of iterations for remainder loop.
unsigned RemainderAverageTripCount = *OrigAverageTripCount % EstimatedVFxUF;

if (HeaderVPBB) {
setLoopEstimatedTripCount(VectorLoop, AverageVectorTripCount,
OrigLoopInvocationWeight);
}
if (Plan.getScalarPreheader()->hasPredecessors()) {
setLoopEstimatedTripCount(OrigLoop, RemainderAverageTripCount,
OrigLoopInvocationWeight);
}
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2215,10 +2215,12 @@ void VPlanTransforms::removeBranchOnConst(VPlan &Plan) {
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(Plan.getEntry()))) {
VPValue *Cond;
if (VPBB->getNumSuccessors() != 2 || VPBB == Plan.getEntry() ||
!match(&VPBB->back(), m_BranchOnCond(m_VPValue(Cond))))
// Skip blocks that are not terminated by BranchOnCond.
if (VPBB->empty() || !match(&VPBB->back(), m_BranchOnCond(m_VPValue(Cond))))
continue;

assert(VPBB->getNumSuccessors() == 2 &&
"Two successors expected for BranchOnCond");
unsigned RemovedIdx;
if (match(Cond, m_True()))
RemovedIdx = 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ define i64 @predicated_udiv_scalarized_operand(ptr %a, i64 %x) {
; CHECK-NEXT: [[TMP17]] = add <2 x i64> [[VEC_PHI]], [[PREDPHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP17]])
; CHECK-NEXT: ret i64 [[TMP19]]
;
Expand Down
9 changes: 4 additions & 5 deletions llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,8 @@ exit:
define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i1 %c.0) {
; CHECK-LABEL: define void @test_blend_feeding_replicated_store_2(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i1 [[C_0:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i1> poison, i1 [[C_0]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i1> [[BROADCAST_SPLATINSERT]], <16 x i1> poison, <16 x i32> zeroinitializer
Expand Down Expand Up @@ -344,12 +344,11 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP71:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
; CHECK-NEXT: br i1 [[TMP71]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH]]
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 96, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[GEP_SRC1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[IV1]]
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC1]], align 1
; CHECK-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], 0
Expand Down
19 changes: 8 additions & 11 deletions llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ target triple = "arm64-apple-macosx11.0.0"
define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
; CHECK-LABEL: define void @fshl_operand_first_order_recurrence(
; CHECK-SAME: ptr [[DST:%.*]], ptr noalias [[SRC:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
Expand All @@ -30,14 +30,12 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[WIDE_LOAD1]], i32 1
; CHECK-NEXT: br label %[[SCALAR_PH]]
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 100, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[RECUR:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[L]] = load i64, ptr [[GEP_SRC]], align 8
; CHECK-NEXT: [[OR:%.*]] = tail call i64 @llvm.fshl.i64(i64 1, i64 [[RECUR]], i64 1)
Expand Down Expand Up @@ -73,7 +71,7 @@ define void @powi_call(ptr %P) {
; CHECK-LABEL: define void @powi_call(
; CHECK-SAME: ptr [[P:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
Expand All @@ -83,7 +81,7 @@ define void @powi_call(ptr %P) {
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK: [[SCALAR_PH:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
Expand All @@ -93,7 +91,7 @@ define void @powi_call(ptr %P) {
; CHECK-NEXT: store double [[POWI]], ptr [[GEP]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

metadata dropped, scalar loop unreachable

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep

; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
Expand Down Expand Up @@ -224,5 +222,4 @@ declare i64 @llvm.fshl.i64(i64, i64, i64)
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]}
;.
Loading